Commit 8edf19c2fe028563fc6ea9cb1995b8ee4172d4b6
Committed by David S. Miller
1 parent c720c7e838
Exists in master and in 7 other branches
net: sk_drops consolidation part 2
- skb_kill_datagram() can increment sk->sk_drops itself, not callers.
- UDP on IPV4 & IPV6 dropped frames (because of bad checksum or policy checks) increment sk_drops.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 6 additions and 2 deletions
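The change is easiest to see from the caller's side. Below is a minimal userspace sketch of the new accounting contract; names like skb_kill_datagram_model and csum_error_path are stand-ins, and a C11 atomic replaces the kernel's atomic_t. This is an illustration of the pattern, not kernel code.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the kernel types; only the drop counter matters here. */
struct sock { atomic_int sk_drops; };
struct sk_buff { char payload[64]; };

/* After this patch the helper charges the drop to the socket itself... */
static int skb_kill_datagram_model(struct sock *sk, struct sk_buff *skb)
{
	free(skb);				/* kfree_skb() */
	atomic_fetch_add(&sk->sk_drops, 1);	/* new: was the caller's job */
	return 0;
}

/* ...so a receive path that hits a bad checksum no longer needs its own
 * atomic_inc(&sk->sk_drops) before making the call. */
static void csum_error_path(struct sock *sk, struct sk_buff *skb)
{
	skb_kill_datagram_model(sk, skb);	/* drop counted inside */
}

int main(void)
{
	struct sock sk = { .sk_drops = 0 };

	csum_error_path(&sk, malloc(sizeof(struct sk_buff)));
	printf("sk_drops = %d\n", atomic_load(&sk.sk_drops));	/* prints 1 */
	return 0;
}

With the increment inside the helper, every drop path that kills a datagram is counted once, and callers cannot forget (or double) the accounting.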
net/core/datagram.c
1 | /* | 1 | /* |
2 | * SUCS NET3: | 2 | * SUCS NET3: |
3 | * | 3 | * |
4 | * Generic datagram handling routines. These are generic for all | 4 | * Generic datagram handling routines. These are generic for all |
5 | * protocols. Possibly a generic IP version on top of these would | 5 | * protocols. Possibly a generic IP version on top of these would |
6 | * make sense. Not tonight however 8-). | 6 | * make sense. Not tonight however 8-). |
7 | * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and | 7 | * This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and |
8 | * NetROM layer all have identical poll code and mostly | 8 | * NetROM layer all have identical poll code and mostly |
9 | * identical recvmsg() code. So we share it here. The poll was | 9 | * identical recvmsg() code. So we share it here. The poll was |
10 | * shared before but buried in udp.c so I moved it. | 10 | * shared before but buried in udp.c so I moved it. |
11 | * | 11 | * |
12 | * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old | 12 | * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old |
13 | * udp.c code) | 13 | * udp.c code) |
14 | * | 14 | * |
15 | * Fixes: | 15 | * Fixes: |
16 | * Alan Cox : NULL return from skb_peek_copy() | 16 | * Alan Cox : NULL return from skb_peek_copy() |
17 | * understood | 17 | * understood |
18 | * Alan Cox : Rewrote skb_read_datagram to avoid the | 18 | * Alan Cox : Rewrote skb_read_datagram to avoid the |
19 | * skb_peek_copy stuff. | 19 | * skb_peek_copy stuff. |
20 | * Alan Cox : Added support for SOCK_SEQPACKET. | 20 | * Alan Cox : Added support for SOCK_SEQPACKET. |
21 | * IPX can no longer use the SO_TYPE hack | 21 | * IPX can no longer use the SO_TYPE hack |
22 | * but AX.25 now works right, and SPX is | 22 | * but AX.25 now works right, and SPX is |
23 | * feasible. | 23 | * feasible. |
24 | * Alan Cox : Fixed write poll of non IP protocol | 24 | * Alan Cox : Fixed write poll of non IP protocol |
25 | * crash. | 25 | * crash. |
26 | * Florian La Roche: Changed for my new skbuff handling. | 26 | * Florian La Roche: Changed for my new skbuff handling. |
27 | * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. | 27 | * Darryl Miles : Fixed non-blocking SOCK_SEQPACKET. |
28 | * Linus Torvalds : BSD semantic fixes. | 28 | * Linus Torvalds : BSD semantic fixes. |
29 | * Alan Cox : Datagram iovec handling | 29 | * Alan Cox : Datagram iovec handling |
30 | * Darryl Miles : Fixed non-blocking SOCK_STREAM. | 30 | * Darryl Miles : Fixed non-blocking SOCK_STREAM. |
31 | * Alan Cox : POSIXisms | 31 | * Alan Cox : POSIXisms |
32 | * Pete Wyckoff : Unconnected accept() fix. | 32 | * Pete Wyckoff : Unconnected accept() fix. |
33 | * | 33 | * |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #include <linux/module.h> | 36 | #include <linux/module.h> |
37 | #include <linux/types.h> | 37 | #include <linux/types.h> |
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
40 | #include <asm/system.h> | 40 | #include <asm/system.h> |
41 | #include <linux/mm.h> | 41 | #include <linux/mm.h> |
42 | #include <linux/interrupt.h> | 42 | #include <linux/interrupt.h> |
43 | #include <linux/errno.h> | 43 | #include <linux/errno.h> |
44 | #include <linux/sched.h> | 44 | #include <linux/sched.h> |
45 | #include <linux/inet.h> | 45 | #include <linux/inet.h> |
46 | #include <linux/netdevice.h> | 46 | #include <linux/netdevice.h> |
47 | #include <linux/rtnetlink.h> | 47 | #include <linux/rtnetlink.h> |
48 | #include <linux/poll.h> | 48 | #include <linux/poll.h> |
49 | #include <linux/highmem.h> | 49 | #include <linux/highmem.h> |
50 | #include <linux/spinlock.h> | 50 | #include <linux/spinlock.h> |
51 | 51 | ||
52 | #include <net/protocol.h> | 52 | #include <net/protocol.h> |
53 | #include <linux/skbuff.h> | 53 | #include <linux/skbuff.h> |
54 | 54 | ||
55 | #include <net/checksum.h> | 55 | #include <net/checksum.h> |
56 | #include <net/sock.h> | 56 | #include <net/sock.h> |
57 | #include <net/tcp_states.h> | 57 | #include <net/tcp_states.h> |
58 | #include <trace/events/skb.h> | 58 | #include <trace/events/skb.h> |
59 | 59 | ||
60 | /* | 60 | /* |
61 | * Is a socket 'connection oriented' ? | 61 | * Is a socket 'connection oriented' ? |
62 | */ | 62 | */ |
63 | static inline int connection_based(struct sock *sk) | 63 | static inline int connection_based(struct sock *sk) |
64 | { | 64 | { |
65 | return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; | 65 | return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; |
66 | } | 66 | } |
67 | 67 | ||
68 | static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, | 68 | static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, |
69 | void *key) | 69 | void *key) |
70 | { | 70 | { |
71 | unsigned long bits = (unsigned long)key; | 71 | unsigned long bits = (unsigned long)key; |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Avoid a wakeup if event not interesting for us | 74 | * Avoid a wakeup if event not interesting for us |
75 | */ | 75 | */ |
76 | if (bits && !(bits & (POLLIN | POLLERR))) | 76 | if (bits && !(bits & (POLLIN | POLLERR))) |
77 | return 0; | 77 | return 0; |
78 | return autoremove_wake_function(wait, mode, sync, key); | 78 | return autoremove_wake_function(wait, mode, sync, key); |
79 | } | 79 | } |
80 | /* | 80 | /* |
81 | * Wait for a packet.. | 81 | * Wait for a packet.. |
82 | */ | 82 | */ |
83 | static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) | 83 | static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) |
84 | { | 84 | { |
85 | int error; | 85 | int error; |
86 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); | 86 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); |
87 | 87 | ||
88 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 88 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); |
89 | 89 | ||
90 | /* Socket errors? */ | 90 | /* Socket errors? */ |
91 | error = sock_error(sk); | 91 | error = sock_error(sk); |
92 | if (error) | 92 | if (error) |
93 | goto out_err; | 93 | goto out_err; |
94 | 94 | ||
95 | if (!skb_queue_empty(&sk->sk_receive_queue)) | 95 | if (!skb_queue_empty(&sk->sk_receive_queue)) |
96 | goto out; | 96 | goto out; |
97 | 97 | ||
98 | /* Socket shut down? */ | 98 | /* Socket shut down? */ |
99 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 99 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
100 | goto out_noerr; | 100 | goto out_noerr; |
101 | 101 | ||
102 | /* Sequenced packets can come disconnected. | 102 | /* Sequenced packets can come disconnected. |
103 | * If so we report the problem | 103 | * If so we report the problem |
104 | */ | 104 | */ |
105 | error = -ENOTCONN; | 105 | error = -ENOTCONN; |
106 | if (connection_based(sk) && | 106 | if (connection_based(sk) && |
107 | !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN)) | 107 | !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN)) |
108 | goto out_err; | 108 | goto out_err; |
109 | 109 | ||
110 | /* handle signals */ | 110 | /* handle signals */ |
111 | if (signal_pending(current)) | 111 | if (signal_pending(current)) |
112 | goto interrupted; | 112 | goto interrupted; |
113 | 113 | ||
114 | error = 0; | 114 | error = 0; |
115 | *timeo_p = schedule_timeout(*timeo_p); | 115 | *timeo_p = schedule_timeout(*timeo_p); |
116 | out: | 116 | out: |
117 | finish_wait(sk->sk_sleep, &wait); | 117 | finish_wait(sk->sk_sleep, &wait); |
118 | return error; | 118 | return error; |
119 | interrupted: | 119 | interrupted: |
120 | error = sock_intr_errno(*timeo_p); | 120 | error = sock_intr_errno(*timeo_p); |
121 | out_err: | 121 | out_err: |
122 | *err = error; | 122 | *err = error; |
123 | goto out; | 123 | goto out; |
124 | out_noerr: | 124 | out_noerr: |
125 | *err = 0; | 125 | *err = 0; |
126 | error = 1; | 126 | error = 1; |
127 | goto out; | 127 | goto out; |
128 | } | 128 | } |
129 | 129 | ||
130 | /** | 130 | /** |
131 | * __skb_recv_datagram - Receive a datagram skbuff | 131 | * __skb_recv_datagram - Receive a datagram skbuff |
132 | * @sk: socket | 132 | * @sk: socket |
133 | * @flags: MSG_ flags | 133 | * @flags: MSG_ flags |
134 | * @peeked: returns non-zero if this packet has been seen before | 134 | * @peeked: returns non-zero if this packet has been seen before |
135 | * @err: error code returned | 135 | * @err: error code returned |
136 | * | 136 | * |
137 | * Get a datagram skbuff, understands the peeking, nonblocking wakeups | 137 | * Get a datagram skbuff, understands the peeking, nonblocking wakeups |
138 | * and possible races. This replaces identical code in packet, raw and | 138 | * and possible races. This replaces identical code in packet, raw and |
139 | * udp, as well as the IPX AX.25 and Appletalk. It also finally fixes | 139 | * udp, as well as the IPX AX.25 and Appletalk. It also finally fixes |
140 | * the long standing peek and read race for datagram sockets. If you | 140 | * the long standing peek and read race for datagram sockets. If you |
141 | * alter this routine remember it must be re-entrant. | 141 | * alter this routine remember it must be re-entrant. |
142 | * | 142 | * |
143 | * This function will lock the socket if a skb is returned, so the caller | 143 | * This function will lock the socket if a skb is returned, so the caller |
144 | * needs to unlock the socket in that case (usually by calling | 144 | * needs to unlock the socket in that case (usually by calling |
145 | * skb_free_datagram) | 145 | * skb_free_datagram) |
146 | * | 146 | * |
147 | * * It does not lock socket since today. This function is | 147 | * * It does not lock socket since today. This function is |
148 | * * free of race conditions. This measure should/can improve | 148 | * * free of race conditions. This measure should/can improve |
149 | * * significantly datagram socket latencies at high loads, | 149 | * * significantly datagram socket latencies at high loads, |
150 | * * when data copying to user space takes lots of time. | 150 | * * when data copying to user space takes lots of time. |
151 | * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet | 151 | * * (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet |
152 | * * 8) Great win.) | 152 | * * 8) Great win.) |
153 | * * --ANK (980729) | 153 | * * --ANK (980729) |
154 | * | 154 | * |
155 | * The order of the tests when we find no data waiting are specified | 155 | * The order of the tests when we find no data waiting are specified |
156 | * quite explicitly by POSIX 1003.1g, don't change them without having | 156 | * quite explicitly by POSIX 1003.1g, don't change them without having |
157 | * the standard around please. | 157 | * the standard around please. |
158 | */ | 158 | */ |
159 | struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, | 159 | struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, |
160 | int *peeked, int *err) | 160 | int *peeked, int *err) |
161 | { | 161 | { |
162 | struct sk_buff *skb; | 162 | struct sk_buff *skb; |
163 | long timeo; | 163 | long timeo; |
164 | /* | 164 | /* |
165 | * Caller is allowed not to check sk->sk_err before skb_recv_datagram() | 165 | * Caller is allowed not to check sk->sk_err before skb_recv_datagram() |
166 | */ | 166 | */ |
167 | int error = sock_error(sk); | 167 | int error = sock_error(sk); |
168 | 168 | ||
169 | if (error) | 169 | if (error) |
170 | goto no_packet; | 170 | goto no_packet; |
171 | 171 | ||
172 | timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); | 172 | timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); |
173 | 173 | ||
174 | do { | 174 | do { |
175 | /* Again only user level code calls this function, so nothing | 175 | /* Again only user level code calls this function, so nothing |
176 | * interrupt level will suddenly eat the receive_queue. | 176 | * interrupt level will suddenly eat the receive_queue. |
177 | * | 177 | * |
178 | * Look at current nfs client by the way... | 178 | * Look at current nfs client by the way... |
179 | * However, this function was corrent in any case. 8) | 179 | * However, this function was corrent in any case. 8) |
180 | */ | 180 | */ |
181 | unsigned long cpu_flags; | 181 | unsigned long cpu_flags; |
182 | 182 | ||
183 | spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); | 183 | spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); |
184 | skb = skb_peek(&sk->sk_receive_queue); | 184 | skb = skb_peek(&sk->sk_receive_queue); |
185 | if (skb) { | 185 | if (skb) { |
186 | *peeked = skb->peeked; | 186 | *peeked = skb->peeked; |
187 | if (flags & MSG_PEEK) { | 187 | if (flags & MSG_PEEK) { |
188 | skb->peeked = 1; | 188 | skb->peeked = 1; |
189 | atomic_inc(&skb->users); | 189 | atomic_inc(&skb->users); |
190 | } else | 190 | } else |
191 | __skb_unlink(skb, &sk->sk_receive_queue); | 191 | __skb_unlink(skb, &sk->sk_receive_queue); |
192 | } | 192 | } |
193 | spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); | 193 | spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); |
194 | 194 | ||
195 | if (skb) | 195 | if (skb) |
196 | return skb; | 196 | return skb; |
197 | 197 | ||
198 | /* User doesn't want to wait */ | 198 | /* User doesn't want to wait */ |
199 | error = -EAGAIN; | 199 | error = -EAGAIN; |
200 | if (!timeo) | 200 | if (!timeo) |
201 | goto no_packet; | 201 | goto no_packet; |
202 | 202 | ||
203 | } while (!wait_for_packet(sk, err, &timeo)); | 203 | } while (!wait_for_packet(sk, err, &timeo)); |
204 | 204 | ||
205 | return NULL; | 205 | return NULL; |
206 | 206 | ||
207 | no_packet: | 207 | no_packet: |
208 | *err = error; | 208 | *err = error; |
209 | return NULL; | 209 | return NULL; |
210 | } | 210 | } |
211 | EXPORT_SYMBOL(__skb_recv_datagram); | 211 | EXPORT_SYMBOL(__skb_recv_datagram); |
212 | 212 | ||
213 | struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, | 213 | struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, |
214 | int noblock, int *err) | 214 | int noblock, int *err) |
215 | { | 215 | { |
216 | int peeked; | 216 | int peeked; |
217 | 217 | ||
218 | return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | 218 | return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), |
219 | &peeked, err); | 219 | &peeked, err); |
220 | } | 220 | } |
221 | 221 | ||
222 | void skb_free_datagram(struct sock *sk, struct sk_buff *skb) | 222 | void skb_free_datagram(struct sock *sk, struct sk_buff *skb) |
223 | { | 223 | { |
224 | consume_skb(skb); | 224 | consume_skb(skb); |
225 | sk_mem_reclaim_partial(sk); | 225 | sk_mem_reclaim_partial(sk); |
226 | } | 226 | } |
227 | 227 | ||
228 | /** | 228 | /** |
229 | * skb_kill_datagram - Free a datagram skbuff forcibly | 229 | * skb_kill_datagram - Free a datagram skbuff forcibly |
230 | * @sk: socket | 230 | * @sk: socket |
231 | * @skb: datagram skbuff | 231 | * @skb: datagram skbuff |
232 | * @flags: MSG_ flags | 232 | * @flags: MSG_ flags |
233 | * | 233 | * |
234 | * This function frees a datagram skbuff that was received by | 234 | * This function frees a datagram skbuff that was received by |
235 | * skb_recv_datagram. The flags argument must match the one | 235 | * skb_recv_datagram. The flags argument must match the one |
236 | * used for skb_recv_datagram. | 236 | * used for skb_recv_datagram. |
237 | * | 237 | * |
238 | * If the MSG_PEEK flag is set, and the packet is still on the | 238 | * If the MSG_PEEK flag is set, and the packet is still on the |
239 | * receive queue of the socket, it will be taken off the queue | 239 | * receive queue of the socket, it will be taken off the queue |
240 | * before it is freed. | 240 | * before it is freed. |
241 | * | 241 | * |
242 | * This function currently only disables BH when acquiring the | 242 | * This function currently only disables BH when acquiring the |
243 | * sk_receive_queue lock. Therefore it must not be used in a | 243 | * sk_receive_queue lock. Therefore it must not be used in a |
244 | * context where that lock is acquired in an IRQ context. | 244 | * context where that lock is acquired in an IRQ context. |
245 | * | 245 | * |
246 | * It returns 0 if the packet was removed by us. | 246 | * It returns 0 if the packet was removed by us. |
247 | */ | 247 | */ |
248 | 248 | ||
249 | int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) | 249 | int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) |
250 | { | 250 | { |
251 | int err = 0; | 251 | int err = 0; |
252 | 252 | ||
253 | if (flags & MSG_PEEK) { | 253 | if (flags & MSG_PEEK) { |
254 | err = -ENOENT; | 254 | err = -ENOENT; |
255 | spin_lock_bh(&sk->sk_receive_queue.lock); | 255 | spin_lock_bh(&sk->sk_receive_queue.lock); |
256 | if (skb == skb_peek(&sk->sk_receive_queue)) { | 256 | if (skb == skb_peek(&sk->sk_receive_queue)) { |
257 | __skb_unlink(skb, &sk->sk_receive_queue); | 257 | __skb_unlink(skb, &sk->sk_receive_queue); |
258 | atomic_dec(&skb->users); | 258 | atomic_dec(&skb->users); |
259 | err = 0; | 259 | err = 0; |
260 | } | 260 | } |
261 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 261 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
262 | } | 262 | } |
263 | 263 | ||
264 | kfree_skb(skb); | 264 | kfree_skb(skb); |
| | 265 | atomic_inc(&sk->sk_drops); |
265 | sk_mem_reclaim_partial(sk); | 266 | sk_mem_reclaim_partial(sk); |
266 | 267 | ||
267 | return err; | 268 | return err; |
268 | } | 269 | } |
269 | 270 | ||
270 | EXPORT_SYMBOL(skb_kill_datagram); | 271 | EXPORT_SYMBOL(skb_kill_datagram); |
271 | 272 | ||
272 | /** | 273 | /** |
273 | * skb_copy_datagram_iovec - Copy a datagram to an iovec. | 274 | * skb_copy_datagram_iovec - Copy a datagram to an iovec. |
274 | * @skb: buffer to copy | 275 | * @skb: buffer to copy |
275 | * @offset: offset in the buffer to start copying from | 276 | * @offset: offset in the buffer to start copying from |
276 | * @to: io vector to copy to | 277 | * @to: io vector to copy to |
277 | * @len: amount of data to copy from buffer to iovec | 278 | * @len: amount of data to copy from buffer to iovec |
278 | * | 279 | * |
279 | * Note: the iovec is modified during the copy. | 280 | * Note: the iovec is modified during the copy. |
280 | */ | 281 | */ |
281 | int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, | 282 | int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, |
282 | struct iovec *to, int len) | 283 | struct iovec *to, int len) |
283 | { | 284 | { |
284 | int start = skb_headlen(skb); | 285 | int start = skb_headlen(skb); |
285 | int i, copy = start - offset; | 286 | int i, copy = start - offset; |
286 | struct sk_buff *frag_iter; | 287 | struct sk_buff *frag_iter; |
287 | 288 | ||
288 | trace_skb_copy_datagram_iovec(skb, len); | 289 | trace_skb_copy_datagram_iovec(skb, len); |
289 | 290 | ||
290 | /* Copy header. */ | 291 | /* Copy header. */ |
291 | if (copy > 0) { | 292 | if (copy > 0) { |
292 | if (copy > len) | 293 | if (copy > len) |
293 | copy = len; | 294 | copy = len; |
294 | if (memcpy_toiovec(to, skb->data + offset, copy)) | 295 | if (memcpy_toiovec(to, skb->data + offset, copy)) |
295 | goto fault; | 296 | goto fault; |
296 | if ((len -= copy) == 0) | 297 | if ((len -= copy) == 0) |
297 | return 0; | 298 | return 0; |
298 | offset += copy; | 299 | offset += copy; |
299 | } | 300 | } |
300 | 301 | ||
301 | /* Copy paged appendix. Hmm... why does this look so complicated? */ | 302 | /* Copy paged appendix. Hmm... why does this look so complicated? */ |
302 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 303 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
303 | int end; | 304 | int end; |
304 | 305 | ||
305 | WARN_ON(start > offset + len); | 306 | WARN_ON(start > offset + len); |
306 | 307 | ||
307 | end = start + skb_shinfo(skb)->frags[i].size; | 308 | end = start + skb_shinfo(skb)->frags[i].size; |
308 | if ((copy = end - offset) > 0) { | 309 | if ((copy = end - offset) > 0) { |
309 | int err; | 310 | int err; |
310 | u8 *vaddr; | 311 | u8 *vaddr; |
311 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 312 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
312 | struct page *page = frag->page; | 313 | struct page *page = frag->page; |
313 | 314 | ||
314 | if (copy > len) | 315 | if (copy > len) |
315 | copy = len; | 316 | copy = len; |
316 | vaddr = kmap(page); | 317 | vaddr = kmap(page); |
317 | err = memcpy_toiovec(to, vaddr + frag->page_offset + | 318 | err = memcpy_toiovec(to, vaddr + frag->page_offset + |
318 | offset - start, copy); | 319 | offset - start, copy); |
319 | kunmap(page); | 320 | kunmap(page); |
320 | if (err) | 321 | if (err) |
321 | goto fault; | 322 | goto fault; |
322 | if (!(len -= copy)) | 323 | if (!(len -= copy)) |
323 | return 0; | 324 | return 0; |
324 | offset += copy; | 325 | offset += copy; |
325 | } | 326 | } |
326 | start = end; | 327 | start = end; |
327 | } | 328 | } |
328 | 329 | ||
329 | skb_walk_frags(skb, frag_iter) { | 330 | skb_walk_frags(skb, frag_iter) { |
330 | int end; | 331 | int end; |
331 | 332 | ||
332 | WARN_ON(start > offset + len); | 333 | WARN_ON(start > offset + len); |
333 | 334 | ||
334 | end = start + frag_iter->len; | 335 | end = start + frag_iter->len; |
335 | if ((copy = end - offset) > 0) { | 336 | if ((copy = end - offset) > 0) { |
336 | if (copy > len) | 337 | if (copy > len) |
337 | copy = len; | 338 | copy = len; |
338 | if (skb_copy_datagram_iovec(frag_iter, | 339 | if (skb_copy_datagram_iovec(frag_iter, |
339 | offset - start, | 340 | offset - start, |
340 | to, copy)) | 341 | to, copy)) |
341 | goto fault; | 342 | goto fault; |
342 | if ((len -= copy) == 0) | 343 | if ((len -= copy) == 0) |
343 | return 0; | 344 | return 0; |
344 | offset += copy; | 345 | offset += copy; |
345 | } | 346 | } |
346 | start = end; | 347 | start = end; |
347 | } | 348 | } |
348 | if (!len) | 349 | if (!len) |
349 | return 0; | 350 | return 0; |
350 | 351 | ||
351 | fault: | 352 | fault: |
352 | return -EFAULT; | 353 | return -EFAULT; |
353 | } | 354 | } |
354 | 355 | ||
355 | /** | 356 | /** |
356 | * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. | 357 | * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. |
357 | * @skb: buffer to copy | 358 | * @skb: buffer to copy |
358 | * @offset: offset in the buffer to start copying from | 359 | * @offset: offset in the buffer to start copying from |
359 | * @to: io vector to copy to | 360 | * @to: io vector to copy to |
360 | * @to_offset: offset in the io vector to start copying to | 361 | * @to_offset: offset in the io vector to start copying to |
361 | * @len: amount of data to copy from buffer to iovec | 362 | * @len: amount of data to copy from buffer to iovec |
362 | * | 363 | * |
363 | * Returns 0 or -EFAULT. | 364 | * Returns 0 or -EFAULT. |
364 | * Note: the iovec is not modified during the copy. | 365 | * Note: the iovec is not modified during the copy. |
365 | */ | 366 | */ |
366 | int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, | 367 | int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, |
367 | const struct iovec *to, int to_offset, | 368 | const struct iovec *to, int to_offset, |
368 | int len) | 369 | int len) |
369 | { | 370 | { |
370 | int start = skb_headlen(skb); | 371 | int start = skb_headlen(skb); |
371 | int i, copy = start - offset; | 372 | int i, copy = start - offset; |
372 | struct sk_buff *frag_iter; | 373 | struct sk_buff *frag_iter; |
373 | 374 | ||
374 | /* Copy header. */ | 375 | /* Copy header. */ |
375 | if (copy > 0) { | 376 | if (copy > 0) { |
376 | if (copy > len) | 377 | if (copy > len) |
377 | copy = len; | 378 | copy = len; |
378 | if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) | 379 | if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy)) |
379 | goto fault; | 380 | goto fault; |
380 | if ((len -= copy) == 0) | 381 | if ((len -= copy) == 0) |
381 | return 0; | 382 | return 0; |
382 | offset += copy; | 383 | offset += copy; |
383 | to_offset += copy; | 384 | to_offset += copy; |
384 | } | 385 | } |
385 | 386 | ||
386 | /* Copy paged appendix. Hmm... why does this look so complicated? */ | 387 | /* Copy paged appendix. Hmm... why does this look so complicated? */ |
387 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 388 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
388 | int end; | 389 | int end; |
389 | 390 | ||
390 | WARN_ON(start > offset + len); | 391 | WARN_ON(start > offset + len); |
391 | 392 | ||
392 | end = start + skb_shinfo(skb)->frags[i].size; | 393 | end = start + skb_shinfo(skb)->frags[i].size; |
393 | if ((copy = end - offset) > 0) { | 394 | if ((copy = end - offset) > 0) { |
394 | int err; | 395 | int err; |
395 | u8 *vaddr; | 396 | u8 *vaddr; |
396 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 397 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
397 | struct page *page = frag->page; | 398 | struct page *page = frag->page; |
398 | 399 | ||
399 | if (copy > len) | 400 | if (copy > len) |
400 | copy = len; | 401 | copy = len; |
401 | vaddr = kmap(page); | 402 | vaddr = kmap(page); |
402 | err = memcpy_toiovecend(to, vaddr + frag->page_offset + | 403 | err = memcpy_toiovecend(to, vaddr + frag->page_offset + |
403 | offset - start, to_offset, copy); | 404 | offset - start, to_offset, copy); |
404 | kunmap(page); | 405 | kunmap(page); |
405 | if (err) | 406 | if (err) |
406 | goto fault; | 407 | goto fault; |
407 | if (!(len -= copy)) | 408 | if (!(len -= copy)) |
408 | return 0; | 409 | return 0; |
409 | offset += copy; | 410 | offset += copy; |
410 | to_offset += copy; | 411 | to_offset += copy; |
411 | } | 412 | } |
412 | start = end; | 413 | start = end; |
413 | } | 414 | } |
414 | 415 | ||
415 | skb_walk_frags(skb, frag_iter) { | 416 | skb_walk_frags(skb, frag_iter) { |
416 | int end; | 417 | int end; |
417 | 418 | ||
418 | WARN_ON(start > offset + len); | 419 | WARN_ON(start > offset + len); |
419 | 420 | ||
420 | end = start + frag_iter->len; | 421 | end = start + frag_iter->len; |
421 | if ((copy = end - offset) > 0) { | 422 | if ((copy = end - offset) > 0) { |
422 | if (copy > len) | 423 | if (copy > len) |
423 | copy = len; | 424 | copy = len; |
424 | if (skb_copy_datagram_const_iovec(frag_iter, | 425 | if (skb_copy_datagram_const_iovec(frag_iter, |
425 | offset - start, | 426 | offset - start, |
426 | to, to_offset, | 427 | to, to_offset, |
427 | copy)) | 428 | copy)) |
428 | goto fault; | 429 | goto fault; |
429 | if ((len -= copy) == 0) | 430 | if ((len -= copy) == 0) |
430 | return 0; | 431 | return 0; |
431 | offset += copy; | 432 | offset += copy; |
432 | to_offset += copy; | 433 | to_offset += copy; |
433 | } | 434 | } |
434 | start = end; | 435 | start = end; |
435 | } | 436 | } |
436 | if (!len) | 437 | if (!len) |
437 | return 0; | 438 | return 0; |
438 | 439 | ||
439 | fault: | 440 | fault: |
440 | return -EFAULT; | 441 | return -EFAULT; |
441 | } | 442 | } |
442 | EXPORT_SYMBOL(skb_copy_datagram_const_iovec); | 443 | EXPORT_SYMBOL(skb_copy_datagram_const_iovec); |
443 | 444 | ||
444 | /** | 445 | /** |
445 | * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. | 446 | * skb_copy_datagram_from_iovec - Copy a datagram from an iovec. |
446 | * @skb: buffer to copy | 447 | * @skb: buffer to copy |
447 | * @offset: offset in the buffer to start copying to | 448 | * @offset: offset in the buffer to start copying to |
448 | * @from: io vector to copy to | 449 | * @from: io vector to copy to |
449 | * @from_offset: offset in the io vector to start copying from | 450 | * @from_offset: offset in the io vector to start copying from |
450 | * @len: amount of data to copy to buffer from iovec | 451 | * @len: amount of data to copy to buffer from iovec |
451 | * | 452 | * |
452 | * Returns 0 or -EFAULT. | 453 | * Returns 0 or -EFAULT. |
453 | * Note: the iovec is not modified during the copy. | 454 | * Note: the iovec is not modified during the copy. |
454 | */ | 455 | */ |
455 | int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, | 456 | int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, |
456 | const struct iovec *from, int from_offset, | 457 | const struct iovec *from, int from_offset, |
457 | int len) | 458 | int len) |
458 | { | 459 | { |
459 | int start = skb_headlen(skb); | 460 | int start = skb_headlen(skb); |
460 | int i, copy = start - offset; | 461 | int i, copy = start - offset; |
461 | struct sk_buff *frag_iter; | 462 | struct sk_buff *frag_iter; |
462 | 463 | ||
463 | /* Copy header. */ | 464 | /* Copy header. */ |
464 | if (copy > 0) { | 465 | if (copy > 0) { |
465 | if (copy > len) | 466 | if (copy > len) |
466 | copy = len; | 467 | copy = len; |
467 | if (memcpy_fromiovecend(skb->data + offset, from, from_offset, | 468 | if (memcpy_fromiovecend(skb->data + offset, from, from_offset, |
468 | copy)) | 469 | copy)) |
469 | goto fault; | 470 | goto fault; |
470 | if ((len -= copy) == 0) | 471 | if ((len -= copy) == 0) |
471 | return 0; | 472 | return 0; |
472 | offset += copy; | 473 | offset += copy; |
473 | from_offset += copy; | 474 | from_offset += copy; |
474 | } | 475 | } |
475 | 476 | ||
476 | /* Copy paged appendix. Hmm... why does this look so complicated? */ | 477 | /* Copy paged appendix. Hmm... why does this look so complicated? */ |
477 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 478 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
478 | int end; | 479 | int end; |
479 | 480 | ||
480 | WARN_ON(start > offset + len); | 481 | WARN_ON(start > offset + len); |
481 | 482 | ||
482 | end = start + skb_shinfo(skb)->frags[i].size; | 483 | end = start + skb_shinfo(skb)->frags[i].size; |
483 | if ((copy = end - offset) > 0) { | 484 | if ((copy = end - offset) > 0) { |
484 | int err; | 485 | int err; |
485 | u8 *vaddr; | 486 | u8 *vaddr; |
486 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 487 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
487 | struct page *page = frag->page; | 488 | struct page *page = frag->page; |
488 | 489 | ||
489 | if (copy > len) | 490 | if (copy > len) |
490 | copy = len; | 491 | copy = len; |
491 | vaddr = kmap(page); | 492 | vaddr = kmap(page); |
492 | err = memcpy_fromiovecend(vaddr + frag->page_offset + | 493 | err = memcpy_fromiovecend(vaddr + frag->page_offset + |
493 | offset - start, | 494 | offset - start, |
494 | from, from_offset, copy); | 495 | from, from_offset, copy); |
495 | kunmap(page); | 496 | kunmap(page); |
496 | if (err) | 497 | if (err) |
497 | goto fault; | 498 | goto fault; |
498 | 499 | ||
499 | if (!(len -= copy)) | 500 | if (!(len -= copy)) |
500 | return 0; | 501 | return 0; |
501 | offset += copy; | 502 | offset += copy; |
502 | from_offset += copy; | 503 | from_offset += copy; |
503 | } | 504 | } |
504 | start = end; | 505 | start = end; |
505 | } | 506 | } |
506 | 507 | ||
507 | skb_walk_frags(skb, frag_iter) { | 508 | skb_walk_frags(skb, frag_iter) { |
508 | int end; | 509 | int end; |
509 | 510 | ||
510 | WARN_ON(start > offset + len); | 511 | WARN_ON(start > offset + len); |
511 | 512 | ||
512 | end = start + frag_iter->len; | 513 | end = start + frag_iter->len; |
513 | if ((copy = end - offset) > 0) { | 514 | if ((copy = end - offset) > 0) { |
514 | if (copy > len) | 515 | if (copy > len) |
515 | copy = len; | 516 | copy = len; |
516 | if (skb_copy_datagram_from_iovec(frag_iter, | 517 | if (skb_copy_datagram_from_iovec(frag_iter, |
517 | offset - start, | 518 | offset - start, |
518 | from, | 519 | from, |
519 | from_offset, | 520 | from_offset, |
520 | copy)) | 521 | copy)) |
521 | goto fault; | 522 | goto fault; |
522 | if ((len -= copy) == 0) | 523 | if ((len -= copy) == 0) |
523 | return 0; | 524 | return 0; |
524 | offset += copy; | 525 | offset += copy; |
525 | from_offset += copy; | 526 | from_offset += copy; |
526 | } | 527 | } |
527 | start = end; | 528 | start = end; |
528 | } | 529 | } |
529 | if (!len) | 530 | if (!len) |
530 | return 0; | 531 | return 0; |
531 | 532 | ||
532 | fault: | 533 | fault: |
533 | return -EFAULT; | 534 | return -EFAULT; |
534 | } | 535 | } |
535 | EXPORT_SYMBOL(skb_copy_datagram_from_iovec); | 536 | EXPORT_SYMBOL(skb_copy_datagram_from_iovec); |
536 | 537 | ||
537 | static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, | 538 | static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, |
538 | u8 __user *to, int len, | 539 | u8 __user *to, int len, |
539 | __wsum *csump) | 540 | __wsum *csump) |
540 | { | 541 | { |
541 | int start = skb_headlen(skb); | 542 | int start = skb_headlen(skb); |
542 | int i, copy = start - offset; | 543 | int i, copy = start - offset; |
543 | struct sk_buff *frag_iter; | 544 | struct sk_buff *frag_iter; |
544 | int pos = 0; | 545 | int pos = 0; |
545 | 546 | ||
546 | /* Copy header. */ | 547 | /* Copy header. */ |
547 | if (copy > 0) { | 548 | if (copy > 0) { |
548 | int err = 0; | 549 | int err = 0; |
549 | if (copy > len) | 550 | if (copy > len) |
550 | copy = len; | 551 | copy = len; |
551 | *csump = csum_and_copy_to_user(skb->data + offset, to, copy, | 552 | *csump = csum_and_copy_to_user(skb->data + offset, to, copy, |
552 | *csump, &err); | 553 | *csump, &err); |
553 | if (err) | 554 | if (err) |
554 | goto fault; | 555 | goto fault; |
555 | if ((len -= copy) == 0) | 556 | if ((len -= copy) == 0) |
556 | return 0; | 557 | return 0; |
557 | offset += copy; | 558 | offset += copy; |
558 | to += copy; | 559 | to += copy; |
559 | pos = copy; | 560 | pos = copy; |
560 | } | 561 | } |
561 | 562 | ||
562 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | 563 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { |
563 | int end; | 564 | int end; |
564 | 565 | ||
565 | WARN_ON(start > offset + len); | 566 | WARN_ON(start > offset + len); |
566 | 567 | ||
567 | end = start + skb_shinfo(skb)->frags[i].size; | 568 | end = start + skb_shinfo(skb)->frags[i].size; |
568 | if ((copy = end - offset) > 0) { | 569 | if ((copy = end - offset) > 0) { |
569 | __wsum csum2; | 570 | __wsum csum2; |
570 | int err = 0; | 571 | int err = 0; |
571 | u8 *vaddr; | 572 | u8 *vaddr; |
572 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; | 573 | skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; |
573 | struct page *page = frag->page; | 574 | struct page *page = frag->page; |
574 | 575 | ||
575 | if (copy > len) | 576 | if (copy > len) |
576 | copy = len; | 577 | copy = len; |
577 | vaddr = kmap(page); | 578 | vaddr = kmap(page); |
578 | csum2 = csum_and_copy_to_user(vaddr + | 579 | csum2 = csum_and_copy_to_user(vaddr + |
579 | frag->page_offset + | 580 | frag->page_offset + |
580 | offset - start, | 581 | offset - start, |
581 | to, copy, 0, &err); | 582 | to, copy, 0, &err); |
582 | kunmap(page); | 583 | kunmap(page); |
583 | if (err) | 584 | if (err) |
584 | goto fault; | 585 | goto fault; |
585 | *csump = csum_block_add(*csump, csum2, pos); | 586 | *csump = csum_block_add(*csump, csum2, pos); |
586 | if (!(len -= copy)) | 587 | if (!(len -= copy)) |
587 | return 0; | 588 | return 0; |
588 | offset += copy; | 589 | offset += copy; |
589 | to += copy; | 590 | to += copy; |
590 | pos += copy; | 591 | pos += copy; |
591 | } | 592 | } |
592 | start = end; | 593 | start = end; |
593 | } | 594 | } |
594 | 595 | ||
595 | skb_walk_frags(skb, frag_iter) { | 596 | skb_walk_frags(skb, frag_iter) { |
596 | int end; | 597 | int end; |
597 | 598 | ||
598 | WARN_ON(start > offset + len); | 599 | WARN_ON(start > offset + len); |
599 | 600 | ||
600 | end = start + frag_iter->len; | 601 | end = start + frag_iter->len; |
601 | if ((copy = end - offset) > 0) { | 602 | if ((copy = end - offset) > 0) { |
602 | __wsum csum2 = 0; | 603 | __wsum csum2 = 0; |
603 | if (copy > len) | 604 | if (copy > len) |
604 | copy = len; | 605 | copy = len; |
605 | if (skb_copy_and_csum_datagram(frag_iter, | 606 | if (skb_copy_and_csum_datagram(frag_iter, |
606 | offset - start, | 607 | offset - start, |
607 | to, copy, | 608 | to, copy, |
608 | &csum2)) | 609 | &csum2)) |
609 | goto fault; | 610 | goto fault; |
610 | *csump = csum_block_add(*csump, csum2, pos); | 611 | *csump = csum_block_add(*csump, csum2, pos); |
611 | if ((len -= copy) == 0) | 612 | if ((len -= copy) == 0) |
612 | return 0; | 613 | return 0; |
613 | offset += copy; | 614 | offset += copy; |
614 | to += copy; | 615 | to += copy; |
615 | pos += copy; | 616 | pos += copy; |
616 | } | 617 | } |
617 | start = end; | 618 | start = end; |
618 | } | 619 | } |
619 | if (!len) | 620 | if (!len) |
620 | return 0; | 621 | return 0; |
621 | 622 | ||
622 | fault: | 623 | fault: |
623 | return -EFAULT; | 624 | return -EFAULT; |
624 | } | 625 | } |
625 | 626 | ||
626 | __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) | 627 | __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) |
627 | { | 628 | { |
628 | __sum16 sum; | 629 | __sum16 sum; |
629 | 630 | ||
630 | sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); | 631 | sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); |
631 | if (likely(!sum)) { | 632 | if (likely(!sum)) { |
632 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) | 633 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) |
633 | netdev_rx_csum_fault(skb->dev); | 634 | netdev_rx_csum_fault(skb->dev); |
634 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 635 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
635 | } | 636 | } |
636 | return sum; | 637 | return sum; |
637 | } | 638 | } |
638 | EXPORT_SYMBOL(__skb_checksum_complete_head); | 639 | EXPORT_SYMBOL(__skb_checksum_complete_head); |
639 | 640 | ||
640 | __sum16 __skb_checksum_complete(struct sk_buff *skb) | 641 | __sum16 __skb_checksum_complete(struct sk_buff *skb) |
641 | { | 642 | { |
642 | return __skb_checksum_complete_head(skb, skb->len); | 643 | return __skb_checksum_complete_head(skb, skb->len); |
643 | } | 644 | } |
644 | EXPORT_SYMBOL(__skb_checksum_complete); | 645 | EXPORT_SYMBOL(__skb_checksum_complete); |
645 | 646 | ||
646 | /** | 647 | /** |
647 | * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. | 648 | * skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec. |
648 | * @skb: skbuff | 649 | * @skb: skbuff |
649 | * @hlen: hardware length | 650 | * @hlen: hardware length |
650 | * @iov: io vector | 651 | * @iov: io vector |
651 | * | 652 | * |
652 | * Caller _must_ check that skb will fit to this iovec. | 653 | * Caller _must_ check that skb will fit to this iovec. |
653 | * | 654 | * |
654 | * Returns: 0 - success. | 655 | * Returns: 0 - success. |
655 | * -EINVAL - checksum failure. | 656 | * -EINVAL - checksum failure. |
656 | * -EFAULT - fault during copy. Beware, in this case iovec | 657 | * -EFAULT - fault during copy. Beware, in this case iovec |
657 | * can be modified! | 658 | * can be modified! |
658 | */ | 659 | */ |
659 | int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, | 660 | int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, |
660 | int hlen, struct iovec *iov) | 661 | int hlen, struct iovec *iov) |
661 | { | 662 | { |
662 | __wsum csum; | 663 | __wsum csum; |
663 | int chunk = skb->len - hlen; | 664 | int chunk = skb->len - hlen; |
664 | 665 | ||
665 | if (!chunk) | 666 | if (!chunk) |
666 | return 0; | 667 | return 0; |
667 | 668 | ||
668 | /* Skip filled elements. | 669 | /* Skip filled elements. |
669 | * Pretty silly, look at memcpy_toiovec, though 8) | 670 | * Pretty silly, look at memcpy_toiovec, though 8) |
670 | */ | 671 | */ |
671 | while (!iov->iov_len) | 672 | while (!iov->iov_len) |
672 | iov++; | 673 | iov++; |
673 | 674 | ||
674 | if (iov->iov_len < chunk) { | 675 | if (iov->iov_len < chunk) { |
675 | if (__skb_checksum_complete(skb)) | 676 | if (__skb_checksum_complete(skb)) |
676 | goto csum_error; | 677 | goto csum_error; |
677 | if (skb_copy_datagram_iovec(skb, hlen, iov, chunk)) | 678 | if (skb_copy_datagram_iovec(skb, hlen, iov, chunk)) |
678 | goto fault; | 679 | goto fault; |
679 | } else { | 680 | } else { |
680 | csum = csum_partial(skb->data, hlen, skb->csum); | 681 | csum = csum_partial(skb->data, hlen, skb->csum); |
681 | if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, | 682 | if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, |
682 | chunk, &csum)) | 683 | chunk, &csum)) |
683 | goto fault; | 684 | goto fault; |
684 | if (csum_fold(csum)) | 685 | if (csum_fold(csum)) |
685 | goto csum_error; | 686 | goto csum_error; |
686 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) | 687 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) |
687 | netdev_rx_csum_fault(skb->dev); | 688 | netdev_rx_csum_fault(skb->dev); |
688 | iov->iov_len -= chunk; | 689 | iov->iov_len -= chunk; |
689 | iov->iov_base += chunk; | 690 | iov->iov_base += chunk; |
690 | } | 691 | } |
691 | return 0; | 692 | return 0; |
692 | csum_error: | 693 | csum_error: |
693 | return -EINVAL; | 694 | return -EINVAL; |
694 | fault: | 695 | fault: |
695 | return -EFAULT; | 696 | return -EFAULT; |
696 | } | 697 | } |
697 | 698 | ||
698 | /** | 699 | /** |
699 | * datagram_poll - generic datagram poll | 700 | * datagram_poll - generic datagram poll |
700 | * @file: file struct | 701 | * @file: file struct |
701 | * @sock: socket | 702 | * @sock: socket |
702 | * @wait: poll table | 703 | * @wait: poll table |
703 | * | 704 | * |
704 | * Datagram poll: Again totally generic. This also handles | 705 | * Datagram poll: Again totally generic. This also handles |
705 | * sequenced packet sockets providing the socket receive queue | 706 | * sequenced packet sockets providing the socket receive queue |
706 | * is only ever holding data ready to receive. | 707 | * is only ever holding data ready to receive. |
707 | * | 708 | * |
708 | * Note: when you _don't_ use this routine for this protocol, | 709 | * Note: when you _don't_ use this routine for this protocol, |
709 | * and you use a different write policy from sock_writeable() | 710 | * and you use a different write policy from sock_writeable() |
710 | * then please supply your own write_space callback. | 711 | * then please supply your own write_space callback. |
711 | */ | 712 | */ |
712 | unsigned int datagram_poll(struct file *file, struct socket *sock, | 713 | unsigned int datagram_poll(struct file *file, struct socket *sock, |
713 | poll_table *wait) | 714 | poll_table *wait) |
714 | { | 715 | { |
715 | struct sock *sk = sock->sk; | 716 | struct sock *sk = sock->sk; |
716 | unsigned int mask; | 717 | unsigned int mask; |
717 | 718 | ||
718 | sock_poll_wait(file, sk->sk_sleep, wait); | 719 | sock_poll_wait(file, sk->sk_sleep, wait); |
719 | mask = 0; | 720 | mask = 0; |
720 | 721 | ||
721 | /* exceptional events? */ | 722 | /* exceptional events? */ |
722 | if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) | 723 | if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) |
723 | mask |= POLLERR; | 724 | mask |= POLLERR; |
724 | if (sk->sk_shutdown & RCV_SHUTDOWN) | 725 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
725 | mask |= POLLRDHUP; | 726 | mask |= POLLRDHUP; |
726 | if (sk->sk_shutdown == SHUTDOWN_MASK) | 727 | if (sk->sk_shutdown == SHUTDOWN_MASK) |
727 | mask |= POLLHUP; | 728 | mask |= POLLHUP; |
728 | 729 | ||
729 | /* readable? */ | 730 | /* readable? */ |
730 | if (!skb_queue_empty(&sk->sk_receive_queue) || | 731 | if (!skb_queue_empty(&sk->sk_receive_queue) || |
731 | (sk->sk_shutdown & RCV_SHUTDOWN)) | 732 | (sk->sk_shutdown & RCV_SHUTDOWN)) |
732 | mask |= POLLIN | POLLRDNORM; | 733 | mask |= POLLIN | POLLRDNORM; |
733 | 734 | ||
734 | /* Connection-based need to check for termination and startup */ | 735 | /* Connection-based need to check for termination and startup */ |
735 | if (connection_based(sk)) { | 736 | if (connection_based(sk)) { |
736 | if (sk->sk_state == TCP_CLOSE) | 737 | if (sk->sk_state == TCP_CLOSE) |
737 | mask |= POLLHUP; | 738 | mask |= POLLHUP; |
738 | /* connection hasn't started yet? */ | 739 | /* connection hasn't started yet? */ |
739 | if (sk->sk_state == TCP_SYN_SENT) | 740 | if (sk->sk_state == TCP_SYN_SENT) |
740 | return mask; | 741 | return mask; |
741 | } | 742 | } |
742 | 743 | ||
743 | /* writable? */ | 744 | /* writable? */ |
744 | if (sock_writeable(sk)) | 745 | if (sock_writeable(sk)) |
745 | mask |= POLLOUT | POLLWRNORM | POLLWRBAND; | 746 | mask |= POLLOUT | POLLWRNORM | POLLWRBAND; |
746 | else | 747 | else |
747 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 748 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
748 | 749 | ||
749 | return mask; | 750 | return mask; |
750 | } | 751 | } |
751 | 752 | ||
752 | EXPORT_SYMBOL(datagram_poll); | 753 | EXPORT_SYMBOL(datagram_poll); |
753 | EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); | 754 | EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); |
754 | EXPORT_SYMBOL(skb_copy_datagram_iovec); | 755 | EXPORT_SYMBOL(skb_copy_datagram_iovec); |
755 | EXPORT_SYMBOL(skb_free_datagram); | 756 | EXPORT_SYMBOL(skb_free_datagram); |
756 | EXPORT_SYMBOL(skb_recv_datagram); | 757 | EXPORT_SYMBOL(skb_recv_datagram); |
757 | 758 |
net/ipv4/udp.c
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * The User Datagram Protocol (UDP). | 6 | * The User Datagram Protocol (UDP). |
7 | * | 7 | * |
8 | * Authors: Ross Biro | 8 | * Authors: Ross Biro |
9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | 9 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | 10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
11 | * Alan Cox, <alan@lxorguk.ukuu.org.uk> | 11 | * Alan Cox, <alan@lxorguk.ukuu.org.uk> |
12 | * Hirokazu Takahashi, <taka@valinux.co.jp> | 12 | * Hirokazu Takahashi, <taka@valinux.co.jp> |
13 | * | 13 | * |
14 | * Fixes: | 14 | * Fixes: |
15 | * Alan Cox : verify_area() calls | 15 | * Alan Cox : verify_area() calls |
16 | * Alan Cox : stopped close while in use off icmp | 16 | * Alan Cox : stopped close while in use off icmp |
17 | * messages. Not a fix but a botch that | 17 | * messages. Not a fix but a botch that |
18 | * for udp at least is 'valid'. | 18 | * for udp at least is 'valid'. |
19 | * Alan Cox : Fixed icmp handling properly | 19 | * Alan Cox : Fixed icmp handling properly |
20 | * Alan Cox : Correct error for oversized datagrams | 20 | * Alan Cox : Correct error for oversized datagrams |
21 | * Alan Cox : Tidied select() semantics. | 21 | * Alan Cox : Tidied select() semantics. |
22 | * Alan Cox : udp_err() fixed properly, also now | 22 | * Alan Cox : udp_err() fixed properly, also now |
23 | * select and read wake correctly on errors | 23 | * select and read wake correctly on errors |
24 | * Alan Cox : udp_send verify_area moved to avoid mem leak | 24 | * Alan Cox : udp_send verify_area moved to avoid mem leak |
25 | * Alan Cox : UDP can count its memory | 25 | * Alan Cox : UDP can count its memory |
26 | * Alan Cox : send to an unknown connection causes | 26 | * Alan Cox : send to an unknown connection causes |
27 | * an ECONNREFUSED off the icmp, but | 27 | * an ECONNREFUSED off the icmp, but |
28 | * does NOT close. | 28 | * does NOT close. |
29 | * Alan Cox : Switched to new sk_buff handlers. No more backlog! | 29 | * Alan Cox : Switched to new sk_buff handlers. No more backlog! |
30 | * Alan Cox : Using generic datagram code. Even smaller and the PEEK | 30 | * Alan Cox : Using generic datagram code. Even smaller and the PEEK |
31 | * bug no longer crashes it. | 31 | * bug no longer crashes it. |
32 | * Fred Van Kempen : Net2e support for sk->broadcast. | 32 | * Fred Van Kempen : Net2e support for sk->broadcast. |
33 | * Alan Cox : Uses skb_free_datagram | 33 | * Alan Cox : Uses skb_free_datagram |
34 | * Alan Cox : Added get/set sockopt support. | 34 | * Alan Cox : Added get/set sockopt support. |
35 | * Alan Cox : Broadcasting without option set returns EACCES. | 35 | * Alan Cox : Broadcasting without option set returns EACCES. |
36 | * Alan Cox : No wakeup calls. Instead we now use the callbacks. | 36 | * Alan Cox : No wakeup calls. Instead we now use the callbacks. |
37 | * Alan Cox : Use ip_tos and ip_ttl | 37 | * Alan Cox : Use ip_tos and ip_ttl |
38 | * Alan Cox : SNMP Mibs | 38 | * Alan Cox : SNMP Mibs |
39 | * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. | 39 | * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. |
40 | * Matt Dillon : UDP length checks. | 40 | * Matt Dillon : UDP length checks. |
41 | * Alan Cox : Smarter af_inet used properly. | 41 | * Alan Cox : Smarter af_inet used properly. |
42 | * Alan Cox : Use new kernel side addressing. | 42 | * Alan Cox : Use new kernel side addressing. |
43 | * Alan Cox : Incorrect return on truncated datagram receive. | 43 | * Alan Cox : Incorrect return on truncated datagram receive. |
44 | * Arnt Gulbrandsen : New udp_send and stuff | 44 | * Arnt Gulbrandsen : New udp_send and stuff |
45 | * Alan Cox : Cache last socket | 45 | * Alan Cox : Cache last socket |
46 | * Alan Cox : Route cache | 46 | * Alan Cox : Route cache |
47 | * Jon Peatfield : Minor efficiency fix to sendto(). | 47 | * Jon Peatfield : Minor efficiency fix to sendto(). |
48 | * Mike Shaver : RFC1122 checks. | 48 | * Mike Shaver : RFC1122 checks. |
49 | * Alan Cox : Nonblocking error fix. | 49 | * Alan Cox : Nonblocking error fix. |
50 | * Willy Konynenberg : Transparent proxying support. | 50 | * Willy Konynenberg : Transparent proxying support. |
51 | * Mike McLagan : Routing by source | 51 | * Mike McLagan : Routing by source |
52 | * David S. Miller : New socket lookup architecture. | 52 | * David S. Miller : New socket lookup architecture. |
53 | * Last socket cache retained as it | 53 | * Last socket cache retained as it |
54 | * does have a high hit rate. | 54 | * does have a high hit rate. |
55 | * Olaf Kirch : Don't linearise iovec on sendmsg. | 55 | * Olaf Kirch : Don't linearise iovec on sendmsg. |
56 | * Andi Kleen : Some cleanups, cache destination entry | 56 | * Andi Kleen : Some cleanups, cache destination entry |
57 | * for connect. | 57 | * for connect. |
58 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. | 58 | * Vitaly E. Lavrov : Transparent proxy revived after year coma. |
59 | * Melvin Smith : Check msg_name not msg_namelen in sendto(), | 59 | * Melvin Smith : Check msg_name not msg_namelen in sendto(), |
60 | * return ENOTCONN for unconnected sockets (POSIX) | 60 | * return ENOTCONN for unconnected sockets (POSIX) |
61 | * Janos Farkas : don't deliver multi/broadcasts to a different | 61 | * Janos Farkas : don't deliver multi/broadcasts to a different |
62 | * bound-to-device socket | 62 | * bound-to-device socket |
63 | * Hirokazu Takahashi : HW checksumming for outgoing UDP | 63 | * Hirokazu Takahashi : HW checksumming for outgoing UDP |
64 | * datagrams. | 64 | * datagrams. |
65 | * Hirokazu Takahashi : sendfile() on UDP works now. | 65 | * Hirokazu Takahashi : sendfile() on UDP works now. |
66 | * Arnaldo C. Melo : convert /proc/net/udp to seq_file | 66 | * Arnaldo C. Melo : convert /proc/net/udp to seq_file |
67 | * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which | 67 | * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which |
68 | * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind | 68 | * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind |
69 | * a single port at the same time. | 69 | * a single port at the same time. |
70 | * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support | 70 | * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support |
71 | * James Chapman : Add L2TP encapsulation type. | 71 | * James Chapman : Add L2TP encapsulation type. |
72 | * | 72 | * |
73 | * | 73 | * |
74 | * This program is free software; you can redistribute it and/or | 74 | * This program is free software; you can redistribute it and/or |
75 | * modify it under the terms of the GNU General Public License | 75 | * modify it under the terms of the GNU General Public License |
76 | * as published by the Free Software Foundation; either version | 76 | * as published by the Free Software Foundation; either version |
77 | * 2 of the License, or (at your option) any later version. | 77 | * 2 of the License, or (at your option) any later version. |
78 | */ | 78 | */ |
79 | 79 | ||
80 | #include <asm/system.h> | 80 | #include <asm/system.h> |
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | #include <asm/ioctls.h> | 82 | #include <asm/ioctls.h> |
83 | #include <linux/bootmem.h> | 83 | #include <linux/bootmem.h> |
84 | #include <linux/highmem.h> | 84 | #include <linux/highmem.h> |
85 | #include <linux/swap.h> | 85 | #include <linux/swap.h> |
86 | #include <linux/types.h> | 86 | #include <linux/types.h> |
87 | #include <linux/fcntl.h> | 87 | #include <linux/fcntl.h> |
88 | #include <linux/module.h> | 88 | #include <linux/module.h> |
89 | #include <linux/socket.h> | 89 | #include <linux/socket.h> |
90 | #include <linux/sockios.h> | 90 | #include <linux/sockios.h> |
91 | #include <linux/igmp.h> | 91 | #include <linux/igmp.h> |
92 | #include <linux/in.h> | 92 | #include <linux/in.h> |
93 | #include <linux/errno.h> | 93 | #include <linux/errno.h> |
94 | #include <linux/timer.h> | 94 | #include <linux/timer.h> |
95 | #include <linux/mm.h> | 95 | #include <linux/mm.h> |
96 | #include <linux/inet.h> | 96 | #include <linux/inet.h> |
97 | #include <linux/netdevice.h> | 97 | #include <linux/netdevice.h> |
98 | #include <net/tcp_states.h> | 98 | #include <net/tcp_states.h> |
99 | #include <linux/skbuff.h> | 99 | #include <linux/skbuff.h> |
100 | #include <linux/proc_fs.h> | 100 | #include <linux/proc_fs.h> |
101 | #include <linux/seq_file.h> | 101 | #include <linux/seq_file.h> |
102 | #include <net/net_namespace.h> | 102 | #include <net/net_namespace.h> |
103 | #include <net/icmp.h> | 103 | #include <net/icmp.h> |
104 | #include <net/route.h> | 104 | #include <net/route.h> |
105 | #include <net/checksum.h> | 105 | #include <net/checksum.h> |
106 | #include <net/xfrm.h> | 106 | #include <net/xfrm.h> |
107 | #include "udp_impl.h" | 107 | #include "udp_impl.h" |
108 | 108 | ||
109 | struct udp_table udp_table __read_mostly; | 109 | struct udp_table udp_table __read_mostly; |
110 | EXPORT_SYMBOL(udp_table); | 110 | EXPORT_SYMBOL(udp_table); |
111 | 111 | ||
112 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
113 | EXPORT_SYMBOL(sysctl_udp_mem); | 113 | EXPORT_SYMBOL(sysctl_udp_mem); |
114 | 114 | ||
115 | int sysctl_udp_rmem_min __read_mostly; | 115 | int sysctl_udp_rmem_min __read_mostly; |
116 | EXPORT_SYMBOL(sysctl_udp_rmem_min); | 116 | EXPORT_SYMBOL(sysctl_udp_rmem_min); |
117 | 117 | ||
118 | int sysctl_udp_wmem_min __read_mostly; | 118 | int sysctl_udp_wmem_min __read_mostly; |
119 | EXPORT_SYMBOL(sysctl_udp_wmem_min); | 119 | EXPORT_SYMBOL(sysctl_udp_wmem_min); |
120 | 120 | ||
121 | atomic_t udp_memory_allocated; | 121 | atomic_t udp_memory_allocated; |
122 | EXPORT_SYMBOL(udp_memory_allocated); | 122 | EXPORT_SYMBOL(udp_memory_allocated); |
123 | 123 | ||
124 | #define MAX_UDP_PORTS 65536 | 124 | #define MAX_UDP_PORTS 65536 |
125 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) | 125 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) |
126 | 126 | ||
127 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 127 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
128 | const struct udp_hslot *hslot, | 128 | const struct udp_hslot *hslot, |
129 | unsigned long *bitmap, | 129 | unsigned long *bitmap, |
130 | struct sock *sk, | 130 | struct sock *sk, |
131 | int (*saddr_comp)(const struct sock *sk1, | 131 | int (*saddr_comp)(const struct sock *sk1, |
132 | const struct sock *sk2), | 132 | const struct sock *sk2), |
133 | unsigned int log) | 133 | unsigned int log) |
134 | { | 134 | { |
135 | struct sock *sk2; | 135 | struct sock *sk2; |
136 | struct hlist_nulls_node *node; | 136 | struct hlist_nulls_node *node; |
137 | 137 | ||
138 | sk_nulls_for_each(sk2, node, &hslot->head) | 138 | sk_nulls_for_each(sk2, node, &hslot->head) |
139 | if (net_eq(sock_net(sk2), net) && | 139 | if (net_eq(sock_net(sk2), net) && |
140 | sk2 != sk && | 140 | sk2 != sk && |
141 | (bitmap || sk2->sk_hash == num) && | 141 | (bitmap || sk2->sk_hash == num) && |
142 | (!sk2->sk_reuse || !sk->sk_reuse) && | 142 | (!sk2->sk_reuse || !sk->sk_reuse) && |
143 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 143 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
144 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 144 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
145 | (*saddr_comp)(sk, sk2)) { | 145 | (*saddr_comp)(sk, sk2)) { |
146 | if (bitmap) | 146 | if (bitmap) |
147 | __set_bit(sk2->sk_hash >> log, bitmap); | 147 | __set_bit(sk2->sk_hash >> log, bitmap); |
148 | else | 148 | else |
149 | return 1; | 149 | return 1; |
150 | } | 150 | } |
151 | return 0; | 151 | return 0; |
152 | } | 152 | } |
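/*
 * For illustration: with a bitmap this walk records, in one pass,
 * every port already in use on this chain (the caller then probes
 * candidate ports against the bitmap); with bitmap == NULL it only
 * answers whether 'num' itself is taken and can return early.
 */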
153 | 153 | ||
154 | /** | 154 | /** |
155 | * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 | 155 | * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 |
156 | * | 156 | * |
157 | * @sk: socket struct in question | 157 | * @sk: socket struct in question |
158 | * @snum: port number to look up | 158 | * @snum: port number to look up |
159 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | 159 | * @saddr_comp: AF-dependent comparison of bound local IP addresses |
160 | */ | 160 | */ |
161 | int udp_lib_get_port(struct sock *sk, unsigned short snum, | 161 | int udp_lib_get_port(struct sock *sk, unsigned short snum, |
162 | int (*saddr_comp)(const struct sock *sk1, | 162 | int (*saddr_comp)(const struct sock *sk1, |
163 | const struct sock *sk2)) | 163 | const struct sock *sk2)) |
164 | { | 164 | { |
165 | struct udp_hslot *hslot; | 165 | struct udp_hslot *hslot; |
166 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 166 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
167 | int error = 1; | 167 | int error = 1; |
168 | struct net *net = sock_net(sk); | 168 | struct net *net = sock_net(sk); |
169 | 169 | ||
170 | if (!snum) { | 170 | if (!snum) { |
171 | int low, high, remaining; | 171 | int low, high, remaining; |
172 | unsigned rand; | 172 | unsigned rand; |
173 | unsigned short first, last; | 173 | unsigned short first, last; |
174 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); | 174 | DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); |
175 | 175 | ||
176 | inet_get_local_port_range(&low, &high); | 176 | inet_get_local_port_range(&low, &high); |
177 | remaining = (high - low) + 1; | 177 | remaining = (high - low) + 1; |
178 | 178 | ||
179 | rand = net_random(); | 179 | rand = net_random(); |
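/*
 * Multiply-shift trick: (rand * remaining) >> 32 maps the 32-bit
 * random value uniformly onto [0, remaining - 1] without a modulo.
 * E.g. (illustrative numbers only) rand == 0 picks 'low', while
 * rand == 0xffffffff picks a port just below low + remaining.
 */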
180 | first = (((u64)rand * remaining) >> 32) + low; | 180 | first = (((u64)rand * remaining) >> 32) + low; |
181 | /* | 181 | /* |
182 | * force rand to be an odd multiple of UDP_HTABLE_SIZE | 182 | * force rand to be an odd multiple of UDP_HTABLE_SIZE |
183 | */ | 183 | */ |
184 | rand = (rand | 1) * (udptable->mask + 1); | 184 | rand = (rand | 1) * (udptable->mask + 1); |
185 | for (last = first + udptable->mask + 1; | 185 | for (last = first + udptable->mask + 1; |
186 | first != last; | 186 | first != last; |
187 | first++) { | 187 | first++) { |
188 | hslot = udp_hashslot(udptable, net, first); | 188 | hslot = udp_hashslot(udptable, net, first); |
189 | bitmap_zero(bitmap, PORTS_PER_CHAIN); | 189 | bitmap_zero(bitmap, PORTS_PER_CHAIN); |
190 | spin_lock_bh(&hslot->lock); | 190 | spin_lock_bh(&hslot->lock); |
191 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, | 191 | udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, |
192 | saddr_comp, udptable->log); | 192 | saddr_comp, udptable->log); |
193 | 193 | ||
194 | snum = first; | 194 | snum = first; |
195 | /* | 195 | /* |
196 | * Iterate on all possible values of snum for this hash. | 196 | * Iterate on all possible values of snum for this hash. |
197 | * Using steps of an odd multiple of UDP_HTABLE_SIZE | 197 | * Using steps of an odd multiple of UDP_HTABLE_SIZE |
198 | * gives us randomization and full range coverage. | 198 | * gives us randomization and full range coverage. |
199 | */ | 199 | */ |
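/*
 * Worked example with hypothetical sizes: for a 128-slot table,
 * rand is an odd multiple of 128, so gcd(rand, 65536) == 128 and
 * the sequence snum += rand (mod 65536) visits exactly the 512
 * ports congruent to 'first' modulo 128 -- i.e. all ports that can
 * land in this slot -- once each before wrapping back to 'first'.
 */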
200 | do { | 200 | do { |
201 | if (low <= snum && snum <= high && | 201 | if (low <= snum && snum <= high && |
202 | !test_bit(snum >> udptable->log, bitmap)) | 202 | !test_bit(snum >> udptable->log, bitmap)) |
203 | goto found; | 203 | goto found; |
204 | snum += rand; | 204 | snum += rand; |
205 | } while (snum != first); | 205 | } while (snum != first); |
206 | spin_unlock_bh(&hslot->lock); | 206 | spin_unlock_bh(&hslot->lock); |
207 | } | 207 | } |
208 | goto fail; | 208 | goto fail; |
209 | } else { | 209 | } else { |
210 | hslot = udp_hashslot(udptable, net, snum); | 210 | hslot = udp_hashslot(udptable, net, snum); |
211 | spin_lock_bh(&hslot->lock); | 211 | spin_lock_bh(&hslot->lock); |
212 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, | 212 | if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, |
213 | saddr_comp, 0)) | 213 | saddr_comp, 0)) |
214 | goto fail_unlock; | 214 | goto fail_unlock; |
215 | } | 215 | } |
216 | found: | 216 | found: |
217 | inet_sk(sk)->inet_num = snum; | 217 | inet_sk(sk)->inet_num = snum; |
218 | sk->sk_hash = snum; | 218 | sk->sk_hash = snum; |
219 | if (sk_unhashed(sk)) { | 219 | if (sk_unhashed(sk)) { |
220 | sk_nulls_add_node_rcu(sk, &hslot->head); | 220 | sk_nulls_add_node_rcu(sk, &hslot->head); |
221 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 221 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
222 | } | 222 | } |
223 | error = 0; | 223 | error = 0; |
224 | fail_unlock: | 224 | fail_unlock: |
225 | spin_unlock_bh(&hslot->lock); | 225 | spin_unlock_bh(&hslot->lock); |
226 | fail: | 226 | fail: |
227 | return error; | 227 | return error; |
228 | } | 228 | } |
229 | EXPORT_SYMBOL(udp_lib_get_port); | 229 | EXPORT_SYMBOL(udp_lib_get_port); |
230 | 230 | ||
231 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 231 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
232 | { | 232 | { |
233 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 233 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
234 | 234 | ||
235 | return (!ipv6_only_sock(sk2) && | 235 | return (!ipv6_only_sock(sk2) && |
236 | (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || | 236 | (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || |
237 | inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); | 237 | inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); |
238 | } | 238 | } |
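/*
 * E.g. a socket bound to 0.0.0.0 "equals" any IPv4 socket for bind
 * conflict purposes, while bindings to 192.0.2.1 and 192.0.2.2
 * (illustrative addresses) coexist: the pair matches only when one
 * side is a wildcard or both addresses are identical, and sk2 is
 * not IPv6-only.
 */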
239 | 239 | ||
240 | int udp_v4_get_port(struct sock *sk, unsigned short snum) | 240 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
241 | { | 241 | { |
242 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); | 242 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); |
243 | } | 243 | } |
244 | 244 | ||
245 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, | 245 | static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, |
246 | unsigned short hnum, | 246 | unsigned short hnum, |
247 | __be16 sport, __be32 daddr, __be16 dport, int dif) | 247 | __be16 sport, __be32 daddr, __be16 dport, int dif) |
248 | { | 248 | { |
249 | int score = -1; | 249 | int score = -1; |
250 | 250 | ||
251 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | 251 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && |
252 | !ipv6_only_sock(sk)) { | 252 | !ipv6_only_sock(sk)) { |
253 | struct inet_sock *inet = inet_sk(sk); | 253 | struct inet_sock *inet = inet_sk(sk); |
254 | 254 | ||
255 | score = (sk->sk_family == PF_INET ? 1 : 0); | 255 | score = (sk->sk_family == PF_INET ? 1 : 0); |
256 | if (inet->inet_rcv_saddr) { | 256 | if (inet->inet_rcv_saddr) { |
257 | if (inet->inet_rcv_saddr != daddr) | 257 | if (inet->inet_rcv_saddr != daddr) |
258 | return -1; | 258 | return -1; |
259 | score += 2; | 259 | score += 2; |
260 | } | 260 | } |
261 | if (inet->inet_daddr) { | 261 | if (inet->inet_daddr) { |
262 | if (inet->inet_daddr != saddr) | 262 | if (inet->inet_daddr != saddr) |
263 | return -1; | 263 | return -1; |
264 | score += 2; | 264 | score += 2; |
265 | } | 265 | } |
266 | if (inet->inet_dport) { | 266 | if (inet->inet_dport) { |
267 | if (inet->inet_dport != sport) | 267 | if (inet->inet_dport != sport) |
268 | return -1; | 268 | return -1; |
269 | score += 2; | 269 | score += 2; |
270 | } | 270 | } |
271 | if (sk->sk_bound_dev_if) { | 271 | if (sk->sk_bound_dev_if) { |
272 | if (sk->sk_bound_dev_if != dif) | 272 | if (sk->sk_bound_dev_if != dif) |
273 | return -1; | 273 | return -1; |
274 | score += 2; | 274 | score += 2; |
275 | } | 275 | } |
276 | } | 276 | } |
277 | return score; | 277 | return score; |
278 | } | 278 | } |
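/*
 * Scoring example: a connected, device-bound IPv4 socket can score
 * at most 9 (1 for PF_INET plus 2 each for local address, remote
 * address, remote port and bound device), while an unconnected
 * INADDR_ANY socket scores just 1, so the most specific candidate
 * wins the lookup below.
 */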
279 | 279 | ||
280 | /* UDP lookups are nearly always wildcards out the wazoo, so it makes no | 280 | /* UDP lookups are nearly always wildcards out the wazoo, so it makes no |
281 | * sense to try harder than this. -DaveM | 281 | * sense to try harder than this. -DaveM |
282 | */ | 282 | */ |
283 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | 283 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, |
284 | __be16 sport, __be32 daddr, __be16 dport, | 284 | __be16 sport, __be32 daddr, __be16 dport, |
285 | int dif, struct udp_table *udptable) | 285 | int dif, struct udp_table *udptable) |
286 | { | 286 | { |
287 | struct sock *sk, *result; | 287 | struct sock *sk, *result; |
288 | struct hlist_nulls_node *node; | 288 | struct hlist_nulls_node *node; |
289 | unsigned short hnum = ntohs(dport); | 289 | unsigned short hnum = ntohs(dport); |
290 | unsigned int hash = udp_hashfn(net, hnum, udptable->mask); | 290 | unsigned int hash = udp_hashfn(net, hnum, udptable->mask); |
291 | struct udp_hslot *hslot = &udptable->hash[hash]; | 291 | struct udp_hslot *hslot = &udptable->hash[hash]; |
292 | int score, badness; | 292 | int score, badness; |
293 | 293 | ||
294 | rcu_read_lock(); | 294 | rcu_read_lock(); |
295 | begin: | 295 | begin: |
296 | result = NULL; | 296 | result = NULL; |
297 | badness = -1; | 297 | badness = -1; |
298 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { | 298 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
299 | score = compute_score(sk, net, saddr, hnum, sport, | 299 | score = compute_score(sk, net, saddr, hnum, sport, |
300 | daddr, dport, dif); | 300 | daddr, dport, dif); |
301 | if (score > badness) { | 301 | if (score > badness) { |
302 | result = sk; | 302 | result = sk; |
303 | badness = score; | 303 | badness = score; |
304 | } | 304 | } |
305 | } | 305 | } |
306 | /* | 306 | /* |
307 | * If the nulls value we got at the end of this lookup is | 307 | * If the nulls value we got at the end of this lookup is |
308 | * not the expected one, we must restart the lookup. | 308 | * not the expected one, we must restart the lookup. |
309 | * We probably raced with an item that was moved to another chain. | 309 | * We probably raced with an item that was moved to another chain. |
310 | */ | 310 | */ |
311 | if (get_nulls_value(node) != hash) | 311 | if (get_nulls_value(node) != hash) |
312 | goto begin; | 312 | goto begin; |
313 | 313 | ||
314 | if (result) { | 314 | if (result) { |
315 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 315 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
316 | result = NULL; | 316 | result = NULL; |
317 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, | 317 | else if (unlikely(compute_score(result, net, saddr, hnum, sport, |
318 | daddr, dport, dif) < badness)) { | 318 | daddr, dport, dif) < badness)) { |
319 | sock_put(result); | 319 | sock_put(result); |
320 | goto begin; | 320 | goto begin; |
321 | } | 321 | } |
322 | } | 322 | } |
323 | rcu_read_unlock(); | 323 | rcu_read_unlock(); |
324 | return result; | 324 | return result; |
325 | } | 325 | } |
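/*
 * The re-check after atomic_inc_not_zero() guards against the
 * RCU-visible socket being freed and reused between scoring and
 * reference acquisition (reuse is assumed possible here): if the
 * refcount was already zero, or the score no longer holds, the
 * lookup is simply restarted.
 */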
326 | 326 | ||
327 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, | 327 | static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, |
328 | __be16 sport, __be16 dport, | 328 | __be16 sport, __be16 dport, |
329 | struct udp_table *udptable) | 329 | struct udp_table *udptable) |
330 | { | 330 | { |
331 | struct sock *sk; | 331 | struct sock *sk; |
332 | const struct iphdr *iph = ip_hdr(skb); | 332 | const struct iphdr *iph = ip_hdr(skb); |
333 | 333 | ||
334 | if (unlikely(sk = skb_steal_sock(skb))) | 334 | if (unlikely(sk = skb_steal_sock(skb))) |
335 | return sk; | 335 | return sk; |
336 | else | 336 | else |
337 | return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, | 337 | return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, |
338 | iph->daddr, dport, inet_iif(skb), | 338 | iph->daddr, dport, inet_iif(skb), |
339 | udptable); | 339 | udptable); |
340 | } | 340 | } |
341 | 341 | ||
342 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, | 342 | struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, |
343 | __be32 daddr, __be16 dport, int dif) | 343 | __be32 daddr, __be16 dport, int dif) |
344 | { | 344 | { |
345 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); | 345 | return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); |
346 | } | 346 | } |
347 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); | 347 | EXPORT_SYMBOL_GPL(udp4_lib_lookup); |
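/*
 * A minimal usage sketch (hypothetical caller): the lookup returns
 * a referenced socket, so the reference must be dropped afterwards:
 *
 *	sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, dif);
 *	if (sk) {
 *		... use sk ...
 *		sock_put(sk);
 *	}
 */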
348 | 348 | ||
349 | static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, | 349 | static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, |
350 | __be16 loc_port, __be32 loc_addr, | 350 | __be16 loc_port, __be32 loc_addr, |
351 | __be16 rmt_port, __be32 rmt_addr, | 351 | __be16 rmt_port, __be32 rmt_addr, |
352 | int dif) | 352 | int dif) |
353 | { | 353 | { |
354 | struct hlist_nulls_node *node; | 354 | struct hlist_nulls_node *node; |
355 | struct sock *s = sk; | 355 | struct sock *s = sk; |
356 | unsigned short hnum = ntohs(loc_port); | 356 | unsigned short hnum = ntohs(loc_port); |
357 | 357 | ||
358 | sk_nulls_for_each_from(s, node) { | 358 | sk_nulls_for_each_from(s, node) { |
359 | struct inet_sock *inet = inet_sk(s); | 359 | struct inet_sock *inet = inet_sk(s); |
360 | 360 | ||
361 | if (!net_eq(sock_net(s), net) || | 361 | if (!net_eq(sock_net(s), net) || |
362 | s->sk_hash != hnum || | 362 | s->sk_hash != hnum || |
363 | (inet->inet_daddr && inet->inet_daddr != rmt_addr) || | 363 | (inet->inet_daddr && inet->inet_daddr != rmt_addr) || |
364 | (inet->inet_dport != rmt_port && inet->inet_dport) || | 364 | (inet->inet_dport != rmt_port && inet->inet_dport) || |
365 | (inet->inet_rcv_saddr && | 365 | (inet->inet_rcv_saddr && |
366 | inet->inet_rcv_saddr != loc_addr) || | 366 | inet->inet_rcv_saddr != loc_addr) || |
367 | ipv6_only_sock(s) || | 367 | ipv6_only_sock(s) || |
368 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) | 368 | (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) |
369 | continue; | 369 | continue; |
370 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) | 370 | if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) |
371 | continue; | 371 | continue; |
372 | goto found; | 372 | goto found; |
373 | } | 373 | } |
374 | s = NULL; | 374 | s = NULL; |
375 | found: | 375 | found: |
376 | return s; | 376 | return s; |
377 | } | 377 | } |
378 | 378 | ||
379 | /* | 379 | /* |
380 | * This routine is called by the ICMP module when it gets some | 380 | * This routine is called by the ICMP module when it gets some |
381 | * sort of error condition. If err < 0 then the socket should | 381 | * sort of error condition. If err < 0 then the socket should |
382 | * be closed and the error returned to the user. If err > 0 | 382 | * be closed and the error returned to the user. If err > 0 |
383 | * it's just the icmp type << 8 | icmp code. | 383 | * it's just the icmp type << 8 | icmp code. |
384 | * Header points to the IP header of the error packet; we move | 384 | * Header points to the IP header of the error packet; we move |
385 | * past it, so that header then points to the first 8 bytes | 385 | * past it, so that header then points to the first 8 bytes |
386 | * of the UDP header. We need to find | 386 | * of the UDP header. We need to find |
387 | * the appropriate port. | 387 | * the appropriate port. |
388 | */ | 388 | */ |
389 | 389 | ||
390 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | 390 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
391 | { | 391 | { |
392 | struct inet_sock *inet; | 392 | struct inet_sock *inet; |
393 | struct iphdr *iph = (struct iphdr *)skb->data; | 393 | struct iphdr *iph = (struct iphdr *)skb->data; |
394 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); | 394 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
395 | const int type = icmp_hdr(skb)->type; | 395 | const int type = icmp_hdr(skb)->type; |
396 | const int code = icmp_hdr(skb)->code; | 396 | const int code = icmp_hdr(skb)->code; |
397 | struct sock *sk; | 397 | struct sock *sk; |
398 | int harderr; | 398 | int harderr; |
399 | int err; | 399 | int err; |
400 | struct net *net = dev_net(skb->dev); | 400 | struct net *net = dev_net(skb->dev); |
401 | 401 | ||
402 | sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, | 402 | sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, |
403 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | 403 | iph->saddr, uh->source, skb->dev->ifindex, udptable); |
404 | if (sk == NULL) { | 404 | if (sk == NULL) { |
405 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); | 405 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); |
406 | return; /* No socket for error */ | 406 | return; /* No socket for error */ |
407 | } | 407 | } |
408 | 408 | ||
409 | err = 0; | 409 | err = 0; |
410 | harderr = 0; | 410 | harderr = 0; |
411 | inet = inet_sk(sk); | 411 | inet = inet_sk(sk); |
412 | 412 | ||
413 | switch (type) { | 413 | switch (type) { |
414 | default: | 414 | default: |
415 | case ICMP_TIME_EXCEEDED: | 415 | case ICMP_TIME_EXCEEDED: |
416 | err = EHOSTUNREACH; | 416 | err = EHOSTUNREACH; |
417 | break; | 417 | break; |
418 | case ICMP_SOURCE_QUENCH: | 418 | case ICMP_SOURCE_QUENCH: |
419 | goto out; | 419 | goto out; |
420 | case ICMP_PARAMETERPROB: | 420 | case ICMP_PARAMETERPROB: |
421 | err = EPROTO; | 421 | err = EPROTO; |
422 | harderr = 1; | 422 | harderr = 1; |
423 | break; | 423 | break; |
424 | case ICMP_DEST_UNREACH: | 424 | case ICMP_DEST_UNREACH: |
425 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ | 425 | if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ |
426 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { | 426 | if (inet->pmtudisc != IP_PMTUDISC_DONT) { |
427 | err = EMSGSIZE; | 427 | err = EMSGSIZE; |
428 | harderr = 1; | 428 | harderr = 1; |
429 | break; | 429 | break; |
430 | } | 430 | } |
431 | goto out; | 431 | goto out; |
432 | } | 432 | } |
433 | err = EHOSTUNREACH; | 433 | err = EHOSTUNREACH; |
434 | if (code <= NR_ICMP_UNREACH) { | 434 | if (code <= NR_ICMP_UNREACH) { |
435 | harderr = icmp_err_convert[code].fatal; | 435 | harderr = icmp_err_convert[code].fatal; |
436 | err = icmp_err_convert[code].errno; | 436 | err = icmp_err_convert[code].errno; |
437 | } | 437 | } |
438 | break; | 438 | break; |
439 | } | 439 | } |
440 | 440 | ||
441 | /* | 441 | /* |
442 | * RFC1122: OK. Passes ICMP errors back to application, as per | 442 | * RFC1122: OK. Passes ICMP errors back to application, as per |
443 | * 4.1.3.3. | 443 | * 4.1.3.3. |
444 | */ | 444 | */ |
445 | if (!inet->recverr) { | 445 | if (!inet->recverr) { |
446 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 446 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
447 | goto out; | 447 | goto out; |
448 | } else { | 448 | } else { |
449 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); | 449 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); |
450 | } | 450 | } |
451 | sk->sk_err = err; | 451 | sk->sk_err = err; |
452 | sk->sk_error_report(sk); | 452 | sk->sk_error_report(sk); |
453 | out: | 453 | out: |
454 | sock_put(sk); | 454 | sock_put(sk); |
455 | } | 455 | } |
456 | 456 | ||
457 | void udp_err(struct sk_buff *skb, u32 info) | 457 | void udp_err(struct sk_buff *skb, u32 info) |
458 | { | 458 | { |
459 | __udp4_lib_err(skb, info, &udp_table); | 459 | __udp4_lib_err(skb, info, &udp_table); |
460 | } | 460 | } |
461 | 461 | ||
462 | /* | 462 | /* |
463 | * Throw away all pending data and cancel the corking. Socket is locked. | 463 | * Throw away all pending data and cancel the corking. Socket is locked. |
464 | */ | 464 | */ |
465 | void udp_flush_pending_frames(struct sock *sk) | 465 | void udp_flush_pending_frames(struct sock *sk) |
466 | { | 466 | { |
467 | struct udp_sock *up = udp_sk(sk); | 467 | struct udp_sock *up = udp_sk(sk); |
468 | 468 | ||
469 | if (up->pending) { | 469 | if (up->pending) { |
470 | up->len = 0; | 470 | up->len = 0; |
471 | up->pending = 0; | 471 | up->pending = 0; |
472 | ip_flush_pending_frames(sk); | 472 | ip_flush_pending_frames(sk); |
473 | } | 473 | } |
474 | } | 474 | } |
475 | EXPORT_SYMBOL(udp_flush_pending_frames); | 475 | EXPORT_SYMBOL(udp_flush_pending_frames); |
476 | 476 | ||
477 | /** | 477 | /** |
478 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming | 478 | * udp4_hwcsum_outgoing - handle outgoing HW checksumming |
479 | * @sk: socket we are sending on | 479 | * @sk: socket we are sending on |
480 | * @skb: sk_buff containing the filled-in UDP header | 480 | * @skb: sk_buff containing the filled-in UDP header |
481 | * (checksum field must be zeroed out) | 481 | * (checksum field must be zeroed out) |
482 | */ | 482 | */ |
483 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 483 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, |
484 | __be32 src, __be32 dst, int len) | 484 | __be32 src, __be32 dst, int len) |
485 | { | 485 | { |
486 | unsigned int offset; | 486 | unsigned int offset; |
487 | struct udphdr *uh = udp_hdr(skb); | 487 | struct udphdr *uh = udp_hdr(skb); |
488 | __wsum csum = 0; | 488 | __wsum csum = 0; |
489 | 489 | ||
490 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 490 | if (skb_queue_len(&sk->sk_write_queue) == 1) { |
491 | /* | 491 | /* |
492 | * Only one fragment on the socket. | 492 | * Only one fragment on the socket. |
493 | */ | 493 | */ |
494 | skb->csum_start = skb_transport_header(skb) - skb->head; | 494 | skb->csum_start = skb_transport_header(skb) - skb->head; |
495 | skb->csum_offset = offsetof(struct udphdr, check); | 495 | skb->csum_offset = offsetof(struct udphdr, check); |
496 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); | 496 | uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); |
497 | } else { | 497 | } else { |
498 | /* | 498 | /* |
499 | * HW-checksum won't work as there are two or more | 499 | * HW-checksum won't work as there are two or more |
500 | * fragments on the socket so that all csums of sk_buffs | 500 | * fragments on the socket so that all csums of sk_buffs |
501 | * should be together | 501 | * should be together |
502 | */ | 502 | */ |
503 | offset = skb_transport_offset(skb); | 503 | offset = skb_transport_offset(skb); |
504 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 504 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); |
505 | 505 | ||
506 | skb->ip_summed = CHECKSUM_NONE; | 506 | skb->ip_summed = CHECKSUM_NONE; |
507 | 507 | ||
508 | skb_queue_walk(&sk->sk_write_queue, skb) { | 508 | skb_queue_walk(&sk->sk_write_queue, skb) { |
509 | csum = csum_add(csum, skb->csum); | 509 | csum = csum_add(csum, skb->csum); |
510 | } | 510 | } |
511 | 511 | ||
512 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); | 512 | uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); |
513 | if (uh->check == 0) | 513 | if (uh->check == 0) |
514 | uh->check = CSUM_MANGLED_0; | 514 | uh->check = CSUM_MANGLED_0; |
515 | } | 515 | } |
516 | } | 516 | } |
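/*
 * In short: with a single fragment the final fold is left to the
 * hardware (csum_start/csum_offset plus the pseudo-header seed
 * written above); with several fragments all per-skb csums are
 * added in software and only then combined with the pseudo-header.
 */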
517 | 517 | ||
518 | /* | 518 | /* |
519 | * Push out all pending data as one UDP datagram. Socket is locked. | 519 | * Push out all pending data as one UDP datagram. Socket is locked. |
520 | */ | 520 | */ |
521 | static int udp_push_pending_frames(struct sock *sk) | 521 | static int udp_push_pending_frames(struct sock *sk) |
522 | { | 522 | { |
523 | struct udp_sock *up = udp_sk(sk); | 523 | struct udp_sock *up = udp_sk(sk); |
524 | struct inet_sock *inet = inet_sk(sk); | 524 | struct inet_sock *inet = inet_sk(sk); |
525 | struct flowi *fl = &inet->cork.fl; | 525 | struct flowi *fl = &inet->cork.fl; |
526 | struct sk_buff *skb; | 526 | struct sk_buff *skb; |
527 | struct udphdr *uh; | 527 | struct udphdr *uh; |
528 | int err = 0; | 528 | int err = 0; |
529 | int is_udplite = IS_UDPLITE(sk); | 529 | int is_udplite = IS_UDPLITE(sk); |
530 | __wsum csum = 0; | 530 | __wsum csum = 0; |
531 | 531 | ||
532 | /* Grab the skbuff where UDP header space exists. */ | 532 | /* Grab the skbuff where UDP header space exists. */ |
533 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | 533 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) |
534 | goto out; | 534 | goto out; |
535 | 535 | ||
536 | /* | 536 | /* |
537 | * Create a UDP header | 537 | * Create a UDP header |
538 | */ | 538 | */ |
539 | uh = udp_hdr(skb); | 539 | uh = udp_hdr(skb); |
540 | uh->source = fl->fl_ip_sport; | 540 | uh->source = fl->fl_ip_sport; |
541 | uh->dest = fl->fl_ip_dport; | 541 | uh->dest = fl->fl_ip_dport; |
542 | uh->len = htons(up->len); | 542 | uh->len = htons(up->len); |
543 | uh->check = 0; | 543 | uh->check = 0; |
544 | 544 | ||
545 | if (is_udplite) /* UDP-Lite */ | 545 | if (is_udplite) /* UDP-Lite */ |
546 | csum = udplite_csum_outgoing(sk, skb); | 546 | csum = udplite_csum_outgoing(sk, skb); |
547 | 547 | ||
548 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ | 548 | else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ |
549 | 549 | ||
550 | skb->ip_summed = CHECKSUM_NONE; | 550 | skb->ip_summed = CHECKSUM_NONE; |
551 | goto send; | 551 | goto send; |
552 | 552 | ||
553 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 553 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
554 | 554 | ||
555 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); | 555 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); |
556 | goto send; | 556 | goto send; |
557 | 557 | ||
558 | } else /* `normal' UDP */ | 558 | } else /* `normal' UDP */ |
559 | csum = udp_csum_outgoing(sk, skb); | 559 | csum = udp_csum_outgoing(sk, skb); |
560 | 560 | ||
561 | /* add protocol-dependent pseudo-header */ | 561 | /* add protocol-dependent pseudo-header */ |
562 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | 562 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, |
563 | sk->sk_protocol, csum); | 563 | sk->sk_protocol, csum); |
564 | if (uh->check == 0) | 564 | if (uh->check == 0) |
565 | uh->check = CSUM_MANGLED_0; | 565 | uh->check = CSUM_MANGLED_0; |
566 | 566 | ||
567 | send: | 567 | send: |
568 | err = ip_push_pending_frames(sk); | 568 | err = ip_push_pending_frames(sk); |
569 | if (err) { | 569 | if (err) { |
570 | if (err == -ENOBUFS && !inet->recverr) { | 570 | if (err == -ENOBUFS && !inet->recverr) { |
571 | UDP_INC_STATS_USER(sock_net(sk), | 571 | UDP_INC_STATS_USER(sock_net(sk), |
572 | UDP_MIB_SNDBUFERRORS, is_udplite); | 572 | UDP_MIB_SNDBUFERRORS, is_udplite); |
573 | err = 0; | 573 | err = 0; |
574 | } | 574 | } |
575 | } else | 575 | } else |
576 | UDP_INC_STATS_USER(sock_net(sk), | 576 | UDP_INC_STATS_USER(sock_net(sk), |
577 | UDP_MIB_OUTDATAGRAMS, is_udplite); | 577 | UDP_MIB_OUTDATAGRAMS, is_udplite); |
578 | out: | 578 | out: |
579 | up->len = 0; | 579 | up->len = 0; |
580 | up->pending = 0; | 580 | up->pending = 0; |
581 | return err; | 581 | return err; |
582 | } | 582 | } |
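/*
 * Note that both checksum paths substitute CSUM_MANGLED_0 (0xffff)
 * when the computed sum folds to zero: a zero checksum field means
 * "checksum not used" for UDP over IPv4 (RFC 768), so a genuine
 * zero result must be transmitted as all-ones.
 */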
583 | 583 | ||
584 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 584 | int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
585 | size_t len) | 585 | size_t len) |
586 | { | 586 | { |
587 | struct inet_sock *inet = inet_sk(sk); | 587 | struct inet_sock *inet = inet_sk(sk); |
588 | struct udp_sock *up = udp_sk(sk); | 588 | struct udp_sock *up = udp_sk(sk); |
589 | int ulen = len; | 589 | int ulen = len; |
590 | struct ipcm_cookie ipc; | 590 | struct ipcm_cookie ipc; |
591 | struct rtable *rt = NULL; | 591 | struct rtable *rt = NULL; |
592 | int free = 0; | 592 | int free = 0; |
593 | int connected = 0; | 593 | int connected = 0; |
594 | __be32 daddr, faddr, saddr; | 594 | __be32 daddr, faddr, saddr; |
595 | __be16 dport; | 595 | __be16 dport; |
596 | u8 tos; | 596 | u8 tos; |
597 | int err, is_udplite = IS_UDPLITE(sk); | 597 | int err, is_udplite = IS_UDPLITE(sk); |
598 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 598 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
599 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 599 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
600 | 600 | ||
601 | if (len > 0xFFFF) | 601 | if (len > 0xFFFF) |
602 | return -EMSGSIZE; | 602 | return -EMSGSIZE; |
603 | 603 | ||
604 | /* | 604 | /* |
605 | * Check the flags. | 605 | * Check the flags. |
606 | */ | 606 | */ |
607 | 607 | ||
608 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ | 608 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ |
609 | return -EOPNOTSUPP; | 609 | return -EOPNOTSUPP; |
610 | 610 | ||
611 | ipc.opt = NULL; | 611 | ipc.opt = NULL; |
612 | ipc.shtx.flags = 0; | 612 | ipc.shtx.flags = 0; |
613 | 613 | ||
614 | if (up->pending) { | 614 | if (up->pending) { |
615 | /* | 615 | /* |
616 | * There are pending frames. | 616 | * There are pending frames. |
617 | * The socket lock must be held while it's corked. | 617 | * The socket lock must be held while it's corked. |
618 | */ | 618 | */ |
619 | lock_sock(sk); | 619 | lock_sock(sk); |
620 | if (likely(up->pending)) { | 620 | if (likely(up->pending)) { |
621 | if (unlikely(up->pending != AF_INET)) { | 621 | if (unlikely(up->pending != AF_INET)) { |
622 | release_sock(sk); | 622 | release_sock(sk); |
623 | return -EINVAL; | 623 | return -EINVAL; |
624 | } | 624 | } |
625 | goto do_append_data; | 625 | goto do_append_data; |
626 | } | 626 | } |
627 | release_sock(sk); | 627 | release_sock(sk); |
628 | } | 628 | } |
629 | ulen += sizeof(struct udphdr); | 629 | ulen += sizeof(struct udphdr); |
630 | 630 | ||
631 | /* | 631 | /* |
632 | * Get and verify the address. | 632 | * Get and verify the address. |
633 | */ | 633 | */ |
634 | if (msg->msg_name) { | 634 | if (msg->msg_name) { |
635 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; | 635 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; |
636 | if (msg->msg_namelen < sizeof(*usin)) | 636 | if (msg->msg_namelen < sizeof(*usin)) |
637 | return -EINVAL; | 637 | return -EINVAL; |
638 | if (usin->sin_family != AF_INET) { | 638 | if (usin->sin_family != AF_INET) { |
639 | if (usin->sin_family != AF_UNSPEC) | 639 | if (usin->sin_family != AF_UNSPEC) |
640 | return -EAFNOSUPPORT; | 640 | return -EAFNOSUPPORT; |
641 | } | 641 | } |
642 | 642 | ||
643 | daddr = usin->sin_addr.s_addr; | 643 | daddr = usin->sin_addr.s_addr; |
644 | dport = usin->sin_port; | 644 | dport = usin->sin_port; |
645 | if (dport == 0) | 645 | if (dport == 0) |
646 | return -EINVAL; | 646 | return -EINVAL; |
647 | } else { | 647 | } else { |
648 | if (sk->sk_state != TCP_ESTABLISHED) | 648 | if (sk->sk_state != TCP_ESTABLISHED) |
649 | return -EDESTADDRREQ; | 649 | return -EDESTADDRREQ; |
650 | daddr = inet->inet_daddr; | 650 | daddr = inet->inet_daddr; |
651 | dport = inet->inet_dport; | 651 | dport = inet->inet_dport; |
652 | /* Open fast path for connected socket. | 652 | /* Open fast path for connected socket. |
653 | The route will not be used if at least one option is set. | 653 | The route will not be used if at least one option is set. |
654 | */ | 654 | */ |
655 | connected = 1; | 655 | connected = 1; |
656 | } | 656 | } |
657 | ipc.addr = inet->inet_saddr; | 657 | ipc.addr = inet->inet_saddr; |
658 | 658 | ||
659 | ipc.oif = sk->sk_bound_dev_if; | 659 | ipc.oif = sk->sk_bound_dev_if; |
660 | err = sock_tx_timestamp(msg, sk, &ipc.shtx); | 660 | err = sock_tx_timestamp(msg, sk, &ipc.shtx); |
661 | if (err) | 661 | if (err) |
662 | return err; | 662 | return err; |
663 | if (msg->msg_controllen) { | 663 | if (msg->msg_controllen) { |
664 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); | 664 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); |
665 | if (err) | 665 | if (err) |
666 | return err; | 666 | return err; |
667 | if (ipc.opt) | 667 | if (ipc.opt) |
668 | free = 1; | 668 | free = 1; |
669 | connected = 0; | 669 | connected = 0; |
670 | } | 670 | } |
671 | if (!ipc.opt) | 671 | if (!ipc.opt) |
672 | ipc.opt = inet->opt; | 672 | ipc.opt = inet->opt; |
673 | 673 | ||
674 | saddr = ipc.addr; | 674 | saddr = ipc.addr; |
675 | ipc.addr = faddr = daddr; | 675 | ipc.addr = faddr = daddr; |
676 | 676 | ||
677 | if (ipc.opt && ipc.opt->srr) { | 677 | if (ipc.opt && ipc.opt->srr) { |
678 | if (!daddr) | 678 | if (!daddr) |
679 | return -EINVAL; | 679 | return -EINVAL; |
680 | faddr = ipc.opt->faddr; | 680 | faddr = ipc.opt->faddr; |
681 | connected = 0; | 681 | connected = 0; |
682 | } | 682 | } |
683 | tos = RT_TOS(inet->tos); | 683 | tos = RT_TOS(inet->tos); |
684 | if (sock_flag(sk, SOCK_LOCALROUTE) || | 684 | if (sock_flag(sk, SOCK_LOCALROUTE) || |
685 | (msg->msg_flags & MSG_DONTROUTE) || | 685 | (msg->msg_flags & MSG_DONTROUTE) || |
686 | (ipc.opt && ipc.opt->is_strictroute)) { | 686 | (ipc.opt && ipc.opt->is_strictroute)) { |
687 | tos |= RTO_ONLINK; | 687 | tos |= RTO_ONLINK; |
688 | connected = 0; | 688 | connected = 0; |
689 | } | 689 | } |
690 | 690 | ||
691 | if (ipv4_is_multicast(daddr)) { | 691 | if (ipv4_is_multicast(daddr)) { |
692 | if (!ipc.oif) | 692 | if (!ipc.oif) |
693 | ipc.oif = inet->mc_index; | 693 | ipc.oif = inet->mc_index; |
694 | if (!saddr) | 694 | if (!saddr) |
695 | saddr = inet->mc_addr; | 695 | saddr = inet->mc_addr; |
696 | connected = 0; | 696 | connected = 0; |
697 | } | 697 | } |
698 | 698 | ||
699 | if (connected) | 699 | if (connected) |
700 | rt = (struct rtable *)sk_dst_check(sk, 0); | 700 | rt = (struct rtable *)sk_dst_check(sk, 0); |
701 | 701 | ||
702 | if (rt == NULL) { | 702 | if (rt == NULL) { |
703 | struct flowi fl = { .oif = ipc.oif, | 703 | struct flowi fl = { .oif = ipc.oif, |
704 | .mark = sk->sk_mark, | 704 | .mark = sk->sk_mark, |
705 | .nl_u = { .ip4_u = | 705 | .nl_u = { .ip4_u = |
706 | { .daddr = faddr, | 706 | { .daddr = faddr, |
707 | .saddr = saddr, | 707 | .saddr = saddr, |
708 | .tos = tos } }, | 708 | .tos = tos } }, |
709 | .proto = sk->sk_protocol, | 709 | .proto = sk->sk_protocol, |
710 | .flags = inet_sk_flowi_flags(sk), | 710 | .flags = inet_sk_flowi_flags(sk), |
711 | .uli_u = { .ports = | 711 | .uli_u = { .ports = |
712 | { .sport = inet->inet_sport, | 712 | { .sport = inet->inet_sport, |
713 | .dport = dport } } }; | 713 | .dport = dport } } }; |
714 | struct net *net = sock_net(sk); | 714 | struct net *net = sock_net(sk); |
715 | 715 | ||
716 | security_sk_classify_flow(sk, &fl); | 716 | security_sk_classify_flow(sk, &fl); |
717 | err = ip_route_output_flow(net, &rt, &fl, sk, 1); | 717 | err = ip_route_output_flow(net, &rt, &fl, sk, 1); |
718 | if (err) { | 718 | if (err) { |
719 | if (err == -ENETUNREACH) | 719 | if (err == -ENETUNREACH) |
720 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 720 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
721 | goto out; | 721 | goto out; |
722 | } | 722 | } |
723 | 723 | ||
724 | err = -EACCES; | 724 | err = -EACCES; |
725 | if ((rt->rt_flags & RTCF_BROADCAST) && | 725 | if ((rt->rt_flags & RTCF_BROADCAST) && |
726 | !sock_flag(sk, SOCK_BROADCAST)) | 726 | !sock_flag(sk, SOCK_BROADCAST)) |
727 | goto out; | 727 | goto out; |
728 | if (connected) | 728 | if (connected) |
729 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | 729 | sk_dst_set(sk, dst_clone(&rt->u.dst)); |
730 | } | 730 | } |
731 | 731 | ||
732 | if (msg->msg_flags&MSG_CONFIRM) | 732 | if (msg->msg_flags&MSG_CONFIRM) |
733 | goto do_confirm; | 733 | goto do_confirm; |
734 | back_from_confirm: | 734 | back_from_confirm: |
735 | 735 | ||
736 | saddr = rt->rt_src; | 736 | saddr = rt->rt_src; |
737 | if (!ipc.addr) | 737 | if (!ipc.addr) |
738 | daddr = ipc.addr = rt->rt_dst; | 738 | daddr = ipc.addr = rt->rt_dst; |
739 | 739 | ||
740 | lock_sock(sk); | 740 | lock_sock(sk); |
741 | if (unlikely(up->pending)) { | 741 | if (unlikely(up->pending)) { |
742 | /* The socket is already corked while preparing it. */ | 742 | /* The socket is already corked while preparing it. */ |
743 | /* ... which is an evident application bug. --ANK */ | 743 | /* ... which is an evident application bug. --ANK */ |
744 | release_sock(sk); | 744 | release_sock(sk); |
745 | 745 | ||
746 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | 746 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); |
747 | err = -EINVAL; | 747 | err = -EINVAL; |
748 | goto out; | 748 | goto out; |
749 | } | 749 | } |
750 | /* | 750 | /* |
751 | * Now cork the socket so it can accumulate the pending data. | 751 | * Now cork the socket so it can accumulate the pending data. |
752 | */ | 752 | */ |
753 | inet->cork.fl.fl4_dst = daddr; | 753 | inet->cork.fl.fl4_dst = daddr; |
754 | inet->cork.fl.fl_ip_dport = dport; | 754 | inet->cork.fl.fl_ip_dport = dport; |
755 | inet->cork.fl.fl4_src = saddr; | 755 | inet->cork.fl.fl4_src = saddr; |
756 | inet->cork.fl.fl_ip_sport = inet->inet_sport; | 756 | inet->cork.fl.fl_ip_sport = inet->inet_sport; |
757 | up->pending = AF_INET; | 757 | up->pending = AF_INET; |
758 | 758 | ||
759 | do_append_data: | 759 | do_append_data: |
760 | up->len += ulen; | 760 | up->len += ulen; |
761 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | 761 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
762 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, | 762 | err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, |
763 | sizeof(struct udphdr), &ipc, &rt, | 763 | sizeof(struct udphdr), &ipc, &rt, |
764 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 764 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
765 | if (err) | 765 | if (err) |
766 | udp_flush_pending_frames(sk); | 766 | udp_flush_pending_frames(sk); |
767 | else if (!corkreq) | 767 | else if (!corkreq) |
768 | err = udp_push_pending_frames(sk); | 768 | err = udp_push_pending_frames(sk); |
769 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | 769 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) |
770 | up->pending = 0; | 770 | up->pending = 0; |
771 | release_sock(sk); | 771 | release_sock(sk); |
772 | 772 | ||
773 | out: | 773 | out: |
774 | ip_rt_put(rt); | 774 | ip_rt_put(rt); |
775 | if (free) | 775 | if (free) |
776 | kfree(ipc.opt); | 776 | kfree(ipc.opt); |
777 | if (!err) | 777 | if (!err) |
778 | return len; | 778 | return len; |
779 | /* | 779 | /* |
780 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | 780 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting |
781 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | 781 | * ENOBUFS might not be good (it's not tunable per se), but otherwise |
782 | * we don't have a good statistic (IpOutDiscards, but it can mean too | 782 | * we don't have a good statistic (IpOutDiscards, but it can mean too |
783 | * many things). We could add another stat, but for now that | 783 | * many things). We could add another stat, but for now that |
784 | * seems like overkill. | 784 | * seems like overkill. |
785 | */ | 785 | */ |
786 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 786 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
787 | UDP_INC_STATS_USER(sock_net(sk), | 787 | UDP_INC_STATS_USER(sock_net(sk), |
788 | UDP_MIB_SNDBUFERRORS, is_udplite); | 788 | UDP_MIB_SNDBUFERRORS, is_udplite); |
789 | } | 789 | } |
790 | return err; | 790 | return err; |
791 | 791 | ||
792 | do_confirm: | 792 | do_confirm: |
793 | dst_confirm(&rt->u.dst); | 793 | dst_confirm(&rt->u.dst); |
794 | if (!(msg->msg_flags&MSG_PROBE) || len) | 794 | if (!(msg->msg_flags&MSG_PROBE) || len) |
795 | goto back_from_confirm; | 795 | goto back_from_confirm; |
796 | err = 0; | 796 | err = 0; |
797 | goto out; | 797 | goto out; |
798 | } | 798 | } |
799 | EXPORT_SYMBOL(udp_sendmsg); | 799 | EXPORT_SYMBOL(udp_sendmsg); |
800 | 800 | ||
801 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | 801 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
802 | size_t size, int flags) | 802 | size_t size, int flags) |
803 | { | 803 | { |
804 | struct udp_sock *up = udp_sk(sk); | 804 | struct udp_sock *up = udp_sk(sk); |
805 | int ret; | 805 | int ret; |
806 | 806 | ||
807 | if (!up->pending) { | 807 | if (!up->pending) { |
808 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; | 808 | struct msghdr msg = { .msg_flags = flags|MSG_MORE }; |
809 | 809 | ||
810 | /* Call udp_sendmsg() to specify the destination address, which | 810 | /* Call udp_sendmsg() to specify the destination address, which |
811 | * the sendpage interface can't pass. | 811 | * the sendpage interface can't pass. |
812 | * This will succeed only when the socket is connected. | 812 | * This will succeed only when the socket is connected. |
813 | */ | 813 | */ |
814 | ret = udp_sendmsg(NULL, sk, &msg, 0); | 814 | ret = udp_sendmsg(NULL, sk, &msg, 0); |
815 | if (ret < 0) | 815 | if (ret < 0) |
816 | return ret; | 816 | return ret; |
817 | } | 817 | } |
818 | 818 | ||
819 | lock_sock(sk); | 819 | lock_sock(sk); |
820 | 820 | ||
821 | if (unlikely(!up->pending)) { | 821 | if (unlikely(!up->pending)) { |
822 | release_sock(sk); | 822 | release_sock(sk); |
823 | 823 | ||
824 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); | 824 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); |
825 | return -EINVAL; | 825 | return -EINVAL; |
826 | } | 826 | } |
827 | 827 | ||
828 | ret = ip_append_page(sk, page, offset, size, flags); | 828 | ret = ip_append_page(sk, page, offset, size, flags); |
829 | if (ret == -EOPNOTSUPP) { | 829 | if (ret == -EOPNOTSUPP) { |
830 | release_sock(sk); | 830 | release_sock(sk); |
831 | return sock_no_sendpage(sk->sk_socket, page, offset, | 831 | return sock_no_sendpage(sk->sk_socket, page, offset, |
832 | size, flags); | 832 | size, flags); |
833 | } | 833 | } |
834 | if (ret < 0) { | 834 | if (ret < 0) { |
835 | udp_flush_pending_frames(sk); | 835 | udp_flush_pending_frames(sk); |
836 | goto out; | 836 | goto out; |
837 | } | 837 | } |
838 | 838 | ||
839 | up->len += size; | 839 | up->len += size; |
840 | if (!(up->corkflag || (flags&MSG_MORE))) | 840 | if (!(up->corkflag || (flags&MSG_MORE))) |
841 | ret = udp_push_pending_frames(sk); | 841 | ret = udp_push_pending_frames(sk); |
842 | if (!ret) | 842 | if (!ret) |
843 | ret = size; | 843 | ret = size; |
844 | out: | 844 | out: |
845 | release_sock(sk); | 845 | release_sock(sk); |
846 | return ret; | 846 | return ret; |
847 | } | 847 | } |
848 | 848 | ||
849 | 849 | ||
850 | /** | 850 | /** |
851 | * first_packet_length - return length of first packet in receive queue | 851 | * first_packet_length - return length of first packet in receive queue |
852 | * @sk: socket | 852 | * @sk: socket |
853 | * | 853 | * |
854 | * Drops all bad-checksum frames until a valid one is found. | 854 | * Drops all bad-checksum frames until a valid one is found. |
855 | * Returns the length of the found skb, or 0 if none is found. | 855 | * Returns the length of the found skb, or 0 if none is found. |
856 | */ | 856 | */ |
857 | static unsigned int first_packet_length(struct sock *sk) | 857 | static unsigned int first_packet_length(struct sock *sk) |
858 | { | 858 | { |
859 | struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; | 859 | struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; |
860 | struct sk_buff *skb; | 860 | struct sk_buff *skb; |
861 | unsigned int res; | 861 | unsigned int res; |
862 | 862 | ||
863 | __skb_queue_head_init(&list_kill); | 863 | __skb_queue_head_init(&list_kill); |
864 | 864 | ||
865 | spin_lock_bh(&rcvq->lock); | 865 | spin_lock_bh(&rcvq->lock); |
866 | while ((skb = skb_peek(rcvq)) != NULL && | 866 | while ((skb = skb_peek(rcvq)) != NULL && |
867 | udp_lib_checksum_complete(skb)) { | 867 | udp_lib_checksum_complete(skb)) { |
868 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, | 868 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, |
869 | IS_UDPLITE(sk)); | 869 | IS_UDPLITE(sk)); |
870 | atomic_inc(&sk->sk_drops); | ||
870 | __skb_unlink(skb, rcvq); | 871 | __skb_unlink(skb, rcvq); |
871 | __skb_queue_tail(&list_kill, skb); | 872 | __skb_queue_tail(&list_kill, skb); |
872 | } | 873 | } |
873 | res = skb ? skb->len : 0; | 874 | res = skb ? skb->len : 0; |
874 | spin_unlock_bh(&rcvq->lock); | 875 | spin_unlock_bh(&rcvq->lock); |
875 | 876 | ||
876 | if (!skb_queue_empty(&list_kill)) { | 877 | if (!skb_queue_empty(&list_kill)) { |
877 | lock_sock(sk); | 878 | lock_sock(sk); |
878 | __skb_queue_purge(&list_kill); | 879 | __skb_queue_purge(&list_kill); |
879 | sk_mem_reclaim_partial(sk); | 880 | sk_mem_reclaim_partial(sk); |
880 | release_sock(sk); | 881 | release_sock(sk); |
881 | } | 882 | } |
882 | return res; | 883 | return res; |
883 | } | 884 | } |
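/*
 * Bad-checksum frames are unlinked while the queue lock is held,
 * counted in UDP_MIB_INERRORS and in sk->sk_drops, and only freed
 * afterwards from list_kill, keeping the locked section short.
 */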
884 | 885 | ||
885 | /* | 886 | /* |
886 | * IOCTL requests applicable to the UDP protocol | 887 | * IOCTL requests applicable to the UDP protocol |
887 | */ | 888 | */ |
888 | 889 | ||
889 | int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 890 | int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
890 | { | 891 | { |
891 | switch (cmd) { | 892 | switch (cmd) { |
892 | case SIOCOUTQ: | 893 | case SIOCOUTQ: |
893 | { | 894 | { |
894 | int amount = sk_wmem_alloc_get(sk); | 895 | int amount = sk_wmem_alloc_get(sk); |
895 | 896 | ||
896 | return put_user(amount, (int __user *)arg); | 897 | return put_user(amount, (int __user *)arg); |
897 | } | 898 | } |
898 | 899 | ||
899 | case SIOCINQ: | 900 | case SIOCINQ: |
900 | { | 901 | { |
901 | unsigned int amount = first_packet_length(sk); | 902 | unsigned int amount = first_packet_length(sk); |
902 | 903 | ||
903 | if (amount) | 904 | if (amount) |
904 | /* | 905 | /* |
905 | * We will only return the amount | 906 | * We will only return the amount |
906 | * of this packet since that is all | 907 | * of this packet since that is all |
907 | * that will be read. | 908 | * that will be read. |
908 | */ | 909 | */ |
909 | amount -= sizeof(struct udphdr); | 910 | amount -= sizeof(struct udphdr); |
910 | 911 | ||
911 | return put_user(amount, (int __user *)arg); | 912 | return put_user(amount, (int __user *)arg); |
912 | } | 913 | } |
913 | 914 | ||
914 | default: | 915 | default: |
915 | return -ENOIOCTLCMD; | 916 | return -ENOIOCTLCMD; |
916 | } | 917 | } |
917 | 918 | ||
918 | return 0; | 919 | return 0; |
919 | } | 920 | } |
920 | EXPORT_SYMBOL(udp_ioctl); | 921 | EXPORT_SYMBOL(udp_ioctl); |
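/*
 * E.g. with one 100-byte datagram queued, SIOCINQ reports 92: the
 * UDP header is subtracted because recvmsg() will hand the caller
 * only the payload.
 */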
921 | 922 | ||
922 | /* | 923 | /* |
923 | * This should be easy: if there is something there, we | 924 | * This should be easy: if there is something there, we |
924 | * return it; otherwise we block. | 925 | * return it; otherwise we block. |
925 | */ | 926 | */ |
926 | 927 | ||
927 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | 928 | int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
928 | size_t len, int noblock, int flags, int *addr_len) | 929 | size_t len, int noblock, int flags, int *addr_len) |
929 | { | 930 | { |
930 | struct inet_sock *inet = inet_sk(sk); | 931 | struct inet_sock *inet = inet_sk(sk); |
931 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; | 932 | struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; |
932 | struct sk_buff *skb; | 933 | struct sk_buff *skb; |
933 | unsigned int ulen, copied; | 934 | unsigned int ulen, copied; |
934 | int peeked; | 935 | int peeked; |
935 | int err; | 936 | int err; |
936 | int is_udplite = IS_UDPLITE(sk); | 937 | int is_udplite = IS_UDPLITE(sk); |
937 | 938 | ||
938 | /* | 939 | /* |
939 | * Check any passed addresses | 940 | * Check any passed addresses |
940 | */ | 941 | */ |
941 | if (addr_len) | 942 | if (addr_len) |
942 | *addr_len = sizeof(*sin); | 943 | *addr_len = sizeof(*sin); |
943 | 944 | ||
944 | if (flags & MSG_ERRQUEUE) | 945 | if (flags & MSG_ERRQUEUE) |
945 | return ip_recv_error(sk, msg, len); | 946 | return ip_recv_error(sk, msg, len); |
946 | 947 | ||
947 | try_again: | 948 | try_again: |
948 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | 949 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), |
949 | &peeked, &err); | 950 | &peeked, &err); |
950 | if (!skb) | 951 | if (!skb) |
951 | goto out; | 952 | goto out; |
952 | 953 | ||
953 | ulen = skb->len - sizeof(struct udphdr); | 954 | ulen = skb->len - sizeof(struct udphdr); |
954 | copied = len; | 955 | copied = len; |
955 | if (copied > ulen) | 956 | if (copied > ulen) |
956 | copied = ulen; | 957 | copied = ulen; |
957 | else if (copied < ulen) | 958 | else if (copied < ulen) |
958 | msg->msg_flags |= MSG_TRUNC; | 959 | msg->msg_flags |= MSG_TRUNC; |
959 | 960 | ||
960 | /* | 961 | /* |
961 | * If checksum is needed at all, try to do it while copying the | 962 | * If checksum is needed at all, try to do it while copying the |
962 | * data. If the data is truncated, or if we only want a partial | 963 | * data. If the data is truncated, or if we only want a partial |
963 | * coverage checksum (UDP-Lite), do it before the copy. | 964 | * coverage checksum (UDP-Lite), do it before the copy. |
964 | */ | 965 | */ |
965 | 966 | ||
966 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | 967 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { |
967 | if (udp_lib_checksum_complete(skb)) | 968 | if (udp_lib_checksum_complete(skb)) |
968 | goto csum_copy_err; | 969 | goto csum_copy_err; |
969 | } | 970 | } |
970 | 971 | ||
971 | if (skb_csum_unnecessary(skb)) | 972 | if (skb_csum_unnecessary(skb)) |
972 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 973 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
973 | msg->msg_iov, copied); | 974 | msg->msg_iov, copied); |
974 | else { | 975 | else { |
975 | err = skb_copy_and_csum_datagram_iovec(skb, | 976 | err = skb_copy_and_csum_datagram_iovec(skb, |
976 | sizeof(struct udphdr), | 977 | sizeof(struct udphdr), |
977 | msg->msg_iov); | 978 | msg->msg_iov); |
978 | 979 | ||
979 | if (err == -EINVAL) | 980 | if (err == -EINVAL) |
980 | goto csum_copy_err; | 981 | goto csum_copy_err; |
981 | } | 982 | } |
982 | 983 | ||
983 | if (err) | 984 | if (err) |
984 | goto out_free; | 985 | goto out_free; |
985 | 986 | ||
986 | if (!peeked) | 987 | if (!peeked) |
987 | UDP_INC_STATS_USER(sock_net(sk), | 988 | UDP_INC_STATS_USER(sock_net(sk), |
988 | UDP_MIB_INDATAGRAMS, is_udplite); | 989 | UDP_MIB_INDATAGRAMS, is_udplite); |
989 | 990 | ||
990 | sock_recv_ts_and_drops(msg, sk, skb); | 991 | sock_recv_ts_and_drops(msg, sk, skb); |
991 | 992 | ||
992 | /* Copy the address. */ | 993 | /* Copy the address. */ |
993 | if (sin) { | 994 | if (sin) { |
994 | sin->sin_family = AF_INET; | 995 | sin->sin_family = AF_INET; |
995 | sin->sin_port = udp_hdr(skb)->source; | 996 | sin->sin_port = udp_hdr(skb)->source; |
996 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | 997 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
997 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | 998 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); |
998 | } | 999 | } |
999 | if (inet->cmsg_flags) | 1000 | if (inet->cmsg_flags) |
1000 | ip_cmsg_recv(msg, skb); | 1001 | ip_cmsg_recv(msg, skb); |
1001 | 1002 | ||
1002 | err = copied; | 1003 | err = copied; |
1003 | if (flags & MSG_TRUNC) | 1004 | if (flags & MSG_TRUNC) |
1004 | err = ulen; | 1005 | err = ulen; |
1005 | 1006 | ||
1006 | out_free: | 1007 | out_free: |
1007 | lock_sock(sk); | 1008 | lock_sock(sk); |
1008 | skb_free_datagram(sk, skb); | 1009 | skb_free_datagram(sk, skb); |
1009 | release_sock(sk); | 1010 | release_sock(sk); |
1010 | out: | 1011 | out: |
1011 | return err; | 1012 | return err; |
1012 | 1013 | ||
1013 | csum_copy_err: | 1014 | csum_copy_err: |
1014 | lock_sock(sk); | 1015 | lock_sock(sk); |
1015 | if (!skb_kill_datagram(sk, skb, flags)) | 1016 | if (!skb_kill_datagram(sk, skb, flags)) |
1016 | UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1017 | UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1017 | release_sock(sk); | 1018 | release_sock(sk); |
1018 | 1019 | ||
1019 | if (noblock) | 1020 | if (noblock) |
1020 | return -EAGAIN; | 1021 | return -EAGAIN; |
1021 | goto try_again; | 1022 | goto try_again; |
1022 | } | 1023 | } |
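/*
 * On the csum_copy_err path the drop accounting is split:
 * skb_kill_datagram() increments sk->sk_drops itself, so the
 * caller above only bumps UDP_MIB_INERRORS before retrying (or
 * returning -EAGAIN to a non-blocking caller).
 */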
1023 | 1024 | ||
1024 | 1025 | ||
1025 | int udp_disconnect(struct sock *sk, int flags) | 1026 | int udp_disconnect(struct sock *sk, int flags) |
1026 | { | 1027 | { |
1027 | struct inet_sock *inet = inet_sk(sk); | 1028 | struct inet_sock *inet = inet_sk(sk); |
1028 | /* | 1029 | /* |
1029 | * 1003.1g - break association. | 1030 | * 1003.1g - break association. |
1030 | */ | 1031 | */ |
1031 | 1032 | ||
1032 | sk->sk_state = TCP_CLOSE; | 1033 | sk->sk_state = TCP_CLOSE; |
1033 | inet->inet_daddr = 0; | 1034 | inet->inet_daddr = 0; |
1034 | inet->inet_dport = 0; | 1035 | inet->inet_dport = 0; |
1035 | sk->sk_bound_dev_if = 0; | 1036 | sk->sk_bound_dev_if = 0; |
1036 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) | 1037 | if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) |
1037 | inet_reset_saddr(sk); | 1038 | inet_reset_saddr(sk); |
1038 | 1039 | ||
1039 | if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { | 1040 | if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { |
1040 | sk->sk_prot->unhash(sk); | 1041 | sk->sk_prot->unhash(sk); |
1041 | inet->inet_sport = 0; | 1042 | inet->inet_sport = 0; |
1042 | } | 1043 | } |
1043 | sk_dst_reset(sk); | 1044 | sk_dst_reset(sk); |
1044 | return 0; | 1045 | return 0; |
1045 | } | 1046 | } |
1046 | EXPORT_SYMBOL(udp_disconnect); | 1047 | EXPORT_SYMBOL(udp_disconnect); |
1047 | 1048 | ||
1048 | void udp_lib_unhash(struct sock *sk) | 1049 | void udp_lib_unhash(struct sock *sk) |
1049 | { | 1050 | { |
1050 | if (sk_hashed(sk)) { | 1051 | if (sk_hashed(sk)) { |
1051 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 1052 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
1052 | struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), | 1053 | struct udp_hslot *hslot = udp_hashslot(udptable, sock_net(sk), |
1053 | sk->sk_hash); | 1054 | sk->sk_hash); |
1054 | 1055 | ||
1055 | spin_lock_bh(&hslot->lock); | 1056 | spin_lock_bh(&hslot->lock); |
1056 | if (sk_nulls_del_node_init_rcu(sk)) { | 1057 | if (sk_nulls_del_node_init_rcu(sk)) { |
1057 | inet_sk(sk)->inet_num = 0; | 1058 | inet_sk(sk)->inet_num = 0; |
1058 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 1059 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
1059 | } | 1060 | } |
1060 | spin_unlock_bh(&hslot->lock); | 1061 | spin_unlock_bh(&hslot->lock); |
1061 | } | 1062 | } |
1062 | } | 1063 | } |
1063 | EXPORT_SYMBOL(udp_lib_unhash); | 1064 | EXPORT_SYMBOL(udp_lib_unhash); |
1064 | 1065 | ||
1065 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 1066 | static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
1066 | { | 1067 | { |
1067 | int rc = sock_queue_rcv_skb(sk, skb); | 1068 | int rc = sock_queue_rcv_skb(sk, skb); |
1068 | 1069 | ||
1069 | if (rc < 0) { | 1070 | if (rc < 0) { |
1070 | int is_udplite = IS_UDPLITE(sk); | 1071 | int is_udplite = IS_UDPLITE(sk); |
1071 | 1072 | ||
1072 | /* Note that an ENOMEM error is charged twice */ | 1073 | /* Note that an ENOMEM error is charged twice */ |
1073 | if (rc == -ENOMEM) | 1074 | if (rc == -ENOMEM) |
1074 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, | 1075 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, |
1075 | is_udplite); | 1076 | is_udplite); |
1076 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1077 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1077 | kfree_skb(skb); | 1078 | kfree_skb(skb); |
1078 | return -1; | 1079 | return -1; |
1079 | } | 1080 | } |
1080 | 1081 | ||
1081 | return 0; | 1082 | return 0; |
1082 | 1083 | ||
1083 | } | 1084 | } |
1084 | 1085 | ||
1085 | /* returns: | 1086 | /* returns: |
1086 | * -1: error | 1087 | * -1: error |
1087 | * 0: success | 1088 | * 0: success |
1088 | * >0: "udp encap" protocol resubmission | 1089 | * >0: "udp encap" protocol resubmission |
1089 | * | 1090 | * |
1090 | * Note that in the success and error cases, the skb is assumed to | 1091 | * Note that in the success and error cases, the skb is assumed to |
1091 | * have either been requeued or freed. | 1092 | * have either been requeued or freed. |
1092 | */ | 1093 | */ |
1093 | int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | 1094 | int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
1094 | { | 1095 | { |
1095 | struct udp_sock *up = udp_sk(sk); | 1096 | struct udp_sock *up = udp_sk(sk); |
1096 | int rc; | 1097 | int rc; |
1097 | int is_udplite = IS_UDPLITE(sk); | 1098 | int is_udplite = IS_UDPLITE(sk); |
1098 | 1099 | ||
1099 | /* | 1100 | /* |
1100 | * Charge it to the socket, dropping if the queue is full. | 1101 | * Charge it to the socket, dropping if the queue is full. |
1101 | */ | 1102 | */ |
1102 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) | 1103 | if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) |
1103 | goto drop; | 1104 | goto drop; |
1104 | nf_reset(skb); | 1105 | nf_reset(skb); |
1105 | 1106 | ||
1106 | if (up->encap_type) { | 1107 | if (up->encap_type) { |
1107 | /* | 1108 | /* |
1108 | * This is an encapsulation socket so pass the skb to | 1109 | * This is an encapsulation socket so pass the skb to |
1109 | * the socket's udp_encap_rcv() hook. Otherwise, just | 1110 | * the socket's udp_encap_rcv() hook. Otherwise, just |
1110 | * fall through and pass this up the UDP socket. | 1111 | * fall through and pass this up the UDP socket. |
1111 | * up->encap_rcv() returns the following value: | 1112 | * up->encap_rcv() returns the following value: |
1112 | * =0 if skb was successfully passed to the encap | 1113 | * =0 if skb was successfully passed to the encap |
1113 | * handler or was discarded by it. | 1114 | * handler or was discarded by it. |
1114 | * >0 if skb should be passed on to UDP. | 1115 | * >0 if skb should be passed on to UDP. |
1115 | * <0 if skb should be resubmitted as proto -N | 1116 | * <0 if skb should be resubmitted as proto -N |
1116 | */ | 1117 | */ |
1117 | 1118 | ||
1118 | /* if we're overly short, let UDP handle it */ | 1119 | /* if we're overly short, let UDP handle it */ |
1119 | if (skb->len > sizeof(struct udphdr) && | 1120 | if (skb->len > sizeof(struct udphdr) && |
1120 | up->encap_rcv != NULL) { | 1121 | up->encap_rcv != NULL) { |
1121 | int ret; | 1122 | int ret; |
1122 | 1123 | ||
1123 | ret = (*up->encap_rcv)(sk, skb); | 1124 | ret = (*up->encap_rcv)(sk, skb); |
1124 | if (ret <= 0) { | 1125 | if (ret <= 0) { |
1125 | UDP_INC_STATS_BH(sock_net(sk), | 1126 | UDP_INC_STATS_BH(sock_net(sk), |
1126 | UDP_MIB_INDATAGRAMS, | 1127 | UDP_MIB_INDATAGRAMS, |
1127 | is_udplite); | 1128 | is_udplite); |
1128 | return -ret; | 1129 | return -ret; |
1129 | } | 1130 | } |
1130 | } | 1131 | } |
1131 | 1132 | ||
1132 | /* FALLTHROUGH -- it's a UDP Packet */ | 1133 | /* FALLTHROUGH -- it's a UDP Packet */ |
1133 | } | 1134 | } |
1134 | 1135 | ||
1135 | /* | 1136 | /* |
1136 | * UDP-Lite specific tests, ignored on UDP sockets | 1137 | * UDP-Lite specific tests, ignored on UDP sockets |
1137 | */ | 1138 | */ |
1138 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | 1139 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { |
1139 | 1140 | ||
1140 | /* | 1141 | /* |
1141 | * MIB statistics other than incrementing the error count are | 1142 | * MIB statistics other than incrementing the error count are |
1142 | * disabled for the following two types of errors: these depend | 1143 | * disabled for the following two types of errors: these depend |
1143 | * on the application settings, not on the functioning of the | 1144 | * on the application settings, not on the functioning of the |
1144 | * protocol stack as such. | 1145 | * protocol stack as such. |
1145 | * | 1146 | * |
1146 | * RFC 3828 here recommends (sec 3.3): "There should also be a | 1147 | * RFC 3828 here recommends (sec 3.3): "There should also be a |
1147 | * way ... to ... at least let the receiving application block | 1148 | * way ... to ... at least let the receiving application block |
1148 | * delivery of packets with coverage values less than a value | 1149 | * delivery of packets with coverage values less than a value |
1149 | * provided by the application." | 1150 | * provided by the application." |
1150 | */ | 1151 | */ |
1151 | if (up->pcrlen == 0) { /* full coverage was set */ | 1152 | if (up->pcrlen == 0) { /* full coverage was set */ |
1152 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " | 1153 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " |
1153 | "%d while full coverage %d requested\n", | 1154 | "%d while full coverage %d requested\n", |
1154 | UDP_SKB_CB(skb)->cscov, skb->len); | 1155 | UDP_SKB_CB(skb)->cscov, skb->len); |
1155 | goto drop; | 1156 | goto drop; |
1156 | } | 1157 | } |
1157 | /* The next case involves violating the min. coverage requested | 1158 | /* The next case involves violating the min. coverage requested |
1158 | * by the receiver. This is subtle: if the receiver wants x and x is | 1159 | * by the receiver. This is subtle: if the receiver wants x and x is |
1159 | * greater than the buffer size/MTU, then the receiver will complain | 1160 | * greater than the buffer size/MTU, then the receiver will complain |
1160 | * that it wants x while the sender emits packets of smaller size y. | 1161 | * that it wants x while the sender emits packets of smaller size y. |
1161 | * Therefore the above ...()->partial_cov statement is essential. | 1162 | * Therefore the above ...()->partial_cov statement is essential. |
1162 | */ | 1163 | */ |
1163 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | 1164 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { |
1164 | LIMIT_NETDEBUG(KERN_WARNING | 1165 | LIMIT_NETDEBUG(KERN_WARNING |
1165 | "UDPLITE: coverage %d too small, need min %d\n", | 1166 | "UDPLITE: coverage %d too small, need min %d\n", |
1166 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | 1167 | UDP_SKB_CB(skb)->cscov, up->pcrlen); |
1167 | goto drop; | 1168 | goto drop; |
1168 | } | 1169 | } |
1169 | } | 1170 | } |
1170 | 1171 | ||
1171 | if (sk->sk_filter) { | 1172 | if (sk->sk_filter) { |
1172 | if (udp_lib_checksum_complete(skb)) | 1173 | if (udp_lib_checksum_complete(skb)) |
1173 | goto drop; | 1174 | goto drop; |
1174 | } | 1175 | } |
1175 | 1176 | ||
1176 | rc = 0; | 1177 | rc = 0; |
1177 | 1178 | ||
1178 | bh_lock_sock(sk); | 1179 | bh_lock_sock(sk); |
1179 | if (!sock_owned_by_user(sk)) | 1180 | if (!sock_owned_by_user(sk)) |
1180 | rc = __udp_queue_rcv_skb(sk, skb); | 1181 | rc = __udp_queue_rcv_skb(sk, skb); |
1181 | else | 1182 | else |
1182 | sk_add_backlog(sk, skb); | 1183 | sk_add_backlog(sk, skb); |
1183 | bh_unlock_sock(sk); | 1184 | bh_unlock_sock(sk); |
1184 | 1185 | ||
1185 | return rc; | 1186 | return rc; |
1186 | 1187 | ||
1187 | drop: | 1188 | drop: |
1188 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1189 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1190 | atomic_inc(&sk->sk_drops); | ||
1189 | kfree_skb(skb); | 1191 | kfree_skb(skb); |
1190 | return -1; | 1192 | return -1; |
1191 | } | 1193 | } |
1192 | 1194 | ||
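The hunk above is the heart of this part of the series for IPv4: the drop: label in udp_queue_rcv_skb() now increments sk->sk_drops next to the UDP_MIB_INERRORS bump, so frames rejected by the xfrm policy check, the UDP-Lite coverage tests, or checksum verification are all charged to the owning socket. A minimal sketch of the consolidated pattern, using a hypothetical helper name (the real code updates the per-netns MIB counters inline rather than through a helper):

/* Hypothetical helper illustrating the pattern adopted above: charge
 * the drop to the socket once, right before freeing the skb. */
static void sk_drop_skb(struct sock *sk, struct sk_buff *skb)
{
        atomic_inc(&sk->sk_drops);      /* surfaces in the /proc/net/udp "drops" column */
        kfree_skb(skb);
}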
1193 | /* | 1195 | /* |
1194 | * Multicasts and broadcasts go to each listener. | 1196 | * Multicasts and broadcasts go to each listener. |
1195 | * | 1197 | * |
1196 | * Note: called only from the BH handler context, | 1198 | * Note: called only from the BH handler context, |
1197 | * so we don't need to lock the hashes. | 1199 | * so we don't need to lock the hashes. |
1198 | */ | 1200 | */ |
1199 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | 1201 | static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, |
1200 | struct udphdr *uh, | 1202 | struct udphdr *uh, |
1201 | __be32 saddr, __be32 daddr, | 1203 | __be32 saddr, __be32 daddr, |
1202 | struct udp_table *udptable) | 1204 | struct udp_table *udptable) |
1203 | { | 1205 | { |
1204 | struct sock *sk; | 1206 | struct sock *sk; |
1205 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); | 1207 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); |
1206 | int dif; | 1208 | int dif; |
1207 | 1209 | ||
1208 | spin_lock(&hslot->lock); | 1210 | spin_lock(&hslot->lock); |
1209 | sk = sk_nulls_head(&hslot->head); | 1211 | sk = sk_nulls_head(&hslot->head); |
1210 | dif = skb->dev->ifindex; | 1212 | dif = skb->dev->ifindex; |
1211 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 1213 | sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
1212 | if (sk) { | 1214 | if (sk) { |
1213 | struct sock *sknext = NULL; | 1215 | struct sock *sknext = NULL; |
1214 | 1216 | ||
1215 | do { | 1217 | do { |
1216 | struct sk_buff *skb1 = skb; | 1218 | struct sk_buff *skb1 = skb; |
1217 | 1219 | ||
1218 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, | 1220 | sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, |
1219 | daddr, uh->source, saddr, | 1221 | daddr, uh->source, saddr, |
1220 | dif); | 1222 | dif); |
1221 | if (sknext) | 1223 | if (sknext) |
1222 | skb1 = skb_clone(skb, GFP_ATOMIC); | 1224 | skb1 = skb_clone(skb, GFP_ATOMIC); |
1223 | 1225 | ||
1224 | if (skb1) { | 1226 | if (skb1) { |
1225 | int ret = udp_queue_rcv_skb(sk, skb1); | 1227 | int ret = udp_queue_rcv_skb(sk, skb1); |
1226 | if (ret > 0) | 1228 | if (ret > 0) |
1227 | /* we should probably re-process instead | 1229 | /* we should probably re-process instead |
1228 | * of dropping packets here. */ | 1230 | * of dropping packets here. */ |
1229 | kfree_skb(skb1); | 1231 | kfree_skb(skb1); |
1230 | } | 1232 | } |
1231 | sk = sknext; | 1233 | sk = sknext; |
1232 | } while (sknext); | 1234 | } while (sknext); |
1233 | } else | 1235 | } else |
1234 | consume_skb(skb); | 1236 | consume_skb(skb); |
1235 | spin_unlock(&hslot->lock); | 1237 | spin_unlock(&hslot->lock); |
1236 | return 0; | 1238 | return 0; |
1237 | } | 1239 | } |
1238 | 1240 | ||
1239 | /* Initialize the UDP checksum. If this returns zero (success), | 1241 | /* Initialize the UDP checksum. If this returns zero (success), |
1240 | * CHECKSUM_UNNECESSARY means that no more checks are required. | 1242 | * CHECKSUM_UNNECESSARY means that no more checks are required. |
1241 | * Otherwise, csum completion requires checksumming the packet body, | 1243 | * Otherwise, csum completion requires checksumming the packet body, |
1242 | * including the udp header, and folding it into skb->csum. | 1244 | * including the udp header, and folding it into skb->csum. |
1243 | */ | 1245 | */ |
1244 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | 1246 | static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, |
1245 | int proto) | 1247 | int proto) |
1246 | { | 1248 | { |
1247 | const struct iphdr *iph; | 1249 | const struct iphdr *iph; |
1248 | int err; | 1250 | int err; |
1249 | 1251 | ||
1250 | UDP_SKB_CB(skb)->partial_cov = 0; | 1252 | UDP_SKB_CB(skb)->partial_cov = 0; |
1251 | UDP_SKB_CB(skb)->cscov = skb->len; | 1253 | UDP_SKB_CB(skb)->cscov = skb->len; |
1252 | 1254 | ||
1253 | if (proto == IPPROTO_UDPLITE) { | 1255 | if (proto == IPPROTO_UDPLITE) { |
1254 | err = udplite_checksum_init(skb, uh); | 1256 | err = udplite_checksum_init(skb, uh); |
1255 | if (err) | 1257 | if (err) |
1256 | return err; | 1258 | return err; |
1257 | } | 1259 | } |
1258 | 1260 | ||
1259 | iph = ip_hdr(skb); | 1261 | iph = ip_hdr(skb); |
1260 | if (uh->check == 0) { | 1262 | if (uh->check == 0) { |
1261 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1263 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1262 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1264 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1263 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | 1265 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, |
1264 | proto, skb->csum)) | 1266 | proto, skb->csum)) |
1265 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1267 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1266 | } | 1268 | } |
1267 | if (!skb_csum_unnecessary(skb)) | 1269 | if (!skb_csum_unnecessary(skb)) |
1268 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, | 1270 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, |
1269 | skb->len, proto, 0); | 1271 | skb->len, proto, 0); |
1270 | /* Probably, we should checksum udp header (it should be in cache | 1272 | /* Probably, we should checksum udp header (it should be in cache |
1271 | * in any case) and data in tiny packets (< rx copybreak). | 1273 | * in any case) and data in tiny packets (< rx copybreak). |
1272 | */ | 1274 | */ |
1273 | 1275 | ||
1274 | return 0; | 1276 | return 0; |
1275 | } | 1277 | } |
1276 | 1278 | ||
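For reference, the pseudo-header arithmetic that csum_tcpudp_magic()/csum_tcpudp_nofold() perform above can be written out long-hand. A minimal user-space sketch, assuming host-byte-order addresses and a plain byte loop rather than the kernel's incremental skb->csum accumulation; the checksum field itself must be zero in the buffer while generating:

#include <stdint.h>
#include <stddef.h>

/* RFC 768/1071 ones'-complement checksum over the IPv4 pseudo-header
 * plus the UDP header and payload. saddr/daddr and the return value
 * are host byte order; convert the result with htons() when storing. */
static uint16_t udp4_checksum(uint32_t saddr, uint32_t daddr,
                              const uint8_t *udp, size_t len)
{
        uint32_t sum = 0;
        size_t i;

        /* Pseudo-header: addresses, zero:protocol (17), UDP length. */
        sum += (saddr >> 16) + (saddr & 0xffff);
        sum += (daddr >> 16) + (daddr & 0xffff);
        sum += 17 + (uint32_t)len;

        /* Datagram bytes as big-endian 16-bit words, odd byte zero-padded. */
        for (i = 0; i + 1 < len; i += 2)
                sum += (uint32_t)(udp[i] << 8 | udp[i + 1]);
        if (len & 1)
                sum += (uint32_t)(udp[len - 1] << 8);

        /* Fold carries into 16 bits and invert, like csum_fold(). */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;  /* UDP transmits an all-zero result as 0xffff */
}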
1277 | /* | 1279 | /* |
1278 | * All we need to do is get the socket, and then do a checksum. | 1280 | * All we need to do is get the socket, and then do a checksum. |
1279 | */ | 1281 | */ |
1280 | 1282 | ||
1281 | int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | 1283 | int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, |
1282 | int proto) | 1284 | int proto) |
1283 | { | 1285 | { |
1284 | struct sock *sk; | 1286 | struct sock *sk; |
1285 | struct udphdr *uh; | 1287 | struct udphdr *uh; |
1286 | unsigned short ulen; | 1288 | unsigned short ulen; |
1287 | struct rtable *rt = skb_rtable(skb); | 1289 | struct rtable *rt = skb_rtable(skb); |
1288 | __be32 saddr, daddr; | 1290 | __be32 saddr, daddr; |
1289 | struct net *net = dev_net(skb->dev); | 1291 | struct net *net = dev_net(skb->dev); |
1290 | 1292 | ||
1291 | /* | 1293 | /* |
1292 | * Validate the packet. | 1294 | * Validate the packet. |
1293 | */ | 1295 | */ |
1294 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 1296 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
1295 | goto drop; /* No space for header. */ | 1297 | goto drop; /* No space for header. */ |
1296 | 1298 | ||
1297 | uh = udp_hdr(skb); | 1299 | uh = udp_hdr(skb); |
1298 | ulen = ntohs(uh->len); | 1300 | ulen = ntohs(uh->len); |
1299 | if (ulen > skb->len) | 1301 | if (ulen > skb->len) |
1300 | goto short_packet; | 1302 | goto short_packet; |
1301 | 1303 | ||
1302 | if (proto == IPPROTO_UDP) { | 1304 | if (proto == IPPROTO_UDP) { |
1303 | /* UDP validates ulen. */ | 1305 | /* UDP validates ulen. */ |
1304 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) | 1306 | if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) |
1305 | goto short_packet; | 1307 | goto short_packet; |
1306 | uh = udp_hdr(skb); | 1308 | uh = udp_hdr(skb); |
1307 | } | 1309 | } |
1308 | 1310 | ||
1309 | if (udp4_csum_init(skb, uh, proto)) | 1311 | if (udp4_csum_init(skb, uh, proto)) |
1310 | goto csum_error; | 1312 | goto csum_error; |
1311 | 1313 | ||
1312 | saddr = ip_hdr(skb)->saddr; | 1314 | saddr = ip_hdr(skb)->saddr; |
1313 | daddr = ip_hdr(skb)->daddr; | 1315 | daddr = ip_hdr(skb)->daddr; |
1314 | 1316 | ||
1315 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1317 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1316 | return __udp4_lib_mcast_deliver(net, skb, uh, | 1318 | return __udp4_lib_mcast_deliver(net, skb, uh, |
1317 | saddr, daddr, udptable); | 1319 | saddr, daddr, udptable); |
1318 | 1320 | ||
1319 | sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); | 1321 | sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); |
1320 | 1322 | ||
1321 | if (sk != NULL) { | 1323 | if (sk != NULL) { |
1322 | int ret = udp_queue_rcv_skb(sk, skb); | 1324 | int ret = udp_queue_rcv_skb(sk, skb); |
1323 | sock_put(sk); | 1325 | sock_put(sk); |
1324 | 1326 | ||
1325 | /* a return value > 0 means to resubmit the input, but | 1327 | /* a return value > 0 means to resubmit the input, but |
1326 | * it wants the return to be -protocol, or 0 | 1328 | * it wants the return to be -protocol, or 0 |
1327 | */ | 1329 | */ |
1328 | if (ret > 0) | 1330 | if (ret > 0) |
1329 | return -ret; | 1331 | return -ret; |
1330 | return 0; | 1332 | return 0; |
1331 | } | 1333 | } |
1332 | 1334 | ||
1333 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) | 1335 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) |
1334 | goto drop; | 1336 | goto drop; |
1335 | nf_reset(skb); | 1337 | nf_reset(skb); |
1336 | 1338 | ||
1337 | /* No socket. Drop the packet silently if the checksum is wrong. */ | 1339 | /* No socket. Drop the packet silently if the checksum is wrong. */ |
1338 | if (udp_lib_checksum_complete(skb)) | 1340 | if (udp_lib_checksum_complete(skb)) |
1339 | goto csum_error; | 1341 | goto csum_error; |
1340 | 1342 | ||
1341 | UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); | 1343 | UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); |
1342 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 1344 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); |
1343 | 1345 | ||
1344 | /* | 1346 | /* |
1345 | * Hmm. We got a UDP packet to a port to which we | 1347 | * Hmm. We got a UDP packet to a port to which we |
1346 | * don't want to listen. Ignore it. | 1348 | * don't want to listen. Ignore it. |
1347 | */ | 1349 | */ |
1348 | kfree_skb(skb); | 1350 | kfree_skb(skb); |
1349 | return 0; | 1351 | return 0; |
1350 | 1352 | ||
1351 | short_packet: | 1353 | short_packet: |
1352 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", | 1354 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", |
1353 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1355 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1354 | &saddr, | 1356 | &saddr, |
1355 | ntohs(uh->source), | 1357 | ntohs(uh->source), |
1356 | ulen, | 1358 | ulen, |
1357 | skb->len, | 1359 | skb->len, |
1358 | &daddr, | 1360 | &daddr, |
1359 | ntohs(uh->dest)); | 1361 | ntohs(uh->dest)); |
1360 | goto drop; | 1362 | goto drop; |
1361 | 1363 | ||
1362 | csum_error: | 1364 | csum_error: |
1363 | /* | 1365 | /* |
1364 | * RFC1122: OK. Discards the bad packet silently (as far as | 1366 | * RFC1122: OK. Discards the bad packet silently (as far as |
1365 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1367 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1366 | */ | 1368 | */ |
1367 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", | 1369 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", |
1368 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1370 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1369 | &saddr, | 1371 | &saddr, |
1370 | ntohs(uh->source), | 1372 | ntohs(uh->source), |
1371 | &daddr, | 1373 | &daddr, |
1372 | ntohs(uh->dest), | 1374 | ntohs(uh->dest), |
1373 | ulen); | 1375 | ulen); |
1374 | drop: | 1376 | drop: |
1375 | UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); | 1377 | UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); |
1376 | kfree_skb(skb); | 1378 | kfree_skb(skb); |
1377 | return 0; | 1379 | return 0; |
1378 | } | 1380 | } |
1379 | 1381 | ||
1380 | int udp_rcv(struct sk_buff *skb) | 1382 | int udp_rcv(struct sk_buff *skb) |
1381 | { | 1383 | { |
1382 | return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); | 1384 | return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); |
1383 | } | 1385 | } |
1384 | 1386 | ||
1385 | void udp_destroy_sock(struct sock *sk) | 1387 | void udp_destroy_sock(struct sock *sk) |
1386 | { | 1388 | { |
1387 | lock_sock(sk); | 1389 | lock_sock(sk); |
1388 | udp_flush_pending_frames(sk); | 1390 | udp_flush_pending_frames(sk); |
1389 | release_sock(sk); | 1391 | release_sock(sk); |
1390 | } | 1392 | } |
1391 | 1393 | ||
1392 | /* | 1394 | /* |
1393 | * Socket option code for UDP | 1395 | * Socket option code for UDP |
1394 | */ | 1396 | */ |
1395 | int udp_lib_setsockopt(struct sock *sk, int level, int optname, | 1397 | int udp_lib_setsockopt(struct sock *sk, int level, int optname, |
1396 | char __user *optval, unsigned int optlen, | 1398 | char __user *optval, unsigned int optlen, |
1397 | int (*push_pending_frames)(struct sock *)) | 1399 | int (*push_pending_frames)(struct sock *)) |
1398 | { | 1400 | { |
1399 | struct udp_sock *up = udp_sk(sk); | 1401 | struct udp_sock *up = udp_sk(sk); |
1400 | int val; | 1402 | int val; |
1401 | int err = 0; | 1403 | int err = 0; |
1402 | int is_udplite = IS_UDPLITE(sk); | 1404 | int is_udplite = IS_UDPLITE(sk); |
1403 | 1405 | ||
1404 | if (optlen < sizeof(int)) | 1406 | if (optlen < sizeof(int)) |
1405 | return -EINVAL; | 1407 | return -EINVAL; |
1406 | 1408 | ||
1407 | if (get_user(val, (int __user *)optval)) | 1409 | if (get_user(val, (int __user *)optval)) |
1408 | return -EFAULT; | 1410 | return -EFAULT; |
1409 | 1411 | ||
1410 | switch (optname) { | 1412 | switch (optname) { |
1411 | case UDP_CORK: | 1413 | case UDP_CORK: |
1412 | if (val != 0) { | 1414 | if (val != 0) { |
1413 | up->corkflag = 1; | 1415 | up->corkflag = 1; |
1414 | } else { | 1416 | } else { |
1415 | up->corkflag = 0; | 1417 | up->corkflag = 0; |
1416 | lock_sock(sk); | 1418 | lock_sock(sk); |
1417 | (*push_pending_frames)(sk); | 1419 | (*push_pending_frames)(sk); |
1418 | release_sock(sk); | 1420 | release_sock(sk); |
1419 | } | 1421 | } |
1420 | break; | 1422 | break; |
1421 | 1423 | ||
1422 | case UDP_ENCAP: | 1424 | case UDP_ENCAP: |
1423 | switch (val) { | 1425 | switch (val) { |
1424 | case 0: | 1426 | case 0: |
1425 | case UDP_ENCAP_ESPINUDP: | 1427 | case UDP_ENCAP_ESPINUDP: |
1426 | case UDP_ENCAP_ESPINUDP_NON_IKE: | 1428 | case UDP_ENCAP_ESPINUDP_NON_IKE: |
1427 | up->encap_rcv = xfrm4_udp_encap_rcv; | 1429 | up->encap_rcv = xfrm4_udp_encap_rcv; |
1428 | /* FALLTHROUGH */ | 1430 | /* FALLTHROUGH */ |
1429 | case UDP_ENCAP_L2TPINUDP: | 1431 | case UDP_ENCAP_L2TPINUDP: |
1430 | up->encap_type = val; | 1432 | up->encap_type = val; |
1431 | break; | 1433 | break; |
1432 | default: | 1434 | default: |
1433 | err = -ENOPROTOOPT; | 1435 | err = -ENOPROTOOPT; |
1434 | break; | 1436 | break; |
1435 | } | 1437 | } |
1436 | break; | 1438 | break; |
1437 | 1439 | ||
1438 | /* | 1440 | /* |
1439 | * UDP-Lite's partial checksum coverage (RFC 3828). | 1441 | * UDP-Lite's partial checksum coverage (RFC 3828). |
1440 | */ | 1442 | */ |
1441 | /* The sender sets actual checksum coverage length via this option. | 1443 | /* The sender sets actual checksum coverage length via this option. |
1442 | * The case coverage > packet length is handled by the send module. */ | 1444 | * The case coverage > packet length is handled by the send module. */ |
1443 | case UDPLITE_SEND_CSCOV: | 1445 | case UDPLITE_SEND_CSCOV: |
1444 | if (!is_udplite) /* Disable the option on UDP sockets */ | 1446 | if (!is_udplite) /* Disable the option on UDP sockets */ |
1445 | return -ENOPROTOOPT; | 1447 | return -ENOPROTOOPT; |
1446 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ | 1448 | if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ |
1447 | val = 8; | 1449 | val = 8; |
1448 | else if (val > USHORT_MAX) | 1450 | else if (val > USHORT_MAX) |
1449 | val = USHORT_MAX; | 1451 | val = USHORT_MAX; |
1450 | up->pcslen = val; | 1452 | up->pcslen = val; |
1451 | up->pcflag |= UDPLITE_SEND_CC; | 1453 | up->pcflag |= UDPLITE_SEND_CC; |
1452 | break; | 1454 | break; |
1453 | 1455 | ||
1454 | /* The receiver specifies a minimum checksum coverage value. To make | 1456 | /* The receiver specifies a minimum checksum coverage value. To make |
1455 | * sense, this should be set to at least 8 (as done below). If zero is | 1457 | * sense, this should be set to at least 8 (as done below). If zero is |
1456 | * used, this again means full checksum coverage. */ | 1458 | * used, this again means full checksum coverage. */ |
1457 | case UDPLITE_RECV_CSCOV: | 1459 | case UDPLITE_RECV_CSCOV: |
1458 | if (!is_udplite) /* Disable the option on UDP sockets */ | 1460 | if (!is_udplite) /* Disable the option on UDP sockets */ |
1459 | return -ENOPROTOOPT; | 1461 | return -ENOPROTOOPT; |
1460 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ | 1462 | if (val != 0 && val < 8) /* Avoid silly minimal values. */ |
1461 | val = 8; | 1463 | val = 8; |
1462 | else if (val > USHORT_MAX) | 1464 | else if (val > USHORT_MAX) |
1463 | val = USHORT_MAX; | 1465 | val = USHORT_MAX; |
1464 | up->pcrlen = val; | 1466 | up->pcrlen = val; |
1465 | up->pcflag |= UDPLITE_RECV_CC; | 1467 | up->pcflag |= UDPLITE_RECV_CC; |
1466 | break; | 1468 | break; |
1467 | 1469 | ||
1468 | default: | 1470 | default: |
1469 | err = -ENOPROTOOPT; | 1471 | err = -ENOPROTOOPT; |
1470 | break; | 1472 | break; |
1471 | } | 1473 | } |
1472 | 1474 | ||
1473 | return err; | 1475 | return err; |
1474 | } | 1476 | } |
1475 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1477 | EXPORT_SYMBOL(udp_lib_setsockopt); |
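The two UDPLITE_*_CSCOV cases above are the user-facing halves of RFC 3828's partial checksum coverage. A hedged user-space sketch of driving them, with the constants taken from udplite(7) and defined locally in case the libc headers predate them:

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE         136
#endif
#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV      10
#endif
#ifndef UDPLITE_RECV_CSCOV
#define UDPLITE_RECV_CSCOV      11
#endif

int udplite_socket_with_coverage(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
        int cscov = 20; /* checksum only the first 20 bytes of each datagram */

        if (fd < 0)
                return -1;
        /* Sender side: coverage of outgoing datagrams (pcslen above). */
        setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV, &cscov, sizeof(cscov));
        /* Receiver side: minimum acceptable coverage (pcrlen above);
         * values 1..7 are bumped to 8 by udp_lib_setsockopt(). */
        setsockopt(fd, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV, &cscov, sizeof(cscov));
        return fd;
}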
1476 | 1478 | ||
1477 | int udp_setsockopt(struct sock *sk, int level, int optname, | 1479 | int udp_setsockopt(struct sock *sk, int level, int optname, |
1478 | char __user *optval, unsigned int optlen) | 1480 | char __user *optval, unsigned int optlen) |
1479 | { | 1481 | { |
1480 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1482 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1481 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | 1483 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1482 | udp_push_pending_frames); | 1484 | udp_push_pending_frames); |
1483 | return ip_setsockopt(sk, level, optname, optval, optlen); | 1485 | return ip_setsockopt(sk, level, optname, optval, optlen); |
1484 | } | 1486 | } |
1485 | 1487 | ||
1486 | #ifdef CONFIG_COMPAT | 1488 | #ifdef CONFIG_COMPAT |
1487 | int compat_udp_setsockopt(struct sock *sk, int level, int optname, | 1489 | int compat_udp_setsockopt(struct sock *sk, int level, int optname, |
1488 | char __user *optval, unsigned int optlen) | 1490 | char __user *optval, unsigned int optlen) |
1489 | { | 1491 | { |
1490 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1492 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1491 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | 1493 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1492 | udp_push_pending_frames); | 1494 | udp_push_pending_frames); |
1493 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); | 1495 | return compat_ip_setsockopt(sk, level, optname, optval, optlen); |
1494 | } | 1496 | } |
1495 | #endif | 1497 | #endif |
1496 | 1498 | ||
1497 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, | 1499 | int udp_lib_getsockopt(struct sock *sk, int level, int optname, |
1498 | char __user *optval, int __user *optlen) | 1500 | char __user *optval, int __user *optlen) |
1499 | { | 1501 | { |
1500 | struct udp_sock *up = udp_sk(sk); | 1502 | struct udp_sock *up = udp_sk(sk); |
1501 | int val, len; | 1503 | int val, len; |
1502 | 1504 | ||
1503 | if (get_user(len, optlen)) | 1505 | if (get_user(len, optlen)) |
1504 | return -EFAULT; | 1506 | return -EFAULT; |
1505 | 1507 | ||
1506 | len = min_t(unsigned int, len, sizeof(int)); | 1508 | len = min_t(unsigned int, len, sizeof(int)); |
1507 | 1509 | ||
1508 | if (len < 0) | 1510 | if (len < 0) |
1509 | return -EINVAL; | 1511 | return -EINVAL; |
1510 | 1512 | ||
1511 | switch (optname) { | 1513 | switch (optname) { |
1512 | case UDP_CORK: | 1514 | case UDP_CORK: |
1513 | val = up->corkflag; | 1515 | val = up->corkflag; |
1514 | break; | 1516 | break; |
1515 | 1517 | ||
1516 | case UDP_ENCAP: | 1518 | case UDP_ENCAP: |
1517 | val = up->encap_type; | 1519 | val = up->encap_type; |
1518 | break; | 1520 | break; |
1519 | 1521 | ||
1520 | /* The following two cannot be changed on UDP sockets; the return is | 1522 | /* The following two cannot be changed on UDP sockets; the return is |
1521 | * always 0 (which corresponds to the full checksum coverage of UDP). */ | 1523 | * always 0 (which corresponds to the full checksum coverage of UDP). */ |
1522 | case UDPLITE_SEND_CSCOV: | 1524 | case UDPLITE_SEND_CSCOV: |
1523 | val = up->pcslen; | 1525 | val = up->pcslen; |
1524 | break; | 1526 | break; |
1525 | 1527 | ||
1526 | case UDPLITE_RECV_CSCOV: | 1528 | case UDPLITE_RECV_CSCOV: |
1527 | val = up->pcrlen; | 1529 | val = up->pcrlen; |
1528 | break; | 1530 | break; |
1529 | 1531 | ||
1530 | default: | 1532 | default: |
1531 | return -ENOPROTOOPT; | 1533 | return -ENOPROTOOPT; |
1532 | } | 1534 | } |
1533 | 1535 | ||
1534 | if (put_user(len, optlen)) | 1536 | if (put_user(len, optlen)) |
1535 | return -EFAULT; | 1537 | return -EFAULT; |
1536 | if (copy_to_user(optval, &val, len)) | 1538 | if (copy_to_user(optval, &val, len)) |
1537 | return -EFAULT; | 1539 | return -EFAULT; |
1538 | return 0; | 1540 | return 0; |
1539 | } | 1541 | } |
1540 | EXPORT_SYMBOL(udp_lib_getsockopt); | 1542 | EXPORT_SYMBOL(udp_lib_getsockopt); |
1541 | 1543 | ||
1542 | int udp_getsockopt(struct sock *sk, int level, int optname, | 1544 | int udp_getsockopt(struct sock *sk, int level, int optname, |
1543 | char __user *optval, int __user *optlen) | 1545 | char __user *optval, int __user *optlen) |
1544 | { | 1546 | { |
1545 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1547 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1546 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | 1548 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1547 | return ip_getsockopt(sk, level, optname, optval, optlen); | 1549 | return ip_getsockopt(sk, level, optname, optval, optlen); |
1548 | } | 1550 | } |
1549 | 1551 | ||
1550 | #ifdef CONFIG_COMPAT | 1552 | #ifdef CONFIG_COMPAT |
1551 | int compat_udp_getsockopt(struct sock *sk, int level, int optname, | 1553 | int compat_udp_getsockopt(struct sock *sk, int level, int optname, |
1552 | char __user *optval, int __user *optlen) | 1554 | char __user *optval, int __user *optlen) |
1553 | { | 1555 | { |
1554 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1556 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1555 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | 1557 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1556 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); | 1558 | return compat_ip_getsockopt(sk, level, optname, optval, optlen); |
1557 | } | 1559 | } |
1558 | #endif | 1560 | #endif |
1559 | /** | 1561 | /** |
1560 | * udp_poll - wait for a UDP event. | 1562 | * udp_poll - wait for a UDP event. |
1561 | * @file - file struct | 1563 | * @file - file struct |
1562 | * @sock - socket | 1564 | * @sock - socket |
1563 | * @wait - poll table | 1565 | * @wait - poll table |
1564 | * | 1566 | * |
1565 | * This is the same as datagram_poll(), except for the special case of | 1567 | * This is the same as datagram_poll(), except for the special case of |
1566 | * blocking sockets. If an application is using a blocking fd | 1568 | * blocking sockets. If an application is using a blocking fd |
1567 | * and a packet with a checksum error is in the queue, | 1569 | * and a packet with a checksum error is in the queue, |
1568 | * then select could indicate data available, | 1570 | * then select could indicate data available, |
1569 | * but the subsequent read would block. Add special case code | 1571 | * but the subsequent read would block. Add special case code |
1570 | * to work around these arguably broken applications. | 1572 | * to work around these arguably broken applications. |
1571 | */ | 1573 | */ |
1572 | unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | 1574 | unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) |
1573 | { | 1575 | { |
1574 | unsigned int mask = datagram_poll(file, sock, wait); | 1576 | unsigned int mask = datagram_poll(file, sock, wait); |
1575 | struct sock *sk = sock->sk; | 1577 | struct sock *sk = sock->sk; |
1576 | 1578 | ||
1577 | /* Check for false positives due to checksum errors */ | 1579 | /* Check for false positives due to checksum errors */ |
1578 | if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && | 1580 | if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && |
1579 | !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) | 1581 | !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) |
1580 | mask &= ~(POLLIN | POLLRDNORM); | 1582 | mask &= ~(POLLIN | POLLRDNORM); |
1581 | 1583 | ||
1582 | return mask; | 1584 | return mask; |
1583 | 1585 | ||
1584 | } | 1586 | } |
1585 | EXPORT_SYMBOL(udp_poll); | 1587 | EXPORT_SYMBOL(udp_poll); |
1586 | 1588 | ||
1587 | struct proto udp_prot = { | 1589 | struct proto udp_prot = { |
1588 | .name = "UDP", | 1590 | .name = "UDP", |
1589 | .owner = THIS_MODULE, | 1591 | .owner = THIS_MODULE, |
1590 | .close = udp_lib_close, | 1592 | .close = udp_lib_close, |
1591 | .connect = ip4_datagram_connect, | 1593 | .connect = ip4_datagram_connect, |
1592 | .disconnect = udp_disconnect, | 1594 | .disconnect = udp_disconnect, |
1593 | .ioctl = udp_ioctl, | 1595 | .ioctl = udp_ioctl, |
1594 | .destroy = udp_destroy_sock, | 1596 | .destroy = udp_destroy_sock, |
1595 | .setsockopt = udp_setsockopt, | 1597 | .setsockopt = udp_setsockopt, |
1596 | .getsockopt = udp_getsockopt, | 1598 | .getsockopt = udp_getsockopt, |
1597 | .sendmsg = udp_sendmsg, | 1599 | .sendmsg = udp_sendmsg, |
1598 | .recvmsg = udp_recvmsg, | 1600 | .recvmsg = udp_recvmsg, |
1599 | .sendpage = udp_sendpage, | 1601 | .sendpage = udp_sendpage, |
1600 | .backlog_rcv = __udp_queue_rcv_skb, | 1602 | .backlog_rcv = __udp_queue_rcv_skb, |
1601 | .hash = udp_lib_hash, | 1603 | .hash = udp_lib_hash, |
1602 | .unhash = udp_lib_unhash, | 1604 | .unhash = udp_lib_unhash, |
1603 | .get_port = udp_v4_get_port, | 1605 | .get_port = udp_v4_get_port, |
1604 | .memory_allocated = &udp_memory_allocated, | 1606 | .memory_allocated = &udp_memory_allocated, |
1605 | .sysctl_mem = sysctl_udp_mem, | 1607 | .sysctl_mem = sysctl_udp_mem, |
1606 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1608 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1607 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1609 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1608 | .obj_size = sizeof(struct udp_sock), | 1610 | .obj_size = sizeof(struct udp_sock), |
1609 | .slab_flags = SLAB_DESTROY_BY_RCU, | 1611 | .slab_flags = SLAB_DESTROY_BY_RCU, |
1610 | .h.udp_table = &udp_table, | 1612 | .h.udp_table = &udp_table, |
1611 | #ifdef CONFIG_COMPAT | 1613 | #ifdef CONFIG_COMPAT |
1612 | .compat_setsockopt = compat_udp_setsockopt, | 1614 | .compat_setsockopt = compat_udp_setsockopt, |
1613 | .compat_getsockopt = compat_udp_getsockopt, | 1615 | .compat_getsockopt = compat_udp_getsockopt, |
1614 | #endif | 1616 | #endif |
1615 | }; | 1617 | }; |
1616 | EXPORT_SYMBOL(udp_prot); | 1618 | EXPORT_SYMBOL(udp_prot); |
1617 | 1619 | ||
1618 | /* ------------------------------------------------------------------------ */ | 1620 | /* ------------------------------------------------------------------------ */ |
1619 | #ifdef CONFIG_PROC_FS | 1621 | #ifdef CONFIG_PROC_FS |
1620 | 1622 | ||
1621 | static struct sock *udp_get_first(struct seq_file *seq, int start) | 1623 | static struct sock *udp_get_first(struct seq_file *seq, int start) |
1622 | { | 1624 | { |
1623 | struct sock *sk; | 1625 | struct sock *sk; |
1624 | struct udp_iter_state *state = seq->private; | 1626 | struct udp_iter_state *state = seq->private; |
1625 | struct net *net = seq_file_net(seq); | 1627 | struct net *net = seq_file_net(seq); |
1626 | 1628 | ||
1627 | for (state->bucket = start; state->bucket <= state->udp_table->mask; | 1629 | for (state->bucket = start; state->bucket <= state->udp_table->mask; |
1628 | ++state->bucket) { | 1630 | ++state->bucket) { |
1629 | struct hlist_nulls_node *node; | 1631 | struct hlist_nulls_node *node; |
1630 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; | 1632 | struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; |
1631 | 1633 | ||
1632 | if (hlist_nulls_empty(&hslot->head)) | 1634 | if (hlist_nulls_empty(&hslot->head)) |
1633 | continue; | 1635 | continue; |
1634 | 1636 | ||
1635 | spin_lock_bh(&hslot->lock); | 1637 | spin_lock_bh(&hslot->lock); |
1636 | sk_nulls_for_each(sk, node, &hslot->head) { | 1638 | sk_nulls_for_each(sk, node, &hslot->head) { |
1637 | if (!net_eq(sock_net(sk), net)) | 1639 | if (!net_eq(sock_net(sk), net)) |
1638 | continue; | 1640 | continue; |
1639 | if (sk->sk_family == state->family) | 1641 | if (sk->sk_family == state->family) |
1640 | goto found; | 1642 | goto found; |
1641 | } | 1643 | } |
1642 | spin_unlock_bh(&hslot->lock); | 1644 | spin_unlock_bh(&hslot->lock); |
1643 | } | 1645 | } |
1644 | sk = NULL; | 1646 | sk = NULL; |
1645 | found: | 1647 | found: |
1646 | return sk; | 1648 | return sk; |
1647 | } | 1649 | } |
1648 | 1650 | ||
1649 | static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | 1651 | static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) |
1650 | { | 1652 | { |
1651 | struct udp_iter_state *state = seq->private; | 1653 | struct udp_iter_state *state = seq->private; |
1652 | struct net *net = seq_file_net(seq); | 1654 | struct net *net = seq_file_net(seq); |
1653 | 1655 | ||
1654 | do { | 1656 | do { |
1655 | sk = sk_nulls_next(sk); | 1657 | sk = sk_nulls_next(sk); |
1656 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1658 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1657 | 1659 | ||
1658 | if (!sk) { | 1660 | if (!sk) { |
1659 | if (state->bucket <= state->udp_table->mask) | 1661 | if (state->bucket <= state->udp_table->mask) |
1660 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1662 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1661 | return udp_get_first(seq, state->bucket + 1); | 1663 | return udp_get_first(seq, state->bucket + 1); |
1662 | } | 1664 | } |
1663 | return sk; | 1665 | return sk; |
1664 | } | 1666 | } |
1665 | 1667 | ||
1666 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | 1668 | static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) |
1667 | { | 1669 | { |
1668 | struct sock *sk = udp_get_first(seq, 0); | 1670 | struct sock *sk = udp_get_first(seq, 0); |
1669 | 1671 | ||
1670 | if (sk) | 1672 | if (sk) |
1671 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) | 1673 | while (pos && (sk = udp_get_next(seq, sk)) != NULL) |
1672 | --pos; | 1674 | --pos; |
1673 | return pos ? NULL : sk; | 1675 | return pos ? NULL : sk; |
1674 | } | 1676 | } |
1675 | 1677 | ||
1676 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1678 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1677 | { | 1679 | { |
1678 | struct udp_iter_state *state = seq->private; | 1680 | struct udp_iter_state *state = seq->private; |
1679 | state->bucket = MAX_UDP_PORTS; | 1681 | state->bucket = MAX_UDP_PORTS; |
1680 | 1682 | ||
1681 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1683 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1682 | } | 1684 | } |
1683 | 1685 | ||
1684 | static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 1686 | static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1685 | { | 1687 | { |
1686 | struct sock *sk; | 1688 | struct sock *sk; |
1687 | 1689 | ||
1688 | if (v == SEQ_START_TOKEN) | 1690 | if (v == SEQ_START_TOKEN) |
1689 | sk = udp_get_idx(seq, 0); | 1691 | sk = udp_get_idx(seq, 0); |
1690 | else | 1692 | else |
1691 | sk = udp_get_next(seq, v); | 1693 | sk = udp_get_next(seq, v); |
1692 | 1694 | ||
1693 | ++*pos; | 1695 | ++*pos; |
1694 | return sk; | 1696 | return sk; |
1695 | } | 1697 | } |
1696 | 1698 | ||
1697 | static void udp_seq_stop(struct seq_file *seq, void *v) | 1699 | static void udp_seq_stop(struct seq_file *seq, void *v) |
1698 | { | 1700 | { |
1699 | struct udp_iter_state *state = seq->private; | 1701 | struct udp_iter_state *state = seq->private; |
1700 | 1702 | ||
1701 | if (state->bucket <= state->udp_table->mask) | 1703 | if (state->bucket <= state->udp_table->mask) |
1702 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1704 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); |
1703 | } | 1705 | } |
1704 | 1706 | ||
1705 | static int udp_seq_open(struct inode *inode, struct file *file) | 1707 | static int udp_seq_open(struct inode *inode, struct file *file) |
1706 | { | 1708 | { |
1707 | struct udp_seq_afinfo *afinfo = PDE(inode)->data; | 1709 | struct udp_seq_afinfo *afinfo = PDE(inode)->data; |
1708 | struct udp_iter_state *s; | 1710 | struct udp_iter_state *s; |
1709 | int err; | 1711 | int err; |
1710 | 1712 | ||
1711 | err = seq_open_net(inode, file, &afinfo->seq_ops, | 1713 | err = seq_open_net(inode, file, &afinfo->seq_ops, |
1712 | sizeof(struct udp_iter_state)); | 1714 | sizeof(struct udp_iter_state)); |
1713 | if (err < 0) | 1715 | if (err < 0) |
1714 | return err; | 1716 | return err; |
1715 | 1717 | ||
1716 | s = ((struct seq_file *)file->private_data)->private; | 1718 | s = ((struct seq_file *)file->private_data)->private; |
1717 | s->family = afinfo->family; | 1719 | s->family = afinfo->family; |
1718 | s->udp_table = afinfo->udp_table; | 1720 | s->udp_table = afinfo->udp_table; |
1719 | return err; | 1721 | return err; |
1720 | } | 1722 | } |
1721 | 1723 | ||
1722 | /* ------------------------------------------------------------------------ */ | 1724 | /* ------------------------------------------------------------------------ */ |
1723 | int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) | 1725 | int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) |
1724 | { | 1726 | { |
1725 | struct proc_dir_entry *p; | 1727 | struct proc_dir_entry *p; |
1726 | int rc = 0; | 1728 | int rc = 0; |
1727 | 1729 | ||
1728 | afinfo->seq_fops.open = udp_seq_open; | 1730 | afinfo->seq_fops.open = udp_seq_open; |
1729 | afinfo->seq_fops.read = seq_read; | 1731 | afinfo->seq_fops.read = seq_read; |
1730 | afinfo->seq_fops.llseek = seq_lseek; | 1732 | afinfo->seq_fops.llseek = seq_lseek; |
1731 | afinfo->seq_fops.release = seq_release_net; | 1733 | afinfo->seq_fops.release = seq_release_net; |
1732 | 1734 | ||
1733 | afinfo->seq_ops.start = udp_seq_start; | 1735 | afinfo->seq_ops.start = udp_seq_start; |
1734 | afinfo->seq_ops.next = udp_seq_next; | 1736 | afinfo->seq_ops.next = udp_seq_next; |
1735 | afinfo->seq_ops.stop = udp_seq_stop; | 1737 | afinfo->seq_ops.stop = udp_seq_stop; |
1736 | 1738 | ||
1737 | p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, | 1739 | p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, |
1738 | &afinfo->seq_fops, afinfo); | 1740 | &afinfo->seq_fops, afinfo); |
1739 | if (!p) | 1741 | if (!p) |
1740 | rc = -ENOMEM; | 1742 | rc = -ENOMEM; |
1741 | return rc; | 1743 | return rc; |
1742 | } | 1744 | } |
1743 | EXPORT_SYMBOL(udp_proc_register); | 1745 | EXPORT_SYMBOL(udp_proc_register); |
1744 | 1746 | ||
1745 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) | 1747 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) |
1746 | { | 1748 | { |
1747 | proc_net_remove(net, afinfo->name); | 1749 | proc_net_remove(net, afinfo->name); |
1748 | } | 1750 | } |
1749 | EXPORT_SYMBOL(udp_proc_unregister); | 1751 | EXPORT_SYMBOL(udp_proc_unregister); |
1750 | 1752 | ||
1751 | /* ------------------------------------------------------------------------ */ | 1753 | /* ------------------------------------------------------------------------ */ |
1752 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, | 1754 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, |
1753 | int bucket, int *len) | 1755 | int bucket, int *len) |
1754 | { | 1756 | { |
1755 | struct inet_sock *inet = inet_sk(sp); | 1757 | struct inet_sock *inet = inet_sk(sp); |
1756 | __be32 dest = inet->inet_daddr; | 1758 | __be32 dest = inet->inet_daddr; |
1757 | __be32 src = inet->inet_rcv_saddr; | 1759 | __be32 src = inet->inet_rcv_saddr; |
1758 | __u16 destp = ntohs(inet->inet_dport); | 1760 | __u16 destp = ntohs(inet->inet_dport); |
1759 | __u16 srcp = ntohs(inet->inet_sport); | 1761 | __u16 srcp = ntohs(inet->inet_sport); |
1760 | 1762 | ||
1761 | seq_printf(f, "%5d: %08X:%04X %08X:%04X" | 1763 | seq_printf(f, "%5d: %08X:%04X %08X:%04X" |
1762 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", | 1764 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", |
1763 | bucket, src, srcp, dest, destp, sp->sk_state, | 1765 | bucket, src, srcp, dest, destp, sp->sk_state, |
1764 | sk_wmem_alloc_get(sp), | 1766 | sk_wmem_alloc_get(sp), |
1765 | sk_rmem_alloc_get(sp), | 1767 | sk_rmem_alloc_get(sp), |
1766 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | 1768 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), |
1767 | atomic_read(&sp->sk_refcnt), sp, | 1769 | atomic_read(&sp->sk_refcnt), sp, |
1768 | atomic_read(&sp->sk_drops), len); | 1770 | atomic_read(&sp->sk_drops), len); |
1769 | } | 1771 | } |
1770 | 1772 | ||
1771 | int udp4_seq_show(struct seq_file *seq, void *v) | 1773 | int udp4_seq_show(struct seq_file *seq, void *v) |
1772 | { | 1774 | { |
1773 | if (v == SEQ_START_TOKEN) | 1775 | if (v == SEQ_START_TOKEN) |
1774 | seq_printf(seq, "%-127s\n", | 1776 | seq_printf(seq, "%-127s\n", |
1775 | " sl local_address rem_address st tx_queue " | 1777 | " sl local_address rem_address st tx_queue " |
1776 | "rx_queue tr tm->when retrnsmt uid timeout " | 1778 | "rx_queue tr tm->when retrnsmt uid timeout " |
1777 | "inode ref pointer drops"); | 1779 | "inode ref pointer drops"); |
1778 | else { | 1780 | else { |
1779 | struct udp_iter_state *state = seq->private; | 1781 | struct udp_iter_state *state = seq->private; |
1780 | int len; | 1782 | int len; |
1781 | 1783 | ||
1782 | udp4_format_sock(v, seq, state->bucket, &len); | 1784 | udp4_format_sock(v, seq, state->bucket, &len); |
1783 | seq_printf(seq, "%*s\n", 127 - len, ""); | 1785 | seq_printf(seq, "%*s\n", 127 - len, ""); |
1784 | } | 1786 | } |
1785 | return 0; | 1787 | return 0; |
1786 | } | 1788 | } |
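udp4_format_sock() above emits sk_drops as the last column of each /proc/net/udp row, and udp4_seq_show() pads every row to 127 characters. A small user-space sketch that reads the drops column back; illustrative only, not a robust parser:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/net/udp", "r");

        if (!f)
                return 1;
        if (!fgets(line, sizeof(line), f))      /* skip the header row */
                return 1;
        while (fgets(line, sizeof(line), f)) {
                char *tok, *drops = NULL;
                /* rows are space-padded, so keep the last real token */
                for (tok = strtok(line, " \n"); tok; tok = strtok(NULL, " \n"))
                        drops = tok;
                if (drops)
                        printf("drops=%s\n", drops);
        }
        fclose(f);
        return 0;
}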
1787 | 1789 | ||
1788 | /* ------------------------------------------------------------------------ */ | 1790 | /* ------------------------------------------------------------------------ */ |
1789 | static struct udp_seq_afinfo udp4_seq_afinfo = { | 1791 | static struct udp_seq_afinfo udp4_seq_afinfo = { |
1790 | .name = "udp", | 1792 | .name = "udp", |
1791 | .family = AF_INET, | 1793 | .family = AF_INET, |
1792 | .udp_table = &udp_table, | 1794 | .udp_table = &udp_table, |
1793 | .seq_fops = { | 1795 | .seq_fops = { |
1794 | .owner = THIS_MODULE, | 1796 | .owner = THIS_MODULE, |
1795 | }, | 1797 | }, |
1796 | .seq_ops = { | 1798 | .seq_ops = { |
1797 | .show = udp4_seq_show, | 1799 | .show = udp4_seq_show, |
1798 | }, | 1800 | }, |
1799 | }; | 1801 | }; |
1800 | 1802 | ||
1801 | static int udp4_proc_init_net(struct net *net) | 1803 | static int udp4_proc_init_net(struct net *net) |
1802 | { | 1804 | { |
1803 | return udp_proc_register(net, &udp4_seq_afinfo); | 1805 | return udp_proc_register(net, &udp4_seq_afinfo); |
1804 | } | 1806 | } |
1805 | 1807 | ||
1806 | static void udp4_proc_exit_net(struct net *net) | 1808 | static void udp4_proc_exit_net(struct net *net) |
1807 | { | 1809 | { |
1808 | udp_proc_unregister(net, &udp4_seq_afinfo); | 1810 | udp_proc_unregister(net, &udp4_seq_afinfo); |
1809 | } | 1811 | } |
1810 | 1812 | ||
1811 | static struct pernet_operations udp4_net_ops = { | 1813 | static struct pernet_operations udp4_net_ops = { |
1812 | .init = udp4_proc_init_net, | 1814 | .init = udp4_proc_init_net, |
1813 | .exit = udp4_proc_exit_net, | 1815 | .exit = udp4_proc_exit_net, |
1814 | }; | 1816 | }; |
1815 | 1817 | ||
1816 | int __init udp4_proc_init(void) | 1818 | int __init udp4_proc_init(void) |
1817 | { | 1819 | { |
1818 | return register_pernet_subsys(&udp4_net_ops); | 1820 | return register_pernet_subsys(&udp4_net_ops); |
1819 | } | 1821 | } |
1820 | 1822 | ||
1821 | void udp4_proc_exit(void) | 1823 | void udp4_proc_exit(void) |
1822 | { | 1824 | { |
1823 | unregister_pernet_subsys(&udp4_net_ops); | 1825 | unregister_pernet_subsys(&udp4_net_ops); |
1824 | } | 1826 | } |
1825 | #endif /* CONFIG_PROC_FS */ | 1827 | #endif /* CONFIG_PROC_FS */ |
1826 | 1828 | ||
1827 | static __initdata unsigned long uhash_entries; | 1829 | static __initdata unsigned long uhash_entries; |
1828 | static int __init set_uhash_entries(char *str) | 1830 | static int __init set_uhash_entries(char *str) |
1829 | { | 1831 | { |
1830 | if (!str) | 1832 | if (!str) |
1831 | return 0; | 1833 | return 0; |
1832 | uhash_entries = simple_strtoul(str, &str, 0); | 1834 | uhash_entries = simple_strtoul(str, &str, 0); |
1833 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) | 1835 | if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) |
1834 | uhash_entries = UDP_HTABLE_SIZE_MIN; | 1836 | uhash_entries = UDP_HTABLE_SIZE_MIN; |
1835 | return 1; | 1837 | return 1; |
1836 | } | 1838 | } |
1837 | __setup("uhash_entries=", set_uhash_entries); | 1839 | __setup("uhash_entries=", set_uhash_entries); |
1838 | 1840 | ||
1839 | void __init udp_table_init(struct udp_table *table, const char *name) | 1841 | void __init udp_table_init(struct udp_table *table, const char *name) |
1840 | { | 1842 | { |
1841 | unsigned int i; | 1843 | unsigned int i; |
1842 | 1844 | ||
1843 | if (!CONFIG_BASE_SMALL) | 1845 | if (!CONFIG_BASE_SMALL) |
1844 | table->hash = alloc_large_system_hash(name, | 1846 | table->hash = alloc_large_system_hash(name, |
1845 | sizeof(struct udp_hslot), | 1847 | sizeof(struct udp_hslot), |
1846 | uhash_entries, | 1848 | uhash_entries, |
1847 | 21, /* one slot per 2 MB */ | 1849 | 21, /* one slot per 2 MB */ |
1848 | 0, | 1850 | 0, |
1849 | &table->log, | 1851 | &table->log, |
1850 | &table->mask, | 1852 | &table->mask, |
1851 | 64 * 1024); | 1853 | 64 * 1024); |
1852 | /* | 1854 | /* |
1853 | * Make sure hash table has the minimum size | 1855 | * Make sure hash table has the minimum size |
1854 | */ | 1856 | */ |
1855 | if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { | 1857 | if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { |
1856 | table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * | 1858 | table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * |
1857 | sizeof(struct udp_hslot), GFP_KERNEL); | 1859 | sizeof(struct udp_hslot), GFP_KERNEL); |
1858 | if (!table->hash) | 1860 | if (!table->hash) |
1859 | panic(name); | 1861 | panic(name); |
1860 | table->log = ilog2(UDP_HTABLE_SIZE_MIN); | 1862 | table->log = ilog2(UDP_HTABLE_SIZE_MIN); |
1861 | table->mask = UDP_HTABLE_SIZE_MIN - 1; | 1863 | table->mask = UDP_HTABLE_SIZE_MIN - 1; |
1862 | } | 1864 | } |
1863 | for (i = 0; i <= table->mask; i++) { | 1865 | for (i = 0; i <= table->mask; i++) { |
1864 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); | 1866 | INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); |
1865 | spin_lock_init(&table->hash[i].lock); | 1867 | spin_lock_init(&table->hash[i].lock); |
1866 | } | 1868 | } |
1867 | } | 1869 | } |
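udp_table_init() keeps table->mask as size - 1 (and table->log as its log2) so that slot selection elsewhere in this file, e.g. udp_hashslot(), can mask instead of divide. The generic pattern, sketched below; the real hash also mixes in the network namespace:

/* Valid only for power-of-two tables, where mask == (1 << log) - 1. */
static inline unsigned int hslot_index(unsigned int hash, unsigned int mask)
{
        return hash & mask;     /* equivalent to hash % (mask + 1), but cheaper */
}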
1868 | 1870 | ||
1869 | void __init udp_init(void) | 1871 | void __init udp_init(void) |
1870 | { | 1872 | { |
1871 | unsigned long nr_pages, limit; | 1873 | unsigned long nr_pages, limit; |
1872 | 1874 | ||
1873 | udp_table_init(&udp_table, "UDP"); | 1875 | udp_table_init(&udp_table, "UDP"); |
1874 | /* Set the pressure threshold up by the same strategy as TCP. It is a | 1876 | /* Set the pressure threshold up by the same strategy as TCP. It is a |
1875 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | 1877 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing |
1876 | * toward zero with the amount of memory, with a floor of 128 pages. | 1878 | * toward zero with the amount of memory, with a floor of 128 pages. |
1877 | */ | 1879 | */ |
1878 | nr_pages = totalram_pages - totalhigh_pages; | 1880 | nr_pages = totalram_pages - totalhigh_pages; |
1879 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | 1881 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); |
1880 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | 1882 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); |
1881 | limit = max(limit, 128UL); | 1883 | limit = max(limit, 128UL); |
1882 | sysctl_udp_mem[0] = limit / 4 * 3; | 1884 | sysctl_udp_mem[0] = limit / 4 * 3; |
1883 | sysctl_udp_mem[1] = limit; | 1885 | sysctl_udp_mem[1] = limit; |
1884 | sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; | 1886 | sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; |
1885 | 1887 | ||
1886 | sysctl_udp_rmem_min = SK_MEM_QUANTUM; | 1888 | sysctl_udp_rmem_min = SK_MEM_QUANTUM; |
1887 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; | 1889 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; |
1888 | } | 1890 | } |
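To make the sizing comment above concrete, here is the same arithmetic worked for a hypothetical machine with 1 GiB of low memory and 4 KiB pages (PAGE_SHIFT == 12); the numbers are illustrative only:

#include <stdio.h>

int main(void)
{
        unsigned long nr_pages = 262144;        /* 1 GiB / 4 KiB pages */
        unsigned long limit;

        limit = (nr_pages < (1UL << 16) ? nr_pages : (1UL << 16)) >> 8;
        limit = (limit * (nr_pages >> 8)) >> 1; /* PAGE_SHIFT - 11 == 1 */
        if (limit < 128)
                limit = 128;
        /* limit == 131072 pages == 512 MiB */
        printf("udp_mem: %lu %lu %lu (pages)\n",
               limit / 4 * 3, limit, limit / 4 * 3 * 2);
        /* prints: udp_mem: 98304 131072 196608 (pages) */
        return 0;
}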
1889 | 1891 | ||
1890 | int udp4_ufo_send_check(struct sk_buff *skb) | 1892 | int udp4_ufo_send_check(struct sk_buff *skb) |
1891 | { | 1893 | { |
1892 | const struct iphdr *iph; | 1894 | const struct iphdr *iph; |
1893 | struct udphdr *uh; | 1895 | struct udphdr *uh; |
1894 | 1896 | ||
1895 | if (!pskb_may_pull(skb, sizeof(*uh))) | 1897 | if (!pskb_may_pull(skb, sizeof(*uh))) |
1896 | return -EINVAL; | 1898 | return -EINVAL; |
1897 | 1899 | ||
1898 | iph = ip_hdr(skb); | 1900 | iph = ip_hdr(skb); |
1899 | uh = udp_hdr(skb); | 1901 | uh = udp_hdr(skb); |
1900 | 1902 | ||
1901 | uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | 1903 | uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, |
1902 | IPPROTO_UDP, 0); | 1904 | IPPROTO_UDP, 0); |
1903 | skb->csum_start = skb_transport_header(skb) - skb->head; | 1905 | skb->csum_start = skb_transport_header(skb) - skb->head; |
1904 | skb->csum_offset = offsetof(struct udphdr, check); | 1906 | skb->csum_offset = offsetof(struct udphdr, check); |
1905 | skb->ip_summed = CHECKSUM_PARTIAL; | 1907 | skb->ip_summed = CHECKSUM_PARTIAL; |
1906 | return 0; | 1908 | return 0; |
1907 | } | 1909 | } |
1908 | 1910 | ||
1909 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | 1911 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) |
1910 | { | 1912 | { |
1911 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 1913 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
1912 | unsigned int mss; | 1914 | unsigned int mss; |
1913 | int offset; | 1915 | int offset; |
1914 | __wsum csum; | 1916 | __wsum csum; |
1915 | 1917 | ||
1916 | mss = skb_shinfo(skb)->gso_size; | 1918 | mss = skb_shinfo(skb)->gso_size; |
1917 | if (unlikely(skb->len <= mss)) | 1919 | if (unlikely(skb->len <= mss)) |
1918 | goto out; | 1920 | goto out; |
1919 | 1921 | ||
1920 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | 1922 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { |
1921 | /* Packet is from an untrusted source, reset gso_segs. */ | 1923 | /* Packet is from an untrusted source, reset gso_segs. */ |
1922 | int type = skb_shinfo(skb)->gso_type; | 1924 | int type = skb_shinfo(skb)->gso_type; |
1923 | 1925 | ||
1924 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || | 1926 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || |
1925 | !(type & (SKB_GSO_UDP)))) | 1927 | !(type & (SKB_GSO_UDP)))) |
1926 | goto out; | 1928 | goto out; |
1927 | 1929 | ||
1928 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | 1930 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); |
1929 | 1931 | ||
1930 | segs = NULL; | 1932 | segs = NULL; |
1931 | goto out; | 1933 | goto out; |
1932 | } | 1934 | } |
1933 | 1935 | ||
1934 | /* Do software UFO. Complete and fill in the UDP checksum as HW cannot | 1936 | /* Do software UFO. Complete and fill in the UDP checksum as HW cannot |
1935 | * checksum UDP packets sent as multiple IP fragments. | 1937 | * checksum UDP packets sent as multiple IP fragments. |
1936 | */ | 1938 | */ |
1937 | offset = skb->csum_start - skb_headroom(skb); | 1939 | offset = skb->csum_start - skb_headroom(skb); |
1938 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 1940 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
1939 | offset += skb->csum_offset; | 1941 | offset += skb->csum_offset; |
1940 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | 1942 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); |
1941 | skb->ip_summed = CHECKSUM_NONE; | 1943 | skb->ip_summed = CHECKSUM_NONE; |
1942 | 1944 | ||
1943 | /* Fragment the skb. IP headers of the fragments are updated in | 1945 | /* Fragment the skb. IP headers of the fragments are updated in |
1944 | * inet_gso_segment() | 1946 | * inet_gso_segment() |
1945 | */ | 1947 | */ |
1946 | segs = skb_segment(skb, features); | 1948 | segs = skb_segment(skb, features); |
1947 | out: | 1949 | out: |
1948 | return segs; | 1950 | return segs; |
1949 | } | 1951 | } |
1950 | 1952 | ||
1951 | 1953 |
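The software UFO path above finishes a CHECKSUM_PARTIAL packet by summing everything from csum_start to the end of the skb and folding the 32-bit result into the 16-bit field at csum_offset. A minimal user-space sketch of that fold step, using a naive one's-complement sum in place of the kernel's skb_checksum()/csum_fold() helpers (buffer layout and names here are illustrative, not kernel API):

    #include <stdint.h>
    #include <stddef.h>

    /* One's-complement sum of 'len' bytes starting at 'data' (the bytes
     * from csum_start to the end of the packet), folded to 16 bits and
     * complemented -- the value stored at csum_offset. */
    static uint16_t fold_csum(const uint8_t *data, size_t len)
    {
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i + 1 < len; i += 2)        /* 16-bit words */
            sum += ((uint32_t)data[i] << 8) | data[i + 1];
        if (len & 1)                            /* trailing odd byte */
            sum += (uint32_t)data[len - 1] << 8;
        while (sum >> 16)                       /* end-around carry */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

The end-around-carry loop makes the sum insensitive to how the 32-bit accumulator overflows, which is also why the kernel can combine partial sums from fragments with csum_add().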
net/ipv6/raw.c
1 | /* | 1 | /* |
2 | * RAW sockets for IPv6 | 2 | * RAW sockets for IPv6 |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * Adapted from linux/net/ipv4/raw.c | 8 | * Adapted from linux/net/ipv4/raw.c |
9 | * | 9 | * |
10 | * Fixes: | 10 | * Fixes: |
11 | * Hideaki YOSHIFUJI : sin6_scope_id support | 11 | * Hideaki YOSHIFUJI : sin6_scope_id support |
12 | * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) | 12 | * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) |
13 | * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data | 13 | * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data |
14 | * | 14 | * |
15 | * This program is free software; you can redistribute it and/or | 15 | * This program is free software; you can redistribute it and/or |
16 | * modify it under the terms of the GNU General Public License | 16 | * modify it under the terms of the GNU General Public License |
17 | * as published by the Free Software Foundation; either version | 17 | * as published by the Free Software Foundation; either version |
18 | * 2 of the License, or (at your option) any later version. | 18 | * 2 of the License, or (at your option) any later version. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/types.h> | 22 | #include <linux/types.h> |
23 | #include <linux/socket.h> | 23 | #include <linux/socket.h> |
24 | #include <linux/sockios.h> | 24 | #include <linux/sockios.h> |
25 | #include <linux/net.h> | 25 | #include <linux/net.h> |
26 | #include <linux/in6.h> | 26 | #include <linux/in6.h> |
27 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
28 | #include <linux/if_arp.h> | 28 | #include <linux/if_arp.h> |
29 | #include <linux/icmpv6.h> | 29 | #include <linux/icmpv6.h> |
30 | #include <linux/netfilter.h> | 30 | #include <linux/netfilter.h> |
31 | #include <linux/netfilter_ipv6.h> | 31 | #include <linux/netfilter_ipv6.h> |
32 | #include <linux/skbuff.h> | 32 | #include <linux/skbuff.h> |
33 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
34 | #include <asm/ioctls.h> | 34 | #include <asm/ioctls.h> |
35 | 35 | ||
36 | #include <net/net_namespace.h> | 36 | #include <net/net_namespace.h> |
37 | #include <net/ip.h> | 37 | #include <net/ip.h> |
38 | #include <net/sock.h> | 38 | #include <net/sock.h> |
39 | #include <net/snmp.h> | 39 | #include <net/snmp.h> |
40 | 40 | ||
41 | #include <net/ipv6.h> | 41 | #include <net/ipv6.h> |
42 | #include <net/ndisc.h> | 42 | #include <net/ndisc.h> |
43 | #include <net/protocol.h> | 43 | #include <net/protocol.h> |
44 | #include <net/ip6_route.h> | 44 | #include <net/ip6_route.h> |
45 | #include <net/ip6_checksum.h> | 45 | #include <net/ip6_checksum.h> |
46 | #include <net/addrconf.h> | 46 | #include <net/addrconf.h> |
47 | #include <net/transp_v6.h> | 47 | #include <net/transp_v6.h> |
48 | #include <net/udp.h> | 48 | #include <net/udp.h> |
49 | #include <net/inet_common.h> | 49 | #include <net/inet_common.h> |
50 | #include <net/tcp_states.h> | 50 | #include <net/tcp_states.h> |
51 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 51 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
52 | #include <net/mip6.h> | 52 | #include <net/mip6.h> |
53 | #endif | 53 | #endif |
54 | #include <linux/mroute6.h> | 54 | #include <linux/mroute6.h> |
55 | 55 | ||
56 | #include <net/raw.h> | 56 | #include <net/raw.h> |
57 | #include <net/rawv6.h> | 57 | #include <net/rawv6.h> |
58 | #include <net/xfrm.h> | 58 | #include <net/xfrm.h> |
59 | 59 | ||
60 | #include <linux/proc_fs.h> | 60 | #include <linux/proc_fs.h> |
61 | #include <linux/seq_file.h> | 61 | #include <linux/seq_file.h> |
62 | 62 | ||
63 | static struct raw_hashinfo raw_v6_hashinfo = { | 63 | static struct raw_hashinfo raw_v6_hashinfo = { |
64 | .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), | 64 | .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), |
65 | }; | 65 | }; |
66 | 66 | ||
67 | static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, | 67 | static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, |
68 | unsigned short num, struct in6_addr *loc_addr, | 68 | unsigned short num, struct in6_addr *loc_addr, |
69 | struct in6_addr *rmt_addr, int dif) | 69 | struct in6_addr *rmt_addr, int dif) |
70 | { | 70 | { |
71 | struct hlist_node *node; | 71 | struct hlist_node *node; |
72 | int is_multicast = ipv6_addr_is_multicast(loc_addr); | 72 | int is_multicast = ipv6_addr_is_multicast(loc_addr); |
73 | 73 | ||
74 | sk_for_each_from(sk, node) | 74 | sk_for_each_from(sk, node) |
75 | if (inet_sk(sk)->inet_num == num) { | 75 | if (inet_sk(sk)->inet_num == num) { |
76 | struct ipv6_pinfo *np = inet6_sk(sk); | 76 | struct ipv6_pinfo *np = inet6_sk(sk); |
77 | 77 | ||
78 | if (!net_eq(sock_net(sk), net)) | 78 | if (!net_eq(sock_net(sk), net)) |
79 | continue; | 79 | continue; |
80 | 80 | ||
81 | if (!ipv6_addr_any(&np->daddr) && | 81 | if (!ipv6_addr_any(&np->daddr) && |
82 | !ipv6_addr_equal(&np->daddr, rmt_addr)) | 82 | !ipv6_addr_equal(&np->daddr, rmt_addr)) |
83 | continue; | 83 | continue; |
84 | 84 | ||
85 | if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) | 85 | if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) |
86 | continue; | 86 | continue; |
87 | 87 | ||
88 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 88 | if (!ipv6_addr_any(&np->rcv_saddr)) { |
89 | if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) | 89 | if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) |
90 | goto found; | 90 | goto found; |
91 | if (is_multicast && | 91 | if (is_multicast && |
92 | inet6_mc_check(sk, loc_addr, rmt_addr)) | 92 | inet6_mc_check(sk, loc_addr, rmt_addr)) |
93 | goto found; | 93 | goto found; |
94 | continue; | 94 | continue; |
95 | } | 95 | } |
96 | goto found; | 96 | goto found; |
97 | } | 97 | } |
98 | sk = NULL; | 98 | sk = NULL; |
99 | found: | 99 | found: |
100 | return sk; | 100 | return sk; |
101 | } | 101 | } |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * 0 - deliver | 104 | * 0 - deliver |
105 | * 1 - block | 105 | * 1 - block |
106 | */ | 106 | */ |
107 | static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) | 107 | static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) |
108 | { | 108 | { |
109 | struct icmp6hdr *icmph; | 109 | struct icmp6hdr *icmph; |
110 | struct raw6_sock *rp = raw6_sk(sk); | 110 | struct raw6_sock *rp = raw6_sk(sk); |
111 | 111 | ||
112 | if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { | 112 | if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { |
113 | __u32 *data = &rp->filter.data[0]; | 113 | __u32 *data = &rp->filter.data[0]; |
114 | int bit_nr; | 114 | int bit_nr; |
115 | 115 | ||
116 | icmph = (struct icmp6hdr *) skb->data; | 116 | icmph = (struct icmp6hdr *) skb->data; |
117 | bit_nr = icmph->icmp6_type; | 117 | bit_nr = icmph->icmp6_type; |
118 | 118 | ||
119 | return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; | 119 | return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; |
120 | } | 120 | } |
121 | return 0; | 121 | return 0; |
122 | } | 122 | } |
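icmpv6_filter() above treats rp->filter.data[] as a 256-bit bitmap indexed by ICMPv6 type: data[type >> 5] selects the 32-bit word and 1 << (type & 31) the bit, with a set bit meaning "block". User space fills that bitmap through the ICMPV6_FILTER socket option; a sketch using the RFC 3542 macros, assuming a libc that provides <netinet/icmp6.h>:

    #include <string.h>
    #include <netinet/in.h>
    #include <netinet/icmp6.h>
    #include <sys/socket.h>

    /* Pass all ICMPv6 types except echo replies on raw socket 'fd'.
     * The macros set/clear bits in the same 8-word bitmap the kernel
     * tests in icmpv6_filter(). */
    static int block_echo_replies(int fd)
    {
        struct icmp6_filter filt;

        ICMP6_FILTER_SETPASSALL(&filt);
        ICMP6_FILTER_SETBLOCK(ICMP6_ECHO_REPLY, &filt);
        return setsockopt(fd, IPPROTO_ICMPV6, ICMP6_FILTER,
                          &filt, sizeof(filt));
    }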
123 | 123 | ||
124 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 124 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
125 | static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); | 125 | static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); |
126 | 126 | ||
127 | int rawv6_mh_filter_register(int (*filter)(struct sock *sock, | 127 | int rawv6_mh_filter_register(int (*filter)(struct sock *sock, |
128 | struct sk_buff *skb)) | 128 | struct sk_buff *skb)) |
129 | { | 129 | { |
130 | rcu_assign_pointer(mh_filter, filter); | 130 | rcu_assign_pointer(mh_filter, filter); |
131 | return 0; | 131 | return 0; |
132 | } | 132 | } |
133 | EXPORT_SYMBOL(rawv6_mh_filter_register); | 133 | EXPORT_SYMBOL(rawv6_mh_filter_register); |
134 | 134 | ||
135 | int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, | 135 | int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, |
136 | struct sk_buff *skb)) | 136 | struct sk_buff *skb)) |
137 | { | 137 | { |
138 | rcu_assign_pointer(mh_filter, NULL); | 138 | rcu_assign_pointer(mh_filter, NULL); |
139 | synchronize_rcu(); | 139 | synchronize_rcu(); |
140 | return 0; | 140 | return 0; |
141 | } | 141 | } |
142 | EXPORT_SYMBOL(rawv6_mh_filter_unregister); | 142 | EXPORT_SYMBOL(rawv6_mh_filter_unregister); |
143 | 143 | ||
144 | #endif | 144 | #endif |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * demultiplex raw sockets. | 147 | * demultiplex raw sockets. |
148 | * (should consider queueing the skb in the sock receive_queue | 148 | * (should consider queueing the skb in the sock receive_queue |
149 | * without calling rawv6.c) | 149 | * without calling rawv6.c) |
150 | * | 150 | * |
151 | * Caller owns SKB so we must make clones. | 151 | * Caller owns SKB so we must make clones. |
152 | */ | 152 | */ |
153 | static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) | 153 | static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) |
154 | { | 154 | { |
155 | struct in6_addr *saddr; | 155 | struct in6_addr *saddr; |
156 | struct in6_addr *daddr; | 156 | struct in6_addr *daddr; |
157 | struct sock *sk; | 157 | struct sock *sk; |
158 | int delivered = 0; | 158 | int delivered = 0; |
159 | __u8 hash; | 159 | __u8 hash; |
160 | struct net *net; | 160 | struct net *net; |
161 | 161 | ||
162 | saddr = &ipv6_hdr(skb)->saddr; | 162 | saddr = &ipv6_hdr(skb)->saddr; |
163 | daddr = saddr + 1; | 163 | daddr = saddr + 1; |
164 | 164 | ||
165 | hash = nexthdr & (MAX_INET_PROTOS - 1); | 165 | hash = nexthdr & (MAX_INET_PROTOS - 1); |
166 | 166 | ||
167 | read_lock(&raw_v6_hashinfo.lock); | 167 | read_lock(&raw_v6_hashinfo.lock); |
168 | sk = sk_head(&raw_v6_hashinfo.ht[hash]); | 168 | sk = sk_head(&raw_v6_hashinfo.ht[hash]); |
169 | 169 | ||
170 | if (sk == NULL) | 170 | if (sk == NULL) |
171 | goto out; | 171 | goto out; |
172 | 172 | ||
173 | net = dev_net(skb->dev); | 173 | net = dev_net(skb->dev); |
174 | sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); | 174 | sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); |
175 | 175 | ||
176 | while (sk) { | 176 | while (sk) { |
177 | int filtered; | 177 | int filtered; |
178 | 178 | ||
179 | delivered = 1; | 179 | delivered = 1; |
180 | switch (nexthdr) { | 180 | switch (nexthdr) { |
181 | case IPPROTO_ICMPV6: | 181 | case IPPROTO_ICMPV6: |
182 | filtered = icmpv6_filter(sk, skb); | 182 | filtered = icmpv6_filter(sk, skb); |
183 | break; | 183 | break; |
184 | 184 | ||
185 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) | 185 | #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) |
186 | case IPPROTO_MH: | 186 | case IPPROTO_MH: |
187 | { | 187 | { |
188 | /* XXX: To validate MH only once per packet, | 188 | /* XXX: To validate MH only once per packet, |
189 | * the check is placed here. Ideally it would run | 189 | * the check is placed here. Ideally it would run |
190 | * after the xfrm policy check, but it does not; the | 190 | * after the xfrm policy check, but it does not; the |
191 | * xfrm policy check is done in rawv6_rcv() because | 191 | * xfrm policy check is done in rawv6_rcv() because |
192 | * it is required for each socket. | 192 | * it is required for each socket. |
193 | */ | 193 | */ |
194 | int (*filter)(struct sock *sock, struct sk_buff *skb); | 194 | int (*filter)(struct sock *sock, struct sk_buff *skb); |
195 | 195 | ||
196 | filter = rcu_dereference(mh_filter); | 196 | filter = rcu_dereference(mh_filter); |
197 | filtered = filter ? filter(sk, skb) : 0; | 197 | filtered = filter ? filter(sk, skb) : 0; |
198 | break; | 198 | break; |
199 | } | 199 | } |
200 | #endif | 200 | #endif |
201 | default: | 201 | default: |
202 | filtered = 0; | 202 | filtered = 0; |
203 | break; | 203 | break; |
204 | } | 204 | } |
205 | 205 | ||
206 | if (filtered < 0) | 206 | if (filtered < 0) |
207 | break; | 207 | break; |
208 | if (filtered == 0) { | 208 | if (filtered == 0) { |
209 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); | 209 | struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); |
210 | 210 | ||
211 | /* Not releasing hash table! */ | 211 | /* Not releasing hash table! */ |
212 | if (clone) { | 212 | if (clone) { |
213 | nf_reset(clone); | 213 | nf_reset(clone); |
214 | rawv6_rcv(sk, clone); | 214 | rawv6_rcv(sk, clone); |
215 | } | 215 | } |
216 | } | 216 | } |
217 | sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, | 217 | sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, |
218 | IP6CB(skb)->iif); | 218 | IP6CB(skb)->iif); |
219 | } | 219 | } |
220 | out: | 220 | out: |
221 | read_unlock(&raw_v6_hashinfo.lock); | 221 | read_unlock(&raw_v6_hashinfo.lock); |
222 | return delivered; | 222 | return delivered; |
223 | } | 223 | } |
224 | 224 | ||
225 | int raw6_local_deliver(struct sk_buff *skb, int nexthdr) | 225 | int raw6_local_deliver(struct sk_buff *skb, int nexthdr) |
226 | { | 226 | { |
227 | struct sock *raw_sk; | 227 | struct sock *raw_sk; |
228 | 228 | ||
229 | raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); | 229 | raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); |
230 | if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) | 230 | if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) |
231 | raw_sk = NULL; | 231 | raw_sk = NULL; |
232 | 232 | ||
233 | return raw_sk != NULL; | 233 | return raw_sk != NULL; |
234 | } | 234 | } |
235 | 235 | ||
236 | /* This cleans up af_inet6 a bit. -DaveM */ | 236 | /* This cleans up af_inet6 a bit. -DaveM */ |
237 | static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 237 | static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
238 | { | 238 | { |
239 | struct inet_sock *inet = inet_sk(sk); | 239 | struct inet_sock *inet = inet_sk(sk); |
240 | struct ipv6_pinfo *np = inet6_sk(sk); | 240 | struct ipv6_pinfo *np = inet6_sk(sk); |
241 | struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; | 241 | struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; |
242 | __be32 v4addr = 0; | 242 | __be32 v4addr = 0; |
243 | int addr_type; | 243 | int addr_type; |
244 | int err; | 244 | int err; |
245 | 245 | ||
246 | if (addr_len < SIN6_LEN_RFC2133) | 246 | if (addr_len < SIN6_LEN_RFC2133) |
247 | return -EINVAL; | 247 | return -EINVAL; |
248 | addr_type = ipv6_addr_type(&addr->sin6_addr); | 248 | addr_type = ipv6_addr_type(&addr->sin6_addr); |
249 | 249 | ||
250 | /* Raw sockets are IPv6 only */ | 250 | /* Raw sockets are IPv6 only */ |
251 | if (addr_type == IPV6_ADDR_MAPPED) | 251 | if (addr_type == IPV6_ADDR_MAPPED) |
252 | return(-EADDRNOTAVAIL); | 252 | return(-EADDRNOTAVAIL); |
253 | 253 | ||
254 | lock_sock(sk); | 254 | lock_sock(sk); |
255 | 255 | ||
256 | err = -EINVAL; | 256 | err = -EINVAL; |
257 | if (sk->sk_state != TCP_CLOSE) | 257 | if (sk->sk_state != TCP_CLOSE) |
258 | goto out; | 258 | goto out; |
259 | 259 | ||
260 | /* Check if the address belongs to the host. */ | 260 | /* Check if the address belongs to the host. */ |
261 | if (addr_type != IPV6_ADDR_ANY) { | 261 | if (addr_type != IPV6_ADDR_ANY) { |
262 | struct net_device *dev = NULL; | 262 | struct net_device *dev = NULL; |
263 | 263 | ||
264 | if (addr_type & IPV6_ADDR_LINKLOCAL) { | 264 | if (addr_type & IPV6_ADDR_LINKLOCAL) { |
265 | if (addr_len >= sizeof(struct sockaddr_in6) && | 265 | if (addr_len >= sizeof(struct sockaddr_in6) && |
266 | addr->sin6_scope_id) { | 266 | addr->sin6_scope_id) { |
267 | /* Override any existing binding, if another | 267 | /* Override any existing binding, if another |
268 | * one is supplied by user. | 268 | * one is supplied by user. |
269 | */ | 269 | */ |
270 | sk->sk_bound_dev_if = addr->sin6_scope_id; | 270 | sk->sk_bound_dev_if = addr->sin6_scope_id; |
271 | } | 271 | } |
272 | 272 | ||
273 | /* Binding to link-local address requires an interface */ | 273 | /* Binding to link-local address requires an interface */ |
274 | if (!sk->sk_bound_dev_if) | 274 | if (!sk->sk_bound_dev_if) |
275 | goto out; | 275 | goto out; |
276 | 276 | ||
277 | dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); | 277 | dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); |
278 | if (!dev) { | 278 | if (!dev) { |
279 | err = -ENODEV; | 279 | err = -ENODEV; |
280 | goto out; | 280 | goto out; |
281 | } | 281 | } |
282 | } | 282 | } |
283 | 283 | ||
284 | /* ipv4 addr of the socket is invalid. Only the | 284 | /* ipv4 addr of the socket is invalid. Only the |
285 | * unspecified and mapped addresses have a v4 equivalent. | 285 | * unspecified and mapped addresses have a v4 equivalent. |
286 | */ | 286 | */ |
287 | v4addr = LOOPBACK4_IPV6; | 287 | v4addr = LOOPBACK4_IPV6; |
288 | if (!(addr_type & IPV6_ADDR_MULTICAST)) { | 288 | if (!(addr_type & IPV6_ADDR_MULTICAST)) { |
289 | err = -EADDRNOTAVAIL; | 289 | err = -EADDRNOTAVAIL; |
290 | if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, | 290 | if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, |
291 | dev, 0)) { | 291 | dev, 0)) { |
292 | if (dev) | 292 | if (dev) |
293 | dev_put(dev); | 293 | dev_put(dev); |
294 | goto out; | 294 | goto out; |
295 | } | 295 | } |
296 | } | 296 | } |
297 | if (dev) | 297 | if (dev) |
298 | dev_put(dev); | 298 | dev_put(dev); |
299 | } | 299 | } |
300 | 300 | ||
301 | inet->inet_rcv_saddr = inet->inet_saddr = v4addr; | 301 | inet->inet_rcv_saddr = inet->inet_saddr = v4addr; |
302 | ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); | 302 | ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); |
303 | if (!(addr_type & IPV6_ADDR_MULTICAST)) | 303 | if (!(addr_type & IPV6_ADDR_MULTICAST)) |
304 | ipv6_addr_copy(&np->saddr, &addr->sin6_addr); | 304 | ipv6_addr_copy(&np->saddr, &addr->sin6_addr); |
305 | err = 0; | 305 | err = 0; |
306 | out: | 306 | out: |
307 | release_sock(sk); | 307 | release_sock(sk); |
308 | return err; | 308 | return err; |
309 | } | 309 | } |
310 | 310 | ||
311 | static void rawv6_err(struct sock *sk, struct sk_buff *skb, | 311 | static void rawv6_err(struct sock *sk, struct sk_buff *skb, |
312 | struct inet6_skb_parm *opt, | 312 | struct inet6_skb_parm *opt, |
313 | u8 type, u8 code, int offset, __be32 info) | 313 | u8 type, u8 code, int offset, __be32 info) |
314 | { | 314 | { |
315 | struct inet_sock *inet = inet_sk(sk); | 315 | struct inet_sock *inet = inet_sk(sk); |
316 | struct ipv6_pinfo *np = inet6_sk(sk); | 316 | struct ipv6_pinfo *np = inet6_sk(sk); |
317 | int err; | 317 | int err; |
318 | int harderr; | 318 | int harderr; |
319 | 319 | ||
320 | /* Report error on raw socket, if: | 320 | /* Report error on raw socket, if: |
321 | 1. User requested recverr. | 321 | 1. User requested recverr. |
322 | 2. Socket is connected (otherwise the error indication | 322 | 2. Socket is connected (otherwise the error indication |
323 | is useless without recverr) and the error is hard. | 323 | is useless without recverr) and the error is hard. |
324 | */ | 324 | */ |
325 | if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) | 325 | if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) |
326 | return; | 326 | return; |
327 | 327 | ||
328 | harderr = icmpv6_err_convert(type, code, &err); | 328 | harderr = icmpv6_err_convert(type, code, &err); |
329 | if (type == ICMPV6_PKT_TOOBIG) | 329 | if (type == ICMPV6_PKT_TOOBIG) |
330 | harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); | 330 | harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); |
331 | 331 | ||
332 | if (np->recverr) { | 332 | if (np->recverr) { |
333 | u8 *payload = skb->data; | 333 | u8 *payload = skb->data; |
334 | if (!inet->hdrincl) | 334 | if (!inet->hdrincl) |
335 | payload += offset; | 335 | payload += offset; |
336 | ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); | 336 | ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); |
337 | } | 337 | } |
338 | 338 | ||
339 | if (np->recverr || harderr) { | 339 | if (np->recverr || harderr) { |
340 | sk->sk_err = err; | 340 | sk->sk_err = err; |
341 | sk->sk_error_report(sk); | 341 | sk->sk_error_report(sk); |
342 | } | 342 | } |
343 | } | 343 | } |
344 | 344 | ||
345 | void raw6_icmp_error(struct sk_buff *skb, int nexthdr, | 345 | void raw6_icmp_error(struct sk_buff *skb, int nexthdr, |
346 | u8 type, u8 code, int inner_offset, __be32 info) | 346 | u8 type, u8 code, int inner_offset, __be32 info) |
347 | { | 347 | { |
348 | struct sock *sk; | 348 | struct sock *sk; |
349 | int hash; | 349 | int hash; |
350 | struct in6_addr *saddr, *daddr; | 350 | struct in6_addr *saddr, *daddr; |
351 | struct net *net; | 351 | struct net *net; |
352 | 352 | ||
353 | hash = nexthdr & (RAW_HTABLE_SIZE - 1); | 353 | hash = nexthdr & (RAW_HTABLE_SIZE - 1); |
354 | 354 | ||
355 | read_lock(&raw_v6_hashinfo.lock); | 355 | read_lock(&raw_v6_hashinfo.lock); |
356 | sk = sk_head(&raw_v6_hashinfo.ht[hash]); | 356 | sk = sk_head(&raw_v6_hashinfo.ht[hash]); |
357 | if (sk != NULL) { | 357 | if (sk != NULL) { |
358 | /* Note: ipv6_hdr(skb) != skb->data */ | 358 | /* Note: ipv6_hdr(skb) != skb->data */ |
359 | struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data; | 359 | struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data; |
360 | saddr = &ip6h->saddr; | 360 | saddr = &ip6h->saddr; |
361 | daddr = &ip6h->daddr; | 361 | daddr = &ip6h->daddr; |
362 | net = dev_net(skb->dev); | 362 | net = dev_net(skb->dev); |
363 | 363 | ||
364 | while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, | 364 | while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, |
365 | IP6CB(skb)->iif))) { | 365 | IP6CB(skb)->iif))) { |
366 | rawv6_err(sk, skb, NULL, type, code, | 366 | rawv6_err(sk, skb, NULL, type, code, |
367 | inner_offset, info); | 367 | inner_offset, info); |
368 | sk = sk_next(sk); | 368 | sk = sk_next(sk); |
369 | } | 369 | } |
370 | } | 370 | } |
371 | read_unlock(&raw_v6_hashinfo.lock); | 371 | read_unlock(&raw_v6_hashinfo.lock); |
372 | } | 372 | } |
373 | 373 | ||
374 | static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) | 374 | static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) |
375 | { | 375 | { |
376 | if ((raw6_sk(sk)->checksum || sk->sk_filter) && | 376 | if ((raw6_sk(sk)->checksum || sk->sk_filter) && |
377 | skb_checksum_complete(skb)) { | 377 | skb_checksum_complete(skb)) { |
378 | atomic_inc(&sk->sk_drops); | 378 | atomic_inc(&sk->sk_drops); |
379 | kfree_skb(skb); | 379 | kfree_skb(skb); |
380 | return NET_RX_DROP; | 380 | return NET_RX_DROP; |
381 | } | 381 | } |
382 | 382 | ||
383 | /* Charge it to the socket. */ | 383 | /* Charge it to the socket. */ |
384 | if (sock_queue_rcv_skb(sk, skb) < 0) { | 384 | if (sock_queue_rcv_skb(sk, skb) < 0) { |
385 | kfree_skb(skb); | 385 | kfree_skb(skb); |
386 | return NET_RX_DROP; | 386 | return NET_RX_DROP; |
387 | } | 387 | } |
388 | 388 | ||
389 | return 0; | 389 | return 0; |
390 | } | 390 | } |
391 | 391 | ||
392 | /* | 392 | /* |
393 | * This is next to useless... | 393 | * This is next to useless... |
394 | * if we demultiplex in the network layer we don't need the extra call | 394 | * if we demultiplex in the network layer we don't need the extra call |
395 | * just to queue the skb... | 395 | * just to queue the skb... |
396 | * maybe we could have the network layer decide, based on a hint, | 396 | * maybe we could have the network layer decide, based on a hint, |
397 | * whether it should call raw_rcv for demultiplexing | 397 | * whether it should call raw_rcv for demultiplexing |
398 | */ | 398 | */ |
399 | int rawv6_rcv(struct sock *sk, struct sk_buff *skb) | 399 | int rawv6_rcv(struct sock *sk, struct sk_buff *skb) |
400 | { | 400 | { |
401 | struct inet_sock *inet = inet_sk(sk); | 401 | struct inet_sock *inet = inet_sk(sk); |
402 | struct raw6_sock *rp = raw6_sk(sk); | 402 | struct raw6_sock *rp = raw6_sk(sk); |
403 | 403 | ||
404 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { | 404 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { |
405 | atomic_inc(&sk->sk_drops); | 405 | atomic_inc(&sk->sk_drops); |
406 | kfree_skb(skb); | 406 | kfree_skb(skb); |
407 | return NET_RX_DROP; | 407 | return NET_RX_DROP; |
408 | } | 408 | } |
409 | 409 | ||
410 | if (!rp->checksum) | 410 | if (!rp->checksum) |
411 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 411 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
412 | 412 | ||
413 | if (skb->ip_summed == CHECKSUM_COMPLETE) { | 413 | if (skb->ip_summed == CHECKSUM_COMPLETE) { |
414 | skb_postpull_rcsum(skb, skb_network_header(skb), | 414 | skb_postpull_rcsum(skb, skb_network_header(skb), |
415 | skb_network_header_len(skb)); | 415 | skb_network_header_len(skb)); |
416 | if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | 416 | if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
417 | &ipv6_hdr(skb)->daddr, | 417 | &ipv6_hdr(skb)->daddr, |
418 | skb->len, inet->inet_num, skb->csum)) | 418 | skb->len, inet->inet_num, skb->csum)) |
419 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 419 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
420 | } | 420 | } |
421 | if (!skb_csum_unnecessary(skb)) | 421 | if (!skb_csum_unnecessary(skb)) |
422 | skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | 422 | skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
423 | &ipv6_hdr(skb)->daddr, | 423 | &ipv6_hdr(skb)->daddr, |
424 | skb->len, | 424 | skb->len, |
425 | inet->inet_num, 0)); | 425 | inet->inet_num, 0)); |
426 | 426 | ||
427 | if (inet->hdrincl) { | 427 | if (inet->hdrincl) { |
428 | if (skb_checksum_complete(skb)) { | 428 | if (skb_checksum_complete(skb)) { |
429 | atomic_inc(&sk->sk_drops); | 429 | atomic_inc(&sk->sk_drops); |
430 | kfree_skb(skb); | 430 | kfree_skb(skb); |
431 | return NET_RX_DROP; | 431 | return NET_RX_DROP; |
432 | } | 432 | } |
433 | } | 433 | } |
434 | 434 | ||
435 | rawv6_rcv_skb(sk, skb); | 435 | rawv6_rcv_skb(sk, skb); |
436 | return 0; | 436 | return 0; |
437 | } | 437 | } |
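Both checksum branches in rawv6_rcv() lean on csum_ipv6_magic(), which folds the RFC 2460 pseudo-header (source and destination addresses, upper-layer length, next-header value) into the running payload sum. A plain-C sketch of the equivalent computation, assuming the payload has already been summed into a 32-bit accumulator; this is illustrative, not the kernel helper:

    #include <stdint.h>

    /* Fold the IPv6 pseudo-header into an existing 32-bit payload sum
     * and return the final one's-complement checksum. Adding the
     * 32-bit length directly works because the end-around-carry loop
     * makes the arithmetic effectively mod 65535. */
    static uint16_t ipv6_pseudo_csum(const uint8_t saddr[16],
                                     const uint8_t daddr[16],
                                     uint32_t len, uint8_t nexthdr,
                                     uint32_t payload_sum)
    {
        uint32_t sum = payload_sum;
        int i;

        for (i = 0; i < 16; i += 2) {
            sum += ((uint32_t)saddr[i] << 8) | saddr[i + 1];
            sum += ((uint32_t)daddr[i] << 8) | daddr[i + 1];
        }
        sum += len;                     /* upper-layer packet length */
        sum += nexthdr;                 /* next-header (protocol) value */
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }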
438 | 438 | ||
439 | 439 | ||
440 | /* | 440 | /* |
441 | * This should be easy, if there is something there | 441 | * This should be easy, if there is something there |
442 | * we return it, otherwise we block. | 442 | * we return it, otherwise we block. |
443 | */ | 443 | */ |
444 | 444 | ||
445 | static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, | 445 | static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, |
446 | struct msghdr *msg, size_t len, | 446 | struct msghdr *msg, size_t len, |
447 | int noblock, int flags, int *addr_len) | 447 | int noblock, int flags, int *addr_len) |
448 | { | 448 | { |
449 | struct ipv6_pinfo *np = inet6_sk(sk); | 449 | struct ipv6_pinfo *np = inet6_sk(sk); |
450 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; | 450 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; |
451 | struct sk_buff *skb; | 451 | struct sk_buff *skb; |
452 | size_t copied; | 452 | size_t copied; |
453 | int err; | 453 | int err; |
454 | 454 | ||
455 | if (flags & MSG_OOB) | 455 | if (flags & MSG_OOB) |
456 | return -EOPNOTSUPP; | 456 | return -EOPNOTSUPP; |
457 | 457 | ||
458 | if (addr_len) | 458 | if (addr_len) |
459 | *addr_len=sizeof(*sin6); | 459 | *addr_len=sizeof(*sin6); |
460 | 460 | ||
461 | if (flags & MSG_ERRQUEUE) | 461 | if (flags & MSG_ERRQUEUE) |
462 | return ipv6_recv_error(sk, msg, len); | 462 | return ipv6_recv_error(sk, msg, len); |
463 | 463 | ||
464 | skb = skb_recv_datagram(sk, flags, noblock, &err); | 464 | skb = skb_recv_datagram(sk, flags, noblock, &err); |
465 | if (!skb) | 465 | if (!skb) |
466 | goto out; | 466 | goto out; |
467 | 467 | ||
468 | copied = skb->len; | 468 | copied = skb->len; |
469 | if (copied > len) { | 469 | if (copied > len) { |
470 | copied = len; | 470 | copied = len; |
471 | msg->msg_flags |= MSG_TRUNC; | 471 | msg->msg_flags |= MSG_TRUNC; |
472 | } | 472 | } |
473 | 473 | ||
474 | if (skb_csum_unnecessary(skb)) { | 474 | if (skb_csum_unnecessary(skb)) { |
475 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); | 475 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); |
476 | } else if (msg->msg_flags&MSG_TRUNC) { | 476 | } else if (msg->msg_flags&MSG_TRUNC) { |
477 | if (__skb_checksum_complete(skb)) | 477 | if (__skb_checksum_complete(skb)) |
478 | goto csum_copy_err; | 478 | goto csum_copy_err; |
479 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); | 479 | err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); |
480 | } else { | 480 | } else { |
481 | err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); | 481 | err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); |
482 | if (err == -EINVAL) | 482 | if (err == -EINVAL) |
483 | goto csum_copy_err; | 483 | goto csum_copy_err; |
484 | } | 484 | } |
485 | if (err) | 485 | if (err) |
486 | goto out_free; | 486 | goto out_free; |
487 | 487 | ||
488 | /* Copy the address. */ | 488 | /* Copy the address. */ |
489 | if (sin6) { | 489 | if (sin6) { |
490 | sin6->sin6_family = AF_INET6; | 490 | sin6->sin6_family = AF_INET6; |
491 | sin6->sin6_port = 0; | 491 | sin6->sin6_port = 0; |
492 | ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); | 492 | ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); |
493 | sin6->sin6_flowinfo = 0; | 493 | sin6->sin6_flowinfo = 0; |
494 | sin6->sin6_scope_id = 0; | 494 | sin6->sin6_scope_id = 0; |
495 | if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) | 495 | if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) |
496 | sin6->sin6_scope_id = IP6CB(skb)->iif; | 496 | sin6->sin6_scope_id = IP6CB(skb)->iif; |
497 | } | 497 | } |
498 | 498 | ||
499 | sock_recv_ts_and_drops(msg, sk, skb); | 499 | sock_recv_ts_and_drops(msg, sk, skb); |
500 | 500 | ||
501 | if (np->rxopt.all) | 501 | if (np->rxopt.all) |
502 | datagram_recv_ctl(sk, msg, skb); | 502 | datagram_recv_ctl(sk, msg, skb); |
503 | 503 | ||
504 | err = copied; | 504 | err = copied; |
505 | if (flags & MSG_TRUNC) | 505 | if (flags & MSG_TRUNC) |
506 | err = skb->len; | 506 | err = skb->len; |
507 | 507 | ||
508 | out_free: | 508 | out_free: |
509 | skb_free_datagram(sk, skb); | 509 | skb_free_datagram(sk, skb); |
510 | out: | 510 | out: |
511 | return err; | 511 | return err; |
512 | 512 | ||
513 | csum_copy_err: | 513 | csum_copy_err: |
514 | skb_kill_datagram(sk, skb, flags); | 514 | skb_kill_datagram(sk, skb, flags); |
515 | 515 | ||
516 | /* The error for the blocking case is chosen to masquerade | 516 | /* The error for the blocking case is chosen to masquerade |
517 | as some normal condition. | 517 | as some normal condition. |
518 | */ | 518 | */ |
519 | err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; | 519 | err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; |
520 | atomic_inc(&sk->sk_drops); | ||
521 | goto out; | 520 | goto out; |
522 | } | 521 | } |
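Note the csum_copy_err path above: the atomic_inc(&sk->sk_drops) line is removed because skb_kill_datagram() now bumps the counter itself. The sock_recv_ts_and_drops() call earlier in the function is where the accumulated counter becomes visible to user space: with SO_RXQ_OVFL enabled, each recvmsg() carries the current drop count as a control message. A hedged user-space sketch, assuming a libc that exposes SO_RXQ_OVFL:

    #include <stdint.h>
    #include <string.h>
    #include <sys/uio.h>
    #include <sys/socket.h>

    /* Enable drop accounting on 'fd', then pull the drop counter out of
     * the control message attached to a received datagram. */
    static uint32_t recv_with_dropcount(int fd, void *buf, size_t len)
    {
        int one = 1;
        char cbuf[CMSG_SPACE(sizeof(uint32_t))];
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cm;
        uint32_t drops = 0;

        setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &one, sizeof(one));
        if (recvmsg(fd, &msg, 0) < 0)
            return 0;
        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
            if (cm->cmsg_level == SOL_SOCKET &&
                cm->cmsg_type == SO_RXQ_OVFL)
                memcpy(&drops, CMSG_DATA(cm), sizeof(drops));
        return drops;
    }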
523 | 522 | ||
524 | static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, | 523 | static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, |
525 | struct raw6_sock *rp) | 524 | struct raw6_sock *rp) |
526 | { | 525 | { |
527 | struct sk_buff *skb; | 526 | struct sk_buff *skb; |
528 | int err = 0; | 527 | int err = 0; |
529 | int offset; | 528 | int offset; |
530 | int len; | 529 | int len; |
531 | int total_len; | 530 | int total_len; |
532 | __wsum tmp_csum; | 531 | __wsum tmp_csum; |
533 | __sum16 csum; | 532 | __sum16 csum; |
534 | 533 | ||
535 | if (!rp->checksum) | 534 | if (!rp->checksum) |
536 | goto send; | 535 | goto send; |
537 | 536 | ||
538 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | 537 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) |
539 | goto out; | 538 | goto out; |
540 | 539 | ||
541 | offset = rp->offset; | 540 | offset = rp->offset; |
542 | total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) - | 541 | total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) - |
543 | skb->data); | 542 | skb->data); |
544 | if (offset >= total_len - 1) { | 543 | if (offset >= total_len - 1) { |
545 | err = -EINVAL; | 544 | err = -EINVAL; |
546 | ip6_flush_pending_frames(sk); | 545 | ip6_flush_pending_frames(sk); |
547 | goto out; | 546 | goto out; |
548 | } | 547 | } |
549 | 548 | ||
550 | /* should check the HW csum -- miyazawa */ | 549 | /* should check the HW csum -- miyazawa */ |
551 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 550 | if (skb_queue_len(&sk->sk_write_queue) == 1) { |
552 | /* | 551 | /* |
553 | * Only one fragment on the socket. | 552 | * Only one fragment on the socket. |
554 | */ | 553 | */ |
555 | tmp_csum = skb->csum; | 554 | tmp_csum = skb->csum; |
556 | } else { | 555 | } else { |
557 | struct sk_buff *csum_skb = NULL; | 556 | struct sk_buff *csum_skb = NULL; |
558 | tmp_csum = 0; | 557 | tmp_csum = 0; |
559 | 558 | ||
560 | skb_queue_walk(&sk->sk_write_queue, skb) { | 559 | skb_queue_walk(&sk->sk_write_queue, skb) { |
561 | tmp_csum = csum_add(tmp_csum, skb->csum); | 560 | tmp_csum = csum_add(tmp_csum, skb->csum); |
562 | 561 | ||
563 | if (csum_skb) | 562 | if (csum_skb) |
564 | continue; | 563 | continue; |
565 | 564 | ||
566 | len = skb->len - skb_transport_offset(skb); | 565 | len = skb->len - skb_transport_offset(skb); |
567 | if (offset >= len) { | 566 | if (offset >= len) { |
568 | offset -= len; | 567 | offset -= len; |
569 | continue; | 568 | continue; |
570 | } | 569 | } |
571 | 570 | ||
572 | csum_skb = skb; | 571 | csum_skb = skb; |
573 | } | 572 | } |
574 | 573 | ||
575 | skb = csum_skb; | 574 | skb = csum_skb; |
576 | } | 575 | } |
577 | 576 | ||
578 | offset += skb_transport_offset(skb); | 577 | offset += skb_transport_offset(skb); |
579 | if (skb_copy_bits(skb, offset, &csum, 2)) | 578 | if (skb_copy_bits(skb, offset, &csum, 2)) |
580 | BUG(); | 579 | BUG(); |
581 | 580 | ||
582 | /* in case cksum was not initialized */ | 581 | /* in case cksum was not initialized */ |
583 | if (unlikely(csum)) | 582 | if (unlikely(csum)) |
584 | tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); | 583 | tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); |
585 | 584 | ||
586 | csum = csum_ipv6_magic(&fl->fl6_src, | 585 | csum = csum_ipv6_magic(&fl->fl6_src, |
587 | &fl->fl6_dst, | 586 | &fl->fl6_dst, |
588 | total_len, fl->proto, tmp_csum); | 587 | total_len, fl->proto, tmp_csum); |
589 | 588 | ||
590 | if (csum == 0 && fl->proto == IPPROTO_UDP) | 589 | if (csum == 0 && fl->proto == IPPROTO_UDP) |
591 | csum = CSUM_MANGLED_0; | 590 | csum = CSUM_MANGLED_0; |
592 | 591 | ||
593 | if (skb_store_bits(skb, offset, &csum, 2)) | 592 | if (skb_store_bits(skb, offset, &csum, 2)) |
594 | BUG(); | 593 | BUG(); |
595 | 594 | ||
596 | send: | 595 | send: |
597 | err = ip6_push_pending_frames(sk); | 596 | err = ip6_push_pending_frames(sk); |
598 | out: | 597 | out: |
599 | return err; | 598 | return err; |
600 | } | 599 | } |
601 | 600 | ||
602 | static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, | 601 | static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, |
603 | struct flowi *fl, struct rt6_info *rt, | 602 | struct flowi *fl, struct rt6_info *rt, |
604 | unsigned int flags) | 603 | unsigned int flags) |
605 | { | 604 | { |
606 | struct ipv6_pinfo *np = inet6_sk(sk); | 605 | struct ipv6_pinfo *np = inet6_sk(sk); |
607 | struct ipv6hdr *iph; | 606 | struct ipv6hdr *iph; |
608 | struct sk_buff *skb; | 607 | struct sk_buff *skb; |
609 | int err; | 608 | int err; |
610 | 609 | ||
611 | if (length > rt->u.dst.dev->mtu) { | 610 | if (length > rt->u.dst.dev->mtu) { |
612 | ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); | 611 | ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); |
613 | return -EMSGSIZE; | 612 | return -EMSGSIZE; |
614 | } | 613 | } |
615 | if (flags&MSG_PROBE) | 614 | if (flags&MSG_PROBE) |
616 | goto out; | 615 | goto out; |
617 | 616 | ||
618 | skb = sock_alloc_send_skb(sk, | 617 | skb = sock_alloc_send_skb(sk, |
619 | length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, | 618 | length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, |
620 | flags & MSG_DONTWAIT, &err); | 619 | flags & MSG_DONTWAIT, &err); |
621 | if (skb == NULL) | 620 | if (skb == NULL) |
622 | goto error; | 621 | goto error; |
623 | skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); | 622 | skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); |
624 | 623 | ||
625 | skb->priority = sk->sk_priority; | 624 | skb->priority = sk->sk_priority; |
626 | skb->mark = sk->sk_mark; | 625 | skb->mark = sk->sk_mark; |
627 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 626 | skb_dst_set(skb, dst_clone(&rt->u.dst)); |
628 | 627 | ||
629 | skb_put(skb, length); | 628 | skb_put(skb, length); |
630 | skb_reset_network_header(skb); | 629 | skb_reset_network_header(skb); |
631 | iph = ipv6_hdr(skb); | 630 | iph = ipv6_hdr(skb); |
632 | 631 | ||
633 | skb->ip_summed = CHECKSUM_NONE; | 632 | skb->ip_summed = CHECKSUM_NONE; |
634 | 633 | ||
635 | skb->transport_header = skb->network_header; | 634 | skb->transport_header = skb->network_header; |
636 | err = memcpy_fromiovecend((void *)iph, from, 0, length); | 635 | err = memcpy_fromiovecend((void *)iph, from, 0, length); |
637 | if (err) | 636 | if (err) |
638 | goto error_fault; | 637 | goto error_fault; |
639 | 638 | ||
640 | IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); | 639 | IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); |
641 | err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, | 640 | err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, |
642 | dst_output); | 641 | dst_output); |
643 | if (err > 0) | 642 | if (err > 0) |
644 | err = net_xmit_errno(err); | 643 | err = net_xmit_errno(err); |
645 | if (err) | 644 | if (err) |
646 | goto error; | 645 | goto error; |
647 | out: | 646 | out: |
648 | return 0; | 647 | return 0; |
649 | 648 | ||
650 | error_fault: | 649 | error_fault: |
651 | err = -EFAULT; | 650 | err = -EFAULT; |
652 | kfree_skb(skb); | 651 | kfree_skb(skb); |
653 | error: | 652 | error: |
654 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); | 653 | IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); |
655 | if (err == -ENOBUFS && !np->recverr) | 654 | if (err == -ENOBUFS && !np->recverr) |
656 | err = 0; | 655 | err = 0; |
657 | return err; | 656 | return err; |
658 | } | 657 | } |
659 | 658 | ||
660 | static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) | 659 | static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) |
661 | { | 660 | { |
662 | struct iovec *iov; | 661 | struct iovec *iov; |
663 | u8 __user *type = NULL; | 662 | u8 __user *type = NULL; |
664 | u8 __user *code = NULL; | 663 | u8 __user *code = NULL; |
665 | u8 len = 0; | 664 | u8 len = 0; |
666 | int probed = 0; | 665 | int probed = 0; |
667 | int i; | 666 | int i; |
668 | 667 | ||
669 | if (!msg->msg_iov) | 668 | if (!msg->msg_iov) |
670 | return 0; | 669 | return 0; |
671 | 670 | ||
672 | for (i = 0; i < msg->msg_iovlen; i++) { | 671 | for (i = 0; i < msg->msg_iovlen; i++) { |
673 | iov = &msg->msg_iov[i]; | 672 | iov = &msg->msg_iov[i]; |
674 | if (!iov) | 673 | if (!iov) |
675 | continue; | 674 | continue; |
676 | 675 | ||
677 | switch (fl->proto) { | 676 | switch (fl->proto) { |
678 | case IPPROTO_ICMPV6: | 677 | case IPPROTO_ICMPV6: |
679 | /* check if one-byte field is readable or not. */ | 678 | /* check if one-byte field is readable or not. */ |
680 | if (iov->iov_base && iov->iov_len < 1) | 679 | if (iov->iov_base && iov->iov_len < 1) |
681 | break; | 680 | break; |
682 | 681 | ||
683 | if (!type) { | 682 | if (!type) { |
684 | type = iov->iov_base; | 683 | type = iov->iov_base; |
685 | /* check if code field is readable or not. */ | 684 | /* check if code field is readable or not. */ |
686 | if (iov->iov_len > 1) | 685 | if (iov->iov_len > 1) |
687 | code = type + 1; | 686 | code = type + 1; |
688 | } else if (!code) | 687 | } else if (!code) |
689 | code = iov->iov_base; | 688 | code = iov->iov_base; |
690 | 689 | ||
691 | if (type && code) { | 690 | if (type && code) { |
692 | if (get_user(fl->fl_icmp_type, type) || | 691 | if (get_user(fl->fl_icmp_type, type) || |
693 | get_user(fl->fl_icmp_code, code)) | 692 | get_user(fl->fl_icmp_code, code)) |
694 | return -EFAULT; | 693 | return -EFAULT; |
695 | probed = 1; | 694 | probed = 1; |
696 | } | 695 | } |
697 | break; | 696 | break; |
698 | case IPPROTO_MH: | 697 | case IPPROTO_MH: |
699 | if (iov->iov_base && iov->iov_len < 1) | 698 | if (iov->iov_base && iov->iov_len < 1) |
700 | break; | 699 | break; |
701 | /* check if type field is readable or not. */ | 700 | /* check if type field is readable or not. */ |
702 | if (iov->iov_len > 2 - len) { | 701 | if (iov->iov_len > 2 - len) { |
703 | u8 __user *p = iov->iov_base; | 702 | u8 __user *p = iov->iov_base; |
704 | if (get_user(fl->fl_mh_type, &p[2 - len])) | 703 | if (get_user(fl->fl_mh_type, &p[2 - len])) |
705 | return -EFAULT; | 704 | return -EFAULT; |
706 | probed = 1; | 705 | probed = 1; |
707 | } else | 706 | } else |
708 | len += iov->iov_len; | 707 | len += iov->iov_len; |
709 | 708 | ||
710 | break; | 709 | break; |
711 | default: | 710 | default: |
712 | probed = 1; | 711 | probed = 1; |
713 | break; | 712 | break; |
714 | } | 713 | } |
715 | if (probed) | 714 | if (probed) |
716 | break; | 715 | break; |
717 | } | 716 | } |
718 | return 0; | 717 | return 0; |
719 | } | 718 | } |
720 | 719 | ||
721 | static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, | 720 | static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, |
722 | struct msghdr *msg, size_t len) | 721 | struct msghdr *msg, size_t len) |
723 | { | 722 | { |
724 | struct ipv6_txoptions opt_space; | 723 | struct ipv6_txoptions opt_space; |
725 | struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; | 724 | struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; |
726 | struct in6_addr *daddr, *final_p = NULL, final; | 725 | struct in6_addr *daddr, *final_p = NULL, final; |
727 | struct inet_sock *inet = inet_sk(sk); | 726 | struct inet_sock *inet = inet_sk(sk); |
728 | struct ipv6_pinfo *np = inet6_sk(sk); | 727 | struct ipv6_pinfo *np = inet6_sk(sk); |
729 | struct raw6_sock *rp = raw6_sk(sk); | 728 | struct raw6_sock *rp = raw6_sk(sk); |
730 | struct ipv6_txoptions *opt = NULL; | 729 | struct ipv6_txoptions *opt = NULL; |
731 | struct ip6_flowlabel *flowlabel = NULL; | 730 | struct ip6_flowlabel *flowlabel = NULL; |
732 | struct dst_entry *dst = NULL; | 731 | struct dst_entry *dst = NULL; |
733 | struct flowi fl; | 732 | struct flowi fl; |
734 | int addr_len = msg->msg_namelen; | 733 | int addr_len = msg->msg_namelen; |
735 | int hlimit = -1; | 734 | int hlimit = -1; |
736 | int tclass = -1; | 735 | int tclass = -1; |
737 | u16 proto; | 736 | u16 proto; |
738 | int err; | 737 | int err; |
739 | 738 | ||
740 | /* Rough check on arithmetic overflow, | 739 | /* Rough check on arithmetic overflow, |
741 | a better check is made in ip6_append_data(). | 740 | a better check is made in ip6_append_data(). |
742 | */ | 741 | */ |
743 | if (len > INT_MAX) | 742 | if (len > INT_MAX) |
744 | return -EMSGSIZE; | 743 | return -EMSGSIZE; |
745 | 744 | ||
746 | /* Mirror BSD error message compatibility */ | 745 | /* Mirror BSD error message compatibility */ |
747 | if (msg->msg_flags & MSG_OOB) | 746 | if (msg->msg_flags & MSG_OOB) |
748 | return -EOPNOTSUPP; | 747 | return -EOPNOTSUPP; |
749 | 748 | ||
750 | /* | 749 | /* |
751 | * Get and verify the address. | 750 | * Get and verify the address. |
752 | */ | 751 | */ |
753 | memset(&fl, 0, sizeof(fl)); | 752 | memset(&fl, 0, sizeof(fl)); |
754 | 753 | ||
755 | fl.mark = sk->sk_mark; | 754 | fl.mark = sk->sk_mark; |
756 | 755 | ||
757 | if (sin6) { | 756 | if (sin6) { |
758 | if (addr_len < SIN6_LEN_RFC2133) | 757 | if (addr_len < SIN6_LEN_RFC2133) |
759 | return -EINVAL; | 758 | return -EINVAL; |
760 | 759 | ||
761 | if (sin6->sin6_family && sin6->sin6_family != AF_INET6) | 760 | if (sin6->sin6_family && sin6->sin6_family != AF_INET6) |
762 | return(-EAFNOSUPPORT); | 761 | return(-EAFNOSUPPORT); |
763 | 762 | ||
764 | /* port is the proto value [0..255] carried in nexthdr */ | 763 | /* port is the proto value [0..255] carried in nexthdr */ |
765 | proto = ntohs(sin6->sin6_port); | 764 | proto = ntohs(sin6->sin6_port); |
766 | 765 | ||
767 | if (!proto) | 766 | if (!proto) |
768 | proto = inet->inet_num; | 767 | proto = inet->inet_num; |
769 | else if (proto != inet->inet_num) | 768 | else if (proto != inet->inet_num) |
770 | return(-EINVAL); | 769 | return(-EINVAL); |
771 | 770 | ||
772 | if (proto > 255) | 771 | if (proto > 255) |
773 | return(-EINVAL); | 772 | return(-EINVAL); |
774 | 773 | ||
775 | daddr = &sin6->sin6_addr; | 774 | daddr = &sin6->sin6_addr; |
776 | if (np->sndflow) { | 775 | if (np->sndflow) { |
777 | fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; | 776 | fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; |
778 | if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { | 777 | if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { |
779 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | 778 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); |
780 | if (flowlabel == NULL) | 779 | if (flowlabel == NULL) |
781 | return -EINVAL; | 780 | return -EINVAL; |
782 | daddr = &flowlabel->dst; | 781 | daddr = &flowlabel->dst; |
783 | } | 782 | } |
784 | } | 783 | } |
785 | 784 | ||
786 | /* | 785 | /* |
787 | * Otherwise it will be difficult to maintain | 786 | * Otherwise it will be difficult to maintain |
788 | * sk->sk_dst_cache. | 787 | * sk->sk_dst_cache. |
789 | */ | 788 | */ |
790 | if (sk->sk_state == TCP_ESTABLISHED && | 789 | if (sk->sk_state == TCP_ESTABLISHED && |
791 | ipv6_addr_equal(daddr, &np->daddr)) | 790 | ipv6_addr_equal(daddr, &np->daddr)) |
792 | daddr = &np->daddr; | 791 | daddr = &np->daddr; |
793 | 792 | ||
794 | if (addr_len >= sizeof(struct sockaddr_in6) && | 793 | if (addr_len >= sizeof(struct sockaddr_in6) && |
795 | sin6->sin6_scope_id && | 794 | sin6->sin6_scope_id && |
796 | ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) | 795 | ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) |
797 | fl.oif = sin6->sin6_scope_id; | 796 | fl.oif = sin6->sin6_scope_id; |
798 | } else { | 797 | } else { |
799 | if (sk->sk_state != TCP_ESTABLISHED) | 798 | if (sk->sk_state != TCP_ESTABLISHED) |
800 | return -EDESTADDRREQ; | 799 | return -EDESTADDRREQ; |
801 | 800 | ||
802 | proto = inet->inet_num; | 801 | proto = inet->inet_num; |
803 | daddr = &np->daddr; | 802 | daddr = &np->daddr; |
804 | fl.fl6_flowlabel = np->flow_label; | 803 | fl.fl6_flowlabel = np->flow_label; |
805 | } | 804 | } |
806 | 805 | ||
807 | if (fl.oif == 0) | 806 | if (fl.oif == 0) |
808 | fl.oif = sk->sk_bound_dev_if; | 807 | fl.oif = sk->sk_bound_dev_if; |
809 | 808 | ||
810 | if (msg->msg_controllen) { | 809 | if (msg->msg_controllen) { |
811 | opt = &opt_space; | 810 | opt = &opt_space; |
812 | memset(opt, 0, sizeof(struct ipv6_txoptions)); | 811 | memset(opt, 0, sizeof(struct ipv6_txoptions)); |
813 | opt->tot_len = sizeof(struct ipv6_txoptions); | 812 | opt->tot_len = sizeof(struct ipv6_txoptions); |
814 | 813 | ||
815 | err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); | 814 | err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); |
816 | if (err < 0) { | 815 | if (err < 0) { |
817 | fl6_sock_release(flowlabel); | 816 | fl6_sock_release(flowlabel); |
818 | return err; | 817 | return err; |
819 | } | 818 | } |
820 | if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { | 819 | if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { |
821 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | 820 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); |
822 | if (flowlabel == NULL) | 821 | if (flowlabel == NULL) |
823 | return -EINVAL; | 822 | return -EINVAL; |
824 | } | 823 | } |
825 | if (!(opt->opt_nflen|opt->opt_flen)) | 824 | if (!(opt->opt_nflen|opt->opt_flen)) |
826 | opt = NULL; | 825 | opt = NULL; |
827 | } | 826 | } |
828 | if (opt == NULL) | 827 | if (opt == NULL) |
829 | opt = np->opt; | 828 | opt = np->opt; |
830 | if (flowlabel) | 829 | if (flowlabel) |
831 | opt = fl6_merge_options(&opt_space, flowlabel, opt); | 830 | opt = fl6_merge_options(&opt_space, flowlabel, opt); |
832 | opt = ipv6_fixup_options(&opt_space, opt); | 831 | opt = ipv6_fixup_options(&opt_space, opt); |
833 | 832 | ||
834 | fl.proto = proto; | 833 | fl.proto = proto; |
835 | err = rawv6_probe_proto_opt(&fl, msg); | 834 | err = rawv6_probe_proto_opt(&fl, msg); |
836 | if (err) | 835 | if (err) |
837 | goto out; | 836 | goto out; |
838 | 837 | ||
839 | if (!ipv6_addr_any(daddr)) | 838 | if (!ipv6_addr_any(daddr)) |
840 | ipv6_addr_copy(&fl.fl6_dst, daddr); | 839 | ipv6_addr_copy(&fl.fl6_dst, daddr); |
841 | else | 840 | else |
842 | fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ | 841 | fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ |
843 | if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) | 842 | if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) |
844 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | 843 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); |
845 | 844 | ||
846 | /* merge ip6_build_xmit from ip6_output */ | 845 | /* merge ip6_build_xmit from ip6_output */ |
847 | if (opt && opt->srcrt) { | 846 | if (opt && opt->srcrt) { |
848 | struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; | 847 | struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; |
849 | ipv6_addr_copy(&final, &fl.fl6_dst); | 848 | ipv6_addr_copy(&final, &fl.fl6_dst); |
850 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | 849 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); |
851 | final_p = &final; | 850 | final_p = &final; |
852 | } | 851 | } |
853 | 852 | ||
854 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) | 853 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) |
855 | fl.oif = np->mcast_oif; | 854 | fl.oif = np->mcast_oif; |
856 | security_sk_classify_flow(sk, &fl); | 855 | security_sk_classify_flow(sk, &fl); |
857 | 856 | ||
858 | err = ip6_dst_lookup(sk, &dst, &fl); | 857 | err = ip6_dst_lookup(sk, &dst, &fl); |
859 | if (err) | 858 | if (err) |
860 | goto out; | 859 | goto out; |
861 | if (final_p) | 860 | if (final_p) |
862 | ipv6_addr_copy(&fl.fl6_dst, final_p); | 861 | ipv6_addr_copy(&fl.fl6_dst, final_p); |
863 | 862 | ||
864 | err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); | 863 | err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); |
865 | if (err < 0) { | 864 | if (err < 0) { |
866 | if (err == -EREMOTE) | 865 | if (err == -EREMOTE) |
867 | err = ip6_dst_blackhole(sk, &dst, &fl); | 866 | err = ip6_dst_blackhole(sk, &dst, &fl); |
868 | if (err < 0) | 867 | if (err < 0) |
869 | goto out; | 868 | goto out; |
870 | } | 869 | } |
871 | 870 | ||
872 | if (hlimit < 0) { | 871 | if (hlimit < 0) { |
873 | if (ipv6_addr_is_multicast(&fl.fl6_dst)) | 872 | if (ipv6_addr_is_multicast(&fl.fl6_dst)) |
874 | hlimit = np->mcast_hops; | 873 | hlimit = np->mcast_hops; |
875 | else | 874 | else |
876 | hlimit = np->hop_limit; | 875 | hlimit = np->hop_limit; |
877 | if (hlimit < 0) | 876 | if (hlimit < 0) |
878 | hlimit = ip6_dst_hoplimit(dst); | 877 | hlimit = ip6_dst_hoplimit(dst); |
879 | } | 878 | } |
880 | 879 | ||
881 | if (tclass < 0) | 880 | if (tclass < 0) |
882 | tclass = np->tclass; | 881 | tclass = np->tclass; |
883 | 882 | ||
884 | if (msg->msg_flags&MSG_CONFIRM) | 883 | if (msg->msg_flags&MSG_CONFIRM) |
885 | goto do_confirm; | 884 | goto do_confirm; |
886 | 885 | ||
887 | back_from_confirm: | 886 | back_from_confirm: |
888 | if (inet->hdrincl) { | 887 | if (inet->hdrincl) { |
889 | err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); | 888 | err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); |
890 | } else { | 889 | } else { |
891 | lock_sock(sk); | 890 | lock_sock(sk); |
892 | err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, | 891 | err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, |
893 | len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, | 892 | len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, |
894 | msg->msg_flags); | 893 | msg->msg_flags); |
895 | 894 | ||
896 | if (err) | 895 | if (err) |
897 | ip6_flush_pending_frames(sk); | 896 | ip6_flush_pending_frames(sk); |
898 | else if (!(msg->msg_flags & MSG_MORE)) | 897 | else if (!(msg->msg_flags & MSG_MORE)) |
899 | err = rawv6_push_pending_frames(sk, &fl, rp); | 898 | err = rawv6_push_pending_frames(sk, &fl, rp); |
900 | release_sock(sk); | 899 | release_sock(sk); |
901 | } | 900 | } |
902 | done: | 901 | done: |
903 | dst_release(dst); | 902 | dst_release(dst); |
904 | out: | 903 | out: |
905 | fl6_sock_release(flowlabel); | 904 | fl6_sock_release(flowlabel); |
906 | return err<0?err:len; | 905 | return err<0?err:len; |
907 | do_confirm: | 906 | do_confirm: |
908 | dst_confirm(dst); | 907 | dst_confirm(dst); |
909 | if (!(msg->msg_flags & MSG_PROBE) || len) | 908 | if (!(msg->msg_flags & MSG_PROBE) || len) |
910 | goto back_from_confirm; | 909 | goto back_from_confirm; |
911 | err = 0; | 910 | err = 0; |
912 | goto done; | 911 | goto done; |
913 | } | 912 | } |
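As the "port is the proto value" comment in rawv6_sendmsg() notes, sin6_port on a raw IPv6 socket carries the nexthdr protocol number (0..255) and must be zero or equal to the protocol the socket was opened with. A minimal sketch of a send over such a socket (needs CAP_NET_RAW; the protocol number and address are illustrative):

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    /* Send one datagram of protocol 'proto' to 'dst6'; the kernel
     * builds the IPv6 header since IPV6_HDRINCL is not set. */
    static ssize_t raw6_send(uint8_t proto, const char *dst6,
                             const void *payload, size_t len)
    {
        struct sockaddr_in6 dst;
        ssize_t n;
        int fd = socket(AF_INET6, SOCK_RAW, proto);

        if (fd < 0)
            return -1;
        memset(&dst, 0, sizeof(dst));
        dst.sin6_family = AF_INET6;
        dst.sin6_port = 0;   /* 0 or htons(proto); anything else is -EINVAL */
        inet_pton(AF_INET6, dst6, &dst.sin6_addr);
        n = sendto(fd, payload, len, 0,
                   (struct sockaddr *)&dst, sizeof(dst));
        close(fd);
        return n;
    }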
914 | 913 | ||
915 | static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, | 914 | static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, |
916 | char __user *optval, int optlen) | 915 | char __user *optval, int optlen) |
917 | { | 916 | { |
918 | switch (optname) { | 917 | switch (optname) { |
919 | case ICMPV6_FILTER: | 918 | case ICMPV6_FILTER: |
920 | if (optlen > sizeof(struct icmp6_filter)) | 919 | if (optlen > sizeof(struct icmp6_filter)) |
921 | optlen = sizeof(struct icmp6_filter); | 920 | optlen = sizeof(struct icmp6_filter); |
922 | if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen)) | 921 | if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen)) |
923 | return -EFAULT; | 922 | return -EFAULT; |
924 | return 0; | 923 | return 0; |
925 | default: | 924 | default: |
926 | return -ENOPROTOOPT; | 925 | return -ENOPROTOOPT; |
927 | } | 926 | } |
928 | 927 | ||
929 | return 0; | 928 | return 0; |
930 | } | 929 | } |
931 | 930 | ||
932 | static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, | 931 | static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, |
933 | char __user *optval, int __user *optlen) | 932 | char __user *optval, int __user *optlen) |
934 | { | 933 | { |
935 | int len; | 934 | int len; |
936 | 935 | ||
937 | switch (optname) { | 936 | switch (optname) { |
938 | case ICMPV6_FILTER: | 937 | case ICMPV6_FILTER: |
939 | if (get_user(len, optlen)) | 938 | if (get_user(len, optlen)) |
940 | return -EFAULT; | 939 | return -EFAULT; |
941 | if (len < 0) | 940 | if (len < 0) |
942 | return -EINVAL; | 941 | return -EINVAL; |
943 | if (len > sizeof(struct icmp6_filter)) | 942 | if (len > sizeof(struct icmp6_filter)) |
944 | len = sizeof(struct icmp6_filter); | 943 | len = sizeof(struct icmp6_filter); |
945 | if (put_user(len, optlen)) | 944 | if (put_user(len, optlen)) |
946 | return -EFAULT; | 945 | return -EFAULT; |
947 | if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) | 946 | if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) |
948 | return -EFAULT; | 947 | return -EFAULT; |
949 | return 0; | 948 | return 0; |
950 | default: | 949 | default: |
951 | return -ENOPROTOOPT; | 950 | return -ENOPROTOOPT; |
952 | } | 951 | } |
953 | 952 | ||
954 | return 0; | 953 | return 0; |
955 | } | 954 | } |
956 | 955 | ||
957 | 956 | ||
958 | static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, | 957 | static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, |
959 | char __user *optval, unsigned int optlen) | 958 | char __user *optval, unsigned int optlen) |
960 | { | 959 | { |
961 | struct raw6_sock *rp = raw6_sk(sk); | 960 | struct raw6_sock *rp = raw6_sk(sk); |
962 | int val; | 961 | int val; |
963 | 962 | ||
964 | if (get_user(val, (int __user *)optval)) | 963 | if (get_user(val, (int __user *)optval)) |
965 | return -EFAULT; | 964 | return -EFAULT; |
966 | 965 | ||
967 | switch (optname) { | 966 | switch (optname) { |
968 | case IPV6_CHECKSUM: | 967 | case IPV6_CHECKSUM: |
969 | if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && | 968 | if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && |
970 | level == IPPROTO_IPV6) { | 969 | level == IPPROTO_IPV6) { |
971 | /* | 970 | /* |
972 | * RFC3542 tells that IPV6_CHECKSUM socket | 971 | * RFC3542 tells that IPV6_CHECKSUM socket |
973 | * option in the IPPROTO_IPV6 level is not | 972 | * option in the IPPROTO_IPV6 level is not |
974 | * allowed on ICMPv6 sockets. | 973 | * allowed on ICMPv6 sockets. |
975 | * If you want to set it, use IPPROTO_RAW | 974 | * If you want to set it, use IPPROTO_RAW |
976 | * level IPV6_CHECKSUM socket option | 975 | * level IPV6_CHECKSUM socket option |
977 | * (Linux extension). | 976 | * (Linux extension). |
978 | */ | 977 | */ |
979 | return -EINVAL; | 978 | return -EINVAL; |
980 | } | 979 | } |
981 | 980 | ||
982 | /* You may get strange result with a positive odd offset; | 981 | /* You may get strange result with a positive odd offset; |
983 | RFC2292bis agrees with me. */ | 982 | RFC2292bis agrees with me. */ |
984 | if (val > 0 && (val&1)) | 983 | if (val > 0 && (val&1)) |
985 | return(-EINVAL); | 984 | return(-EINVAL); |
986 | if (val < 0) { | 985 | if (val < 0) { |
987 | rp->checksum = 0; | 986 | rp->checksum = 0; |
988 | } else { | 987 | } else { |
989 | rp->checksum = 1; | 988 | rp->checksum = 1; |
990 | rp->offset = val; | 989 | rp->offset = val; |
991 | } | 990 | } |
992 | 991 | ||
993 | return 0; | 992 | return 0; |
994 | break; | 993 | break; |
995 | 994 | ||
996 | default: | 995 | default: |
997 | return(-ENOPROTOOPT); | 996 | return(-ENOPROTOOPT); |
998 | } | 997 | } |
999 | } | 998 | } |
1000 | 999 | ||
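do_rawv6_setsockopt() implements the RFC 3542 IPV6_CHECKSUM semantics: a non-negative even value asks the kernel to fill in and verify a checksum at that offset into the payload, a negative value turns the machinery off. A hedged userspace sketch for a raw OSPFv3 socket (IP protocol 89; its header keeps the checksum at offset 12, an assumption worth re-checking against RFC 5340):

	#include <stdio.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	int main(void)
	{
		int offset = 12;	/* must be even, per the check above */
		int fd = socket(AF_INET6, SOCK_RAW, 89);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_CHECKSUM,
			       &offset, sizeof(offset)) < 0)
			perror("setsockopt");
		return 0;
	}

Per the RFC3542 comment above, the same call on an ICMPv6 socket must use level SOL_RAW (the IPPROTO_RAW Linux extension) instead; the IPPROTO_IPV6 level is rejected there with -EINVAL.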
1001 | static int rawv6_setsockopt(struct sock *sk, int level, int optname, | 1000 | static int rawv6_setsockopt(struct sock *sk, int level, int optname, |
1002 | char __user *optval, unsigned int optlen) | 1001 | char __user *optval, unsigned int optlen) |
1003 | { | 1002 | { |
1004 | switch(level) { | 1003 | switch(level) { |
1005 | case SOL_RAW: | 1004 | case SOL_RAW: |
1006 | break; | 1005 | break; |
1007 | 1006 | ||
1008 | case SOL_ICMPV6: | 1007 | case SOL_ICMPV6: |
1009 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) | 1008 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) |
1010 | return -EOPNOTSUPP; | 1009 | return -EOPNOTSUPP; |
1011 | return rawv6_seticmpfilter(sk, level, optname, optval, | 1010 | return rawv6_seticmpfilter(sk, level, optname, optval, |
1012 | optlen); | 1011 | optlen); |
1013 | case SOL_IPV6: | 1012 | case SOL_IPV6: |
1014 | if (optname == IPV6_CHECKSUM) | 1013 | if (optname == IPV6_CHECKSUM) |
1015 | break; | 1014 | break; |
1016 | default: | 1015 | default: |
1017 | return ipv6_setsockopt(sk, level, optname, optval, | 1016 | return ipv6_setsockopt(sk, level, optname, optval, |
1018 | optlen); | 1017 | optlen); |
1019 | } | 1018 | } |
1020 | 1019 | ||
1021 | return do_rawv6_setsockopt(sk, level, optname, optval, optlen); | 1020 | return do_rawv6_setsockopt(sk, level, optname, optval, optlen); |
1022 | } | 1021 | } |
1023 | 1022 | ||
1024 | #ifdef CONFIG_COMPAT | 1023 | #ifdef CONFIG_COMPAT |
1025 | static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, | 1024 | static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, |
1026 | char __user *optval, unsigned int optlen) | 1025 | char __user *optval, unsigned int optlen) |
1027 | { | 1026 | { |
1028 | switch (level) { | 1027 | switch (level) { |
1029 | case SOL_RAW: | 1028 | case SOL_RAW: |
1030 | break; | 1029 | break; |
1031 | case SOL_ICMPV6: | 1030 | case SOL_ICMPV6: |
1032 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) | 1031 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) |
1033 | return -EOPNOTSUPP; | 1032 | return -EOPNOTSUPP; |
1034 | return rawv6_seticmpfilter(sk, level, optname, optval, optlen); | 1033 | return rawv6_seticmpfilter(sk, level, optname, optval, optlen); |
1035 | case SOL_IPV6: | 1034 | case SOL_IPV6: |
1036 | if (optname == IPV6_CHECKSUM) | 1035 | if (optname == IPV6_CHECKSUM) |
1037 | break; | 1036 | break; |
1038 | default: | 1037 | default: |
1039 | return compat_ipv6_setsockopt(sk, level, optname, | 1038 | return compat_ipv6_setsockopt(sk, level, optname, |
1040 | optval, optlen); | 1039 | optval, optlen); |
1041 | } | 1040 | } |
1042 | return do_rawv6_setsockopt(sk, level, optname, optval, optlen); | 1041 | return do_rawv6_setsockopt(sk, level, optname, optval, optlen); |
1043 | } | 1042 | } |
1044 | #endif | 1043 | #endif |
1045 | 1044 | ||
1046 | static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, | 1045 | static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, |
1047 | char __user *optval, int __user *optlen) | 1046 | char __user *optval, int __user *optlen) |
1048 | { | 1047 | { |
1049 | struct raw6_sock *rp = raw6_sk(sk); | 1048 | struct raw6_sock *rp = raw6_sk(sk); |
1050 | int val, len; | 1049 | int val, len; |
1051 | 1050 | ||
1052 | if (get_user(len,optlen)) | 1051 | if (get_user(len,optlen)) |
1053 | return -EFAULT; | 1052 | return -EFAULT; |
1054 | 1053 | ||
1055 | switch (optname) { | 1054 | switch (optname) { |
1056 | case IPV6_CHECKSUM: | 1055 | case IPV6_CHECKSUM: |
1057 | /* | 1056 | /* |
1058 | * We allow getsockopt() for IPPROTO_IPV6-level | 1057 | * We allow getsockopt() for IPPROTO_IPV6-level |
1059 | * IPV6_CHECKSUM socket option on ICMPv6 sockets | 1058 | * IPV6_CHECKSUM socket option on ICMPv6 sockets |
1060 | * since RFC3542 is silent about it. | 1059 | * since RFC3542 is silent about it. |
1061 | */ | 1060 | */ |
1062 | if (rp->checksum == 0) | 1061 | if (rp->checksum == 0) |
1063 | val = -1; | 1062 | val = -1; |
1064 | else | 1063 | else |
1065 | val = rp->offset; | 1064 | val = rp->offset; |
1066 | break; | 1065 | break; |
1067 | 1066 | ||
1068 | default: | 1067 | default: |
1069 | return -ENOPROTOOPT; | 1068 | return -ENOPROTOOPT; |
1070 | } | 1069 | } |
1071 | 1070 | ||
1072 | len = min_t(unsigned int, sizeof(int), len); | 1071 | len = min_t(unsigned int, sizeof(int), len); |
1073 | 1072 | ||
1074 | if (put_user(len, optlen)) | 1073 | if (put_user(len, optlen)) |
1075 | return -EFAULT; | 1074 | return -EFAULT; |
1076 | if (copy_to_user(optval,&val,len)) | 1075 | if (copy_to_user(optval,&val,len)) |
1077 | return -EFAULT; | 1076 | return -EFAULT; |
1078 | return 0; | 1077 | return 0; |
1079 | } | 1078 | } |
1080 | 1079 | ||
1081 | static int rawv6_getsockopt(struct sock *sk, int level, int optname, | 1080 | static int rawv6_getsockopt(struct sock *sk, int level, int optname, |
1082 | char __user *optval, int __user *optlen) | 1081 | char __user *optval, int __user *optlen) |
1083 | { | 1082 | { |
1084 | switch(level) { | 1083 | switch(level) { |
1085 | case SOL_RAW: | 1084 | case SOL_RAW: |
1086 | break; | 1085 | break; |
1087 | 1086 | ||
1088 | case SOL_ICMPV6: | 1087 | case SOL_ICMPV6: |
1089 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) | 1088 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) |
1090 | return -EOPNOTSUPP; | 1089 | return -EOPNOTSUPP; |
1091 | return rawv6_geticmpfilter(sk, level, optname, optval, | 1090 | return rawv6_geticmpfilter(sk, level, optname, optval, |
1092 | optlen); | 1091 | optlen); |
1093 | case SOL_IPV6: | 1092 | case SOL_IPV6: |
1094 | if (optname == IPV6_CHECKSUM) | 1093 | if (optname == IPV6_CHECKSUM) |
1095 | break; | 1094 | break; |
1096 | default: | 1095 | default: |
1097 | return ipv6_getsockopt(sk, level, optname, optval, | 1096 | return ipv6_getsockopt(sk, level, optname, optval, |
1098 | optlen); | 1097 | optlen); |
1099 | } | 1098 | } |
1100 | 1099 | ||
1101 | return do_rawv6_getsockopt(sk, level, optname, optval, optlen); | 1100 | return do_rawv6_getsockopt(sk, level, optname, optval, optlen); |
1102 | } | 1101 | } |
1103 | 1102 | ||
1104 | #ifdef CONFIG_COMPAT | 1103 | #ifdef CONFIG_COMPAT |
1105 | static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, | 1104 | static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, |
1106 | char __user *optval, int __user *optlen) | 1105 | char __user *optval, int __user *optlen) |
1107 | { | 1106 | { |
1108 | switch (level) { | 1107 | switch (level) { |
1109 | case SOL_RAW: | 1108 | case SOL_RAW: |
1110 | break; | 1109 | break; |
1111 | case SOL_ICMPV6: | 1110 | case SOL_ICMPV6: |
1112 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) | 1111 | if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) |
1113 | return -EOPNOTSUPP; | 1112 | return -EOPNOTSUPP; |
1114 | return rawv6_geticmpfilter(sk, level, optname, optval, optlen); | 1113 | return rawv6_geticmpfilter(sk, level, optname, optval, optlen); |
1115 | case SOL_IPV6: | 1114 | case SOL_IPV6: |
1116 | if (optname == IPV6_CHECKSUM) | 1115 | if (optname == IPV6_CHECKSUM) |
1117 | break; | 1116 | break; |
1118 | default: | 1117 | default: |
1119 | return compat_ipv6_getsockopt(sk, level, optname, | 1118 | return compat_ipv6_getsockopt(sk, level, optname, |
1120 | optval, optlen); | 1119 | optval, optlen); |
1121 | } | 1120 | } |
1122 | return do_rawv6_getsockopt(sk, level, optname, optval, optlen); | 1121 | return do_rawv6_getsockopt(sk, level, optname, optval, optlen); |
1123 | } | 1122 | } |
1124 | #endif | 1123 | #endif |
1125 | 1124 | ||
1126 | static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) | 1125 | static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) |
1127 | { | 1126 | { |
1128 | switch(cmd) { | 1127 | switch(cmd) { |
1129 | case SIOCOUTQ: | 1128 | case SIOCOUTQ: |
1130 | { | 1129 | { |
1131 | int amount = sk_wmem_alloc_get(sk); | 1130 | int amount = sk_wmem_alloc_get(sk); |
1132 | 1131 | ||
1133 | return put_user(amount, (int __user *)arg); | 1132 | return put_user(amount, (int __user *)arg); |
1134 | } | 1133 | } |
1135 | case SIOCINQ: | 1134 | case SIOCINQ: |
1136 | { | 1135 | { |
1137 | struct sk_buff *skb; | 1136 | struct sk_buff *skb; |
1138 | int amount = 0; | 1137 | int amount = 0; |
1139 | 1138 | ||
1140 | spin_lock_bh(&sk->sk_receive_queue.lock); | 1139 | spin_lock_bh(&sk->sk_receive_queue.lock); |
1141 | skb = skb_peek(&sk->sk_receive_queue); | 1140 | skb = skb_peek(&sk->sk_receive_queue); |
1142 | if (skb != NULL) | 1141 | if (skb != NULL) |
1143 | amount = skb->tail - skb->transport_header; | 1142 | amount = skb->tail - skb->transport_header; |
1144 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 1143 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
1145 | return put_user(amount, (int __user *)arg); | 1144 | return put_user(amount, (int __user *)arg); |
1146 | } | 1145 | } |
1147 | 1146 | ||
1148 | default: | 1147 | default: |
1149 | #ifdef CONFIG_IPV6_MROUTE | 1148 | #ifdef CONFIG_IPV6_MROUTE |
1150 | return ip6mr_ioctl(sk, cmd, (void __user *)arg); | 1149 | return ip6mr_ioctl(sk, cmd, (void __user *)arg); |
1151 | #else | 1150 | #else |
1152 | return -ENOIOCTLCMD; | 1151 | return -ENOIOCTLCMD; |
1153 | #endif | 1152 | #endif |
1154 | } | 1153 | } |
1155 | } | 1154 | } |
1156 | 1155 | ||
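rawv6_ioctl() gives SIOCINQ the transport-layer length of the first queued skb (tail minus transport_header) and SIOCOUTQ the bytes still charged to the send queue. A short sketch of both, assuming <linux/sockios.h> for the constants:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */

	int main(void)
	{
		int pending, unsent;
		int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		if (ioctl(fd, SIOCINQ, &pending) == 0)
			printf("next datagram: %d bytes\n", pending);
		if (ioctl(fd, SIOCOUTQ, &unsent) == 0)
			printf("unsent: %d bytes\n", unsent);
		return 0;
	}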
1157 | static void rawv6_close(struct sock *sk, long timeout) | 1156 | static void rawv6_close(struct sock *sk, long timeout) |
1158 | { | 1157 | { |
1159 | if (inet_sk(sk)->inet_num == IPPROTO_RAW) | 1158 | if (inet_sk(sk)->inet_num == IPPROTO_RAW) |
1160 | ip6_ra_control(sk, -1); | 1159 | ip6_ra_control(sk, -1); |
1161 | ip6mr_sk_done(sk); | 1160 | ip6mr_sk_done(sk); |
1162 | sk_common_release(sk); | 1161 | sk_common_release(sk); |
1163 | } | 1162 | } |
1164 | 1163 | ||
1165 | static void raw6_destroy(struct sock *sk) | 1164 | static void raw6_destroy(struct sock *sk) |
1166 | { | 1165 | { |
1167 | lock_sock(sk); | 1166 | lock_sock(sk); |
1168 | ip6_flush_pending_frames(sk); | 1167 | ip6_flush_pending_frames(sk); |
1169 | release_sock(sk); | 1168 | release_sock(sk); |
1170 | 1169 | ||
1171 | inet6_destroy_sock(sk); | 1170 | inet6_destroy_sock(sk); |
1172 | } | 1171 | } |
1173 | 1172 | ||
1174 | static int rawv6_init_sk(struct sock *sk) | 1173 | static int rawv6_init_sk(struct sock *sk) |
1175 | { | 1174 | { |
1176 | struct raw6_sock *rp = raw6_sk(sk); | 1175 | struct raw6_sock *rp = raw6_sk(sk); |
1177 | 1176 | ||
1178 | switch (inet_sk(sk)->inet_num) { | 1177 | switch (inet_sk(sk)->inet_num) { |
1179 | case IPPROTO_ICMPV6: | 1178 | case IPPROTO_ICMPV6: |
1180 | rp->checksum = 1; | 1179 | rp->checksum = 1; |
1181 | rp->offset = 2; | 1180 | rp->offset = 2; |
1182 | break; | 1181 | break; |
1183 | case IPPROTO_MH: | 1182 | case IPPROTO_MH: |
1184 | rp->checksum = 1; | 1183 | rp->checksum = 1; |
1185 | rp->offset = 4; | 1184 | rp->offset = 4; |
1186 | break; | 1185 | break; |
1187 | default: | 1186 | default: |
1188 | break; | 1187 | break; |
1189 | } | 1188 | } |
1190 | return(0); | 1189 | return(0); |
1191 | } | 1190 | } |
1192 | 1191 | ||
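The defaults installed by rawv6_init_sk() track the wire formats: the ICMPv6 checksum sits two bytes into its header, and the Mobility Header checksum four bytes in (RFC 3775). The ICMPv6 offset can be checked against the userspace header; a tiny sketch, assuming a C11 compiler for _Static_assert:

	#include <stddef.h>
	#include <netinet/icmp6.h>

	_Static_assert(offsetof(struct icmp6_hdr, icmp6_cksum) == 2,
		       "rp->offset = 2 matches the ICMPv6 header layout");

	int main(void)
	{
		return 0;
	}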
1193 | struct proto rawv6_prot = { | 1192 | struct proto rawv6_prot = { |
1194 | .name = "RAWv6", | 1193 | .name = "RAWv6", |
1195 | .owner = THIS_MODULE, | 1194 | .owner = THIS_MODULE, |
1196 | .close = rawv6_close, | 1195 | .close = rawv6_close, |
1197 | .destroy = raw6_destroy, | 1196 | .destroy = raw6_destroy, |
1198 | .connect = ip6_datagram_connect, | 1197 | .connect = ip6_datagram_connect, |
1199 | .disconnect = udp_disconnect, | 1198 | .disconnect = udp_disconnect, |
1200 | .ioctl = rawv6_ioctl, | 1199 | .ioctl = rawv6_ioctl, |
1201 | .init = rawv6_init_sk, | 1200 | .init = rawv6_init_sk, |
1202 | .setsockopt = rawv6_setsockopt, | 1201 | .setsockopt = rawv6_setsockopt, |
1203 | .getsockopt = rawv6_getsockopt, | 1202 | .getsockopt = rawv6_getsockopt, |
1204 | .sendmsg = rawv6_sendmsg, | 1203 | .sendmsg = rawv6_sendmsg, |
1205 | .recvmsg = rawv6_recvmsg, | 1204 | .recvmsg = rawv6_recvmsg, |
1206 | .bind = rawv6_bind, | 1205 | .bind = rawv6_bind, |
1207 | .backlog_rcv = rawv6_rcv_skb, | 1206 | .backlog_rcv = rawv6_rcv_skb, |
1208 | .hash = raw_hash_sk, | 1207 | .hash = raw_hash_sk, |
1209 | .unhash = raw_unhash_sk, | 1208 | .unhash = raw_unhash_sk, |
1210 | .obj_size = sizeof(struct raw6_sock), | 1209 | .obj_size = sizeof(struct raw6_sock), |
1211 | .h.raw_hash = &raw_v6_hashinfo, | 1210 | .h.raw_hash = &raw_v6_hashinfo, |
1212 | #ifdef CONFIG_COMPAT | 1211 | #ifdef CONFIG_COMPAT |
1213 | .compat_setsockopt = compat_rawv6_setsockopt, | 1212 | .compat_setsockopt = compat_rawv6_setsockopt, |
1214 | .compat_getsockopt = compat_rawv6_getsockopt, | 1213 | .compat_getsockopt = compat_rawv6_getsockopt, |
1215 | #endif | 1214 | #endif |
1216 | }; | 1215 | }; |
1217 | 1216 | ||
1218 | #ifdef CONFIG_PROC_FS | 1217 | #ifdef CONFIG_PROC_FS |
1219 | static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) | 1218 | static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) |
1220 | { | 1219 | { |
1221 | struct ipv6_pinfo *np = inet6_sk(sp); | 1220 | struct ipv6_pinfo *np = inet6_sk(sp); |
1222 | struct in6_addr *dest, *src; | 1221 | struct in6_addr *dest, *src; |
1223 | __u16 destp, srcp; | 1222 | __u16 destp, srcp; |
1224 | 1223 | ||
1225 | dest = &np->daddr; | 1224 | dest = &np->daddr; |
1226 | src = &np->rcv_saddr; | 1225 | src = &np->rcv_saddr; |
1227 | destp = 0; | 1226 | destp = 0; |
1228 | srcp = inet_sk(sp)->inet_num; | 1227 | srcp = inet_sk(sp)->inet_num; |
1229 | seq_printf(seq, | 1228 | seq_printf(seq, |
1230 | "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " | 1229 | "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " |
1231 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", | 1230 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", |
1232 | i, | 1231 | i, |
1233 | src->s6_addr32[0], src->s6_addr32[1], | 1232 | src->s6_addr32[0], src->s6_addr32[1], |
1234 | src->s6_addr32[2], src->s6_addr32[3], srcp, | 1233 | src->s6_addr32[2], src->s6_addr32[3], srcp, |
1235 | dest->s6_addr32[0], dest->s6_addr32[1], | 1234 | dest->s6_addr32[0], dest->s6_addr32[1], |
1236 | dest->s6_addr32[2], dest->s6_addr32[3], destp, | 1235 | dest->s6_addr32[2], dest->s6_addr32[3], destp, |
1237 | sp->sk_state, | 1236 | sp->sk_state, |
1238 | sk_wmem_alloc_get(sp), | 1237 | sk_wmem_alloc_get(sp), |
1239 | sk_rmem_alloc_get(sp), | 1238 | sk_rmem_alloc_get(sp), |
1240 | 0, 0L, 0, | 1239 | 0, 0L, 0, |
1241 | sock_i_uid(sp), 0, | 1240 | sock_i_uid(sp), 0, |
1242 | sock_i_ino(sp), | 1241 | sock_i_ino(sp), |
1243 | atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); | 1242 | atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); |
1244 | } | 1243 | } |
1245 | 1244 | ||
1246 | static int raw6_seq_show(struct seq_file *seq, void *v) | 1245 | static int raw6_seq_show(struct seq_file *seq, void *v) |
1247 | { | 1246 | { |
1248 | if (v == SEQ_START_TOKEN) | 1247 | if (v == SEQ_START_TOKEN) |
1249 | seq_printf(seq, | 1248 | seq_printf(seq, |
1250 | " sl " | 1249 | " sl " |
1251 | "local_address " | 1250 | "local_address " |
1252 | "remote_address " | 1251 | "remote_address " |
1253 | "st tx_queue rx_queue tr tm->when retrnsmt" | 1252 | "st tx_queue rx_queue tr tm->when retrnsmt" |
1254 | " uid timeout inode ref pointer drops\n"); | 1253 | " uid timeout inode ref pointer drops\n"); |
1255 | else | 1254 | else |
1256 | raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); | 1255 | raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); |
1257 | return 0; | 1256 | return 0; |
1258 | } | 1257 | } |
1259 | 1258 | ||
1260 | static const struct seq_operations raw6_seq_ops = { | 1259 | static const struct seq_operations raw6_seq_ops = { |
1261 | .start = raw_seq_start, | 1260 | .start = raw_seq_start, |
1262 | .next = raw_seq_next, | 1261 | .next = raw_seq_next, |
1263 | .stop = raw_seq_stop, | 1262 | .stop = raw_seq_stop, |
1264 | .show = raw6_seq_show, | 1263 | .show = raw6_seq_show, |
1265 | }; | 1264 | }; |
1266 | 1265 | ||
1267 | static int raw6_seq_open(struct inode *inode, struct file *file) | 1266 | static int raw6_seq_open(struct inode *inode, struct file *file) |
1268 | { | 1267 | { |
1269 | return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); | 1268 | return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); |
1270 | } | 1269 | } |
1271 | 1270 | ||
1272 | static const struct file_operations raw6_seq_fops = { | 1271 | static const struct file_operations raw6_seq_fops = { |
1273 | .owner = THIS_MODULE, | 1272 | .owner = THIS_MODULE, |
1274 | .open = raw6_seq_open, | 1273 | .open = raw6_seq_open, |
1275 | .read = seq_read, | 1274 | .read = seq_read, |
1276 | .llseek = seq_lseek, | 1275 | .llseek = seq_lseek, |
1277 | .release = seq_release_net, | 1276 | .release = seq_release_net, |
1278 | }; | 1277 | }; |
1279 | 1278 | ||
1280 | static int raw6_init_net(struct net *net) | 1279 | static int raw6_init_net(struct net *net) |
1281 | { | 1280 | { |
1282 | if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) | 1281 | if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) |
1283 | return -ENOMEM; | 1282 | return -ENOMEM; |
1284 | 1283 | ||
1285 | return 0; | 1284 | return 0; |
1286 | } | 1285 | } |
1287 | 1286 | ||
1288 | static void raw6_exit_net(struct net *net) | 1287 | static void raw6_exit_net(struct net *net) |
1289 | { | 1288 | { |
1290 | proc_net_remove(net, "raw6"); | 1289 | proc_net_remove(net, "raw6"); |
1291 | } | 1290 | } |
1292 | 1291 | ||
1293 | static struct pernet_operations raw6_net_ops = { | 1292 | static struct pernet_operations raw6_net_ops = { |
1294 | .init = raw6_init_net, | 1293 | .init = raw6_init_net, |
1295 | .exit = raw6_exit_net, | 1294 | .exit = raw6_exit_net, |
1296 | }; | 1295 | }; |
1297 | 1296 | ||
1298 | int __init raw6_proc_init(void) | 1297 | int __init raw6_proc_init(void) |
1299 | { | 1298 | { |
1300 | return register_pernet_subsys(&raw6_net_ops); | 1299 | return register_pernet_subsys(&raw6_net_ops); |
1301 | } | 1300 | } |
1302 | 1301 | ||
1303 | void raw6_proc_exit(void) | 1302 | void raw6_proc_exit(void) |
1304 | { | 1303 | { |
1305 | unregister_pernet_subsys(&raw6_net_ops); | 1304 | unregister_pernet_subsys(&raw6_net_ops); |
1306 | } | 1305 | } |
1307 | #endif /* CONFIG_PROC_FS */ | 1306 | #endif /* CONFIG_PROC_FS */ |
1308 | 1307 | ||
1309 | /* Same as inet6_dgram_ops, sans udp_poll. */ | 1308 | /* Same as inet6_dgram_ops, sans udp_poll. */ |
1310 | static const struct proto_ops inet6_sockraw_ops = { | 1309 | static const struct proto_ops inet6_sockraw_ops = { |
1311 | .family = PF_INET6, | 1310 | .family = PF_INET6, |
1312 | .owner = THIS_MODULE, | 1311 | .owner = THIS_MODULE, |
1313 | .release = inet6_release, | 1312 | .release = inet6_release, |
1314 | .bind = inet6_bind, | 1313 | .bind = inet6_bind, |
1315 | .connect = inet_dgram_connect, /* ok */ | 1314 | .connect = inet_dgram_connect, /* ok */ |
1316 | .socketpair = sock_no_socketpair, /* a do nothing */ | 1315 | .socketpair = sock_no_socketpair, /* a do nothing */ |
1317 | .accept = sock_no_accept, /* a do nothing */ | 1316 | .accept = sock_no_accept, /* a do nothing */ |
1318 | .getname = inet6_getname, | 1317 | .getname = inet6_getname, |
1319 | .poll = datagram_poll, /* ok */ | 1318 | .poll = datagram_poll, /* ok */ |
1320 | .ioctl = inet6_ioctl, /* must change */ | 1319 | .ioctl = inet6_ioctl, /* must change */ |
1321 | .listen = sock_no_listen, /* ok */ | 1320 | .listen = sock_no_listen, /* ok */ |
1322 | .shutdown = inet_shutdown, /* ok */ | 1321 | .shutdown = inet_shutdown, /* ok */ |
1323 | .setsockopt = sock_common_setsockopt, /* ok */ | 1322 | .setsockopt = sock_common_setsockopt, /* ok */ |
1324 | .getsockopt = sock_common_getsockopt, /* ok */ | 1323 | .getsockopt = sock_common_getsockopt, /* ok */ |
1325 | .sendmsg = inet_sendmsg, /* ok */ | 1324 | .sendmsg = inet_sendmsg, /* ok */ |
1326 | .recvmsg = sock_common_recvmsg, /* ok */ | 1325 | .recvmsg = sock_common_recvmsg, /* ok */ |
1327 | .mmap = sock_no_mmap, | 1326 | .mmap = sock_no_mmap, |
1328 | .sendpage = sock_no_sendpage, | 1327 | .sendpage = sock_no_sendpage, |
1329 | #ifdef CONFIG_COMPAT | 1328 | #ifdef CONFIG_COMPAT |
1330 | .compat_setsockopt = compat_sock_common_setsockopt, | 1329 | .compat_setsockopt = compat_sock_common_setsockopt, |
1331 | .compat_getsockopt = compat_sock_common_getsockopt, | 1330 | .compat_getsockopt = compat_sock_common_getsockopt, |
1332 | #endif | 1331 | #endif |
1333 | }; | 1332 | }; |
1334 | 1333 | ||
1335 | static struct inet_protosw rawv6_protosw = { | 1334 | static struct inet_protosw rawv6_protosw = { |
1336 | .type = SOCK_RAW, | 1335 | .type = SOCK_RAW, |
1337 | .protocol = IPPROTO_IP, /* wild card */ | 1336 | .protocol = IPPROTO_IP, /* wild card */ |
1338 | .prot = &rawv6_prot, | 1337 | .prot = &rawv6_prot, |
1339 | .ops = &inet6_sockraw_ops, | 1338 | .ops = &inet6_sockraw_ops, |
1340 | .capability = CAP_NET_RAW, | 1339 | .capability = CAP_NET_RAW, |
1341 | .no_check = UDP_CSUM_DEFAULT, | 1340 | .no_check = UDP_CSUM_DEFAULT, |
1342 | .flags = INET_PROTOSW_REUSE, | 1341 | .flags = INET_PROTOSW_REUSE, |
1343 | }; | 1342 | }; |
1344 | 1343 | ||
1345 | int __init rawv6_init(void) | 1344 | int __init rawv6_init(void) |
1346 | { | 1345 | { |
1347 | int ret; | 1346 | int ret; |
1348 | 1347 | ||
1349 | ret = inet6_register_protosw(&rawv6_protosw); | 1348 | ret = inet6_register_protosw(&rawv6_protosw); |
1350 | if (ret) | 1349 | if (ret) |
1351 | goto out; | 1350 | goto out; |
1352 | out: | 1351 | out: |
1353 | return ret; | 1352 | return ret; |
1354 | } | 1353 | } |
1355 | 1354 | ||
1356 | void rawv6_exit(void) | 1355 | void rawv6_exit(void) |
1357 | { | 1356 | { |
1358 | inet6_unregister_protosw(&rawv6_protosw); | 1357 | inet6_unregister_protosw(&rawv6_protosw); |
1359 | } | 1358 | } |
1360 | 1359 |
net/ipv6/udp.c
1 | /* | 1 | /* |
2 | * UDP over IPv6 | 2 | * UDP over IPv6 |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * Based on linux/ipv4/udp.c | 8 | * Based on linux/ipv4/udp.c |
9 | * | 9 | * |
10 | * Fixes: | 10 | * Fixes: |
11 | * Hideaki YOSHIFUJI : sin6_scope_id support | 11 | * Hideaki YOSHIFUJI : sin6_scope_id support |
12 | * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which | 12 | * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which |
13 | * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind | 13 | * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind |
14 | * a single port at the same time. | 14 | * a single port at the same time. |
15 | * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data | 15 | * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data |
16 | * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. | 16 | * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. |
17 | * | 17 | * |
18 | * This program is free software; you can redistribute it and/or | 18 | * This program is free software; you can redistribute it and/or |
19 | * modify it under the terms of the GNU General Public License | 19 | * modify it under the terms of the GNU General Public License |
20 | * as published by the Free Software Foundation; either version | 20 | * as published by the Free Software Foundation; either version |
21 | * 2 of the License, or (at your option) any later version. | 21 | * 2 of the License, or (at your option) any later version. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
26 | #include <linux/socket.h> | 26 | #include <linux/socket.h> |
27 | #include <linux/sockios.h> | 27 | #include <linux/sockios.h> |
28 | #include <linux/net.h> | 28 | #include <linux/net.h> |
29 | #include <linux/in6.h> | 29 | #include <linux/in6.h> |
30 | #include <linux/netdevice.h> | 30 | #include <linux/netdevice.h> |
31 | #include <linux/if_arp.h> | 31 | #include <linux/if_arp.h> |
32 | #include <linux/ipv6.h> | 32 | #include <linux/ipv6.h> |
33 | #include <linux/icmpv6.h> | 33 | #include <linux/icmpv6.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/skbuff.h> | 36 | #include <linux/skbuff.h> |
37 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
38 | 38 | ||
39 | #include <net/ndisc.h> | 39 | #include <net/ndisc.h> |
40 | #include <net/protocol.h> | 40 | #include <net/protocol.h> |
41 | #include <net/transp_v6.h> | 41 | #include <net/transp_v6.h> |
42 | #include <net/ip6_route.h> | 42 | #include <net/ip6_route.h> |
43 | #include <net/raw.h> | 43 | #include <net/raw.h> |
44 | #include <net/tcp_states.h> | 44 | #include <net/tcp_states.h> |
45 | #include <net/ip6_checksum.h> | 45 | #include <net/ip6_checksum.h> |
46 | #include <net/xfrm.h> | 46 | #include <net/xfrm.h> |
47 | 47 | ||
48 | #include <linux/proc_fs.h> | 48 | #include <linux/proc_fs.h> |
49 | #include <linux/seq_file.h> | 49 | #include <linux/seq_file.h> |
50 | #include "udp_impl.h" | 50 | #include "udp_impl.h" |
51 | 51 | ||
52 | int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) | 52 | int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) |
53 | { | 53 | { |
54 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; | 54 | const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; |
55 | const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); | 55 | const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); |
56 | __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; | 56 | __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; |
57 | __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); | 57 | __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); |
58 | int sk_ipv6only = ipv6_only_sock(sk); | 58 | int sk_ipv6only = ipv6_only_sock(sk); |
59 | int sk2_ipv6only = inet_v6_ipv6only(sk2); | 59 | int sk2_ipv6only = inet_v6_ipv6only(sk2); |
60 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); | 60 | int addr_type = ipv6_addr_type(sk_rcv_saddr6); |
61 | int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; | 61 | int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; |
62 | 62 | ||
63 | /* if both are mapped, treat as IPv4 */ | 63 | /* if both are mapped, treat as IPv4 */ |
64 | if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) | 64 | if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) |
65 | return (!sk2_ipv6only && | 65 | return (!sk2_ipv6only && |
66 | (!sk1_rcv_saddr || !sk2_rcv_saddr || | 66 | (!sk1_rcv_saddr || !sk2_rcv_saddr || |
67 | sk1_rcv_saddr == sk2_rcv_saddr)); | 67 | sk1_rcv_saddr == sk2_rcv_saddr)); |
68 | 68 | ||
69 | if (addr_type2 == IPV6_ADDR_ANY && | 69 | if (addr_type2 == IPV6_ADDR_ANY && |
70 | !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) | 70 | !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) |
71 | return 1; | 71 | return 1; |
72 | 72 | ||
73 | if (addr_type == IPV6_ADDR_ANY && | 73 | if (addr_type == IPV6_ADDR_ANY && |
74 | !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) | 74 | !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) |
75 | return 1; | 75 | return 1; |
76 | 76 | ||
77 | if (sk2_rcv_saddr6 && | 77 | if (sk2_rcv_saddr6 && |
78 | ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) | 78 | ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) |
79 | return 1; | 79 | return 1; |
80 | 80 | ||
81 | return 0; | 81 | return 0; |
82 | } | 82 | } |
83 | 83 | ||
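ipv6_rcv_saddr_equal() is why an AF_INET6 wildcard bind normally conflicts with an AF_INET socket on the same port: an unspecified v6 address matches a mapped (IPv4) one unless the v6 socket is v6-only. The opt-out is the standard IPV6_V6ONLY option (the sk2_ipv6only test above); a sketch:

	#include <stdio.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	int main(void)
	{
		int on = 1;
		int fd = socket(AF_INET6, SOCK_DGRAM, 0);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		/* a wildcard bind on fd now leaves the IPv4 side of the
		 * port free for a separate AF_INET socket */
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
			       &on, sizeof(on)) < 0)
			perror("setsockopt");
		return 0;
	}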
84 | int udp_v6_get_port(struct sock *sk, unsigned short snum) | 84 | int udp_v6_get_port(struct sock *sk, unsigned short snum) |
85 | { | 85 | { |
86 | return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); | 86 | return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); |
87 | } | 87 | } |
88 | 88 | ||
89 | static inline int compute_score(struct sock *sk, struct net *net, | 89 | static inline int compute_score(struct sock *sk, struct net *net, |
90 | unsigned short hnum, | 90 | unsigned short hnum, |
91 | struct in6_addr *saddr, __be16 sport, | 91 | struct in6_addr *saddr, __be16 sport, |
92 | struct in6_addr *daddr, __be16 dport, | 92 | struct in6_addr *daddr, __be16 dport, |
93 | int dif) | 93 | int dif) |
94 | { | 94 | { |
95 | int score = -1; | 95 | int score = -1; |
96 | 96 | ||
97 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && | 97 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && |
98 | sk->sk_family == PF_INET6) { | 98 | sk->sk_family == PF_INET6) { |
99 | struct ipv6_pinfo *np = inet6_sk(sk); | 99 | struct ipv6_pinfo *np = inet6_sk(sk); |
100 | struct inet_sock *inet = inet_sk(sk); | 100 | struct inet_sock *inet = inet_sk(sk); |
101 | 101 | ||
102 | score = 0; | 102 | score = 0; |
103 | if (inet->inet_dport) { | 103 | if (inet->inet_dport) { |
104 | if (inet->inet_dport != sport) | 104 | if (inet->inet_dport != sport) |
105 | return -1; | 105 | return -1; |
106 | score++; | 106 | score++; |
107 | } | 107 | } |
108 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 108 | if (!ipv6_addr_any(&np->rcv_saddr)) { |
109 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) | 109 | if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) |
110 | return -1; | 110 | return -1; |
111 | score++; | 111 | score++; |
112 | } | 112 | } |
113 | if (!ipv6_addr_any(&np->daddr)) { | 113 | if (!ipv6_addr_any(&np->daddr)) { |
114 | if (!ipv6_addr_equal(&np->daddr, saddr)) | 114 | if (!ipv6_addr_equal(&np->daddr, saddr)) |
115 | return -1; | 115 | return -1; |
116 | score++; | 116 | score++; |
117 | } | 117 | } |
118 | if (sk->sk_bound_dev_if) { | 118 | if (sk->sk_bound_dev_if) { |
119 | if (sk->sk_bound_dev_if != dif) | 119 | if (sk->sk_bound_dev_if != dif) |
120 | return -1; | 120 | return -1; |
121 | score++; | 121 | score++; |
122 | } | 122 | } |
123 | } | 123 | } |
124 | return score; | 124 | return score; |
125 | } | 125 | } |
126 | 126 | ||
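Worked example of the scoring: for a datagram to [2001:db8::1]:53 from [2001:db8::2]:1234, a socket bound to 2001:db8::1 and connect()ed to that exact peer matches dport, rcv_saddr and daddr and scores 3, while an unconnected wildcard bind on port 53 pins nothing down and scores 0; any hard mismatch (wrong peer, wrong bound device) returns -1 and removes the candidate outright. __udp6_lib_lookup() below keeps the best score, so the most specific socket always wins.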
127 | static struct sock *__udp6_lib_lookup(struct net *net, | 127 | static struct sock *__udp6_lib_lookup(struct net *net, |
128 | struct in6_addr *saddr, __be16 sport, | 128 | struct in6_addr *saddr, __be16 sport, |
129 | struct in6_addr *daddr, __be16 dport, | 129 | struct in6_addr *daddr, __be16 dport, |
130 | int dif, struct udp_table *udptable) | 130 | int dif, struct udp_table *udptable) |
131 | { | 131 | { |
132 | struct sock *sk, *result; | 132 | struct sock *sk, *result; |
133 | struct hlist_nulls_node *node; | 133 | struct hlist_nulls_node *node; |
134 | unsigned short hnum = ntohs(dport); | 134 | unsigned short hnum = ntohs(dport); |
135 | unsigned int hash = udp_hashfn(net, hnum, udptable->mask); | 135 | unsigned int hash = udp_hashfn(net, hnum, udptable->mask); |
136 | struct udp_hslot *hslot = &udptable->hash[hash]; | 136 | struct udp_hslot *hslot = &udptable->hash[hash]; |
137 | int score, badness; | 137 | int score, badness; |
138 | 138 | ||
139 | rcu_read_lock(); | 139 | rcu_read_lock(); |
140 | begin: | 140 | begin: |
141 | result = NULL; | 141 | result = NULL; |
142 | badness = -1; | 142 | badness = -1; |
143 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { | 143 | sk_nulls_for_each_rcu(sk, node, &hslot->head) { |
144 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); | 144 | score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); |
145 | if (score > badness) { | 145 | if (score > badness) { |
146 | result = sk; | 146 | result = sk; |
147 | badness = score; | 147 | badness = score; |
148 | } | 148 | } |
149 | } | 149 | } |
150 | /* | 150 | /* |
151 | * if the nulls value we got at the end of this lookup is | 151 | * if the nulls value we got at the end of this lookup is |
152 | * not the expected one, we must restart lookup. | 152 | * not the expected one, we must restart lookup. |
153 | * We probably met an item that was moved to another chain. | 153 | * We probably met an item that was moved to another chain. |
154 | */ | 154 | */ |
155 | if (get_nulls_value(node) != hash) | 155 | if (get_nulls_value(node) != hash) |
156 | goto begin; | 156 | goto begin; |
157 | 157 | ||
158 | if (result) { | 158 | if (result) { |
159 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) | 159 | if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) |
160 | result = NULL; | 160 | result = NULL; |
161 | else if (unlikely(compute_score(result, net, hnum, saddr, sport, | 161 | else if (unlikely(compute_score(result, net, hnum, saddr, sport, |
162 | daddr, dport, dif) < badness)) { | 162 | daddr, dport, dif) < badness)) { |
163 | sock_put(result); | 163 | sock_put(result); |
164 | goto begin; | 164 | goto begin; |
165 | } | 165 | } |
166 | } | 166 | } |
167 | rcu_read_unlock(); | 167 | rcu_read_unlock(); |
168 | return result; | 168 | return result; |
169 | } | 169 | } |
170 | 170 | ||
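The re-check after atomic_inc_not_zero() is the subtle half of this lookup: UDP socks of this era are freed with SLAB_DESTROY_BY_RCU semantics, so between spotting result under RCU and pinning the reference, the object can be recycled for a different binding. Re-running compute_score() under the reference, and restarting if the score dropped, closes that window.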
171 | static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, | 171 | static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, |
172 | __be16 sport, __be16 dport, | 172 | __be16 sport, __be16 dport, |
173 | struct udp_table *udptable) | 173 | struct udp_table *udptable) |
174 | { | 174 | { |
175 | struct sock *sk; | 175 | struct sock *sk; |
176 | struct ipv6hdr *iph = ipv6_hdr(skb); | 176 | struct ipv6hdr *iph = ipv6_hdr(skb); |
177 | 177 | ||
178 | if (unlikely(sk = skb_steal_sock(skb))) | 178 | if (unlikely(sk = skb_steal_sock(skb))) |
179 | return sk; | 179 | return sk; |
180 | return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, | 180 | return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, |
181 | &iph->daddr, dport, inet6_iif(skb), | 181 | &iph->daddr, dport, inet6_iif(skb), |
182 | udptable); | 182 | udptable); |
183 | } | 183 | } |
184 | 184 | ||
185 | /* | 185 | /* |
186 | * This should be easy, if there is something there we | 186 | * This should be easy, if there is something there we |
187 | * return it, otherwise we block. | 187 | * return it, otherwise we block. |
188 | */ | 188 | */ |
189 | 189 | ||
190 | int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, | 190 | int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, |
191 | struct msghdr *msg, size_t len, | 191 | struct msghdr *msg, size_t len, |
192 | int noblock, int flags, int *addr_len) | 192 | int noblock, int flags, int *addr_len) |
193 | { | 193 | { |
194 | struct ipv6_pinfo *np = inet6_sk(sk); | 194 | struct ipv6_pinfo *np = inet6_sk(sk); |
195 | struct inet_sock *inet = inet_sk(sk); | 195 | struct inet_sock *inet = inet_sk(sk); |
196 | struct sk_buff *skb; | 196 | struct sk_buff *skb; |
197 | unsigned int ulen, copied; | 197 | unsigned int ulen, copied; |
198 | int peeked; | 198 | int peeked; |
199 | int err; | 199 | int err; |
200 | int is_udplite = IS_UDPLITE(sk); | 200 | int is_udplite = IS_UDPLITE(sk); |
201 | int is_udp4; | 201 | int is_udp4; |
202 | 202 | ||
203 | if (addr_len) | 203 | if (addr_len) |
204 | *addr_len=sizeof(struct sockaddr_in6); | 204 | *addr_len=sizeof(struct sockaddr_in6); |
205 | 205 | ||
206 | if (flags & MSG_ERRQUEUE) | 206 | if (flags & MSG_ERRQUEUE) |
207 | return ipv6_recv_error(sk, msg, len); | 207 | return ipv6_recv_error(sk, msg, len); |
208 | 208 | ||
209 | try_again: | 209 | try_again: |
210 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), | 210 | skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), |
211 | &peeked, &err); | 211 | &peeked, &err); |
212 | if (!skb) | 212 | if (!skb) |
213 | goto out; | 213 | goto out; |
214 | 214 | ||
215 | ulen = skb->len - sizeof(struct udphdr); | 215 | ulen = skb->len - sizeof(struct udphdr); |
216 | copied = len; | 216 | copied = len; |
217 | if (copied > ulen) | 217 | if (copied > ulen) |
218 | copied = ulen; | 218 | copied = ulen; |
219 | else if (copied < ulen) | 219 | else if (copied < ulen) |
220 | msg->msg_flags |= MSG_TRUNC; | 220 | msg->msg_flags |= MSG_TRUNC; |
221 | 221 | ||
222 | is_udp4 = (skb->protocol == htons(ETH_P_IP)); | 222 | is_udp4 = (skb->protocol == htons(ETH_P_IP)); |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * If checksum is needed at all, try to do it while copying the | 225 | * If checksum is needed at all, try to do it while copying the |
226 | * data. If the data is truncated, or if we only want a partial | 226 | * data. If the data is truncated, or if we only want a partial |
227 | * coverage checksum (UDP-Lite), do it before the copy. | 227 | * coverage checksum (UDP-Lite), do it before the copy. |
228 | */ | 228 | */ |
229 | 229 | ||
230 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { | 230 | if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { |
231 | if (udp_lib_checksum_complete(skb)) | 231 | if (udp_lib_checksum_complete(skb)) |
232 | goto csum_copy_err; | 232 | goto csum_copy_err; |
233 | } | 233 | } |
234 | 234 | ||
235 | if (skb_csum_unnecessary(skb)) | 235 | if (skb_csum_unnecessary(skb)) |
236 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 236 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
237 | msg->msg_iov, copied ); | 237 | msg->msg_iov, copied ); |
238 | else { | 238 | else { |
239 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | 239 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); |
240 | if (err == -EINVAL) | 240 | if (err == -EINVAL) |
241 | goto csum_copy_err; | 241 | goto csum_copy_err; |
242 | } | 242 | } |
243 | if (err) | 243 | if (err) |
244 | goto out_free; | 244 | goto out_free; |
245 | 245 | ||
246 | if (!peeked) { | 246 | if (!peeked) { |
247 | if (is_udp4) | 247 | if (is_udp4) |
248 | UDP_INC_STATS_USER(sock_net(sk), | 248 | UDP_INC_STATS_USER(sock_net(sk), |
249 | UDP_MIB_INDATAGRAMS, is_udplite); | 249 | UDP_MIB_INDATAGRAMS, is_udplite); |
250 | else | 250 | else |
251 | UDP6_INC_STATS_USER(sock_net(sk), | 251 | UDP6_INC_STATS_USER(sock_net(sk), |
252 | UDP_MIB_INDATAGRAMS, is_udplite); | 252 | UDP_MIB_INDATAGRAMS, is_udplite); |
253 | } | 253 | } |
254 | 254 | ||
255 | sock_recv_ts_and_drops(msg, sk, skb); | 255 | sock_recv_ts_and_drops(msg, sk, skb); |
256 | 256 | ||
257 | /* Copy the address. */ | 257 | /* Copy the address. */ |
258 | if (msg->msg_name) { | 258 | if (msg->msg_name) { |
259 | struct sockaddr_in6 *sin6; | 259 | struct sockaddr_in6 *sin6; |
260 | 260 | ||
261 | sin6 = (struct sockaddr_in6 *) msg->msg_name; | 261 | sin6 = (struct sockaddr_in6 *) msg->msg_name; |
262 | sin6->sin6_family = AF_INET6; | 262 | sin6->sin6_family = AF_INET6; |
263 | sin6->sin6_port = udp_hdr(skb)->source; | 263 | sin6->sin6_port = udp_hdr(skb)->source; |
264 | sin6->sin6_flowinfo = 0; | 264 | sin6->sin6_flowinfo = 0; |
265 | sin6->sin6_scope_id = 0; | 265 | sin6->sin6_scope_id = 0; |
266 | 266 | ||
267 | if (is_udp4) | 267 | if (is_udp4) |
268 | ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, | 268 | ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, |
269 | &sin6->sin6_addr); | 269 | &sin6->sin6_addr); |
270 | else { | 270 | else { |
271 | ipv6_addr_copy(&sin6->sin6_addr, | 271 | ipv6_addr_copy(&sin6->sin6_addr, |
272 | &ipv6_hdr(skb)->saddr); | 272 | &ipv6_hdr(skb)->saddr); |
273 | if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) | 273 | if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) |
274 | sin6->sin6_scope_id = IP6CB(skb)->iif; | 274 | sin6->sin6_scope_id = IP6CB(skb)->iif; |
275 | } | 275 | } |
276 | 276 | ||
277 | } | 277 | } |
278 | if (is_udp4) { | 278 | if (is_udp4) { |
279 | if (inet->cmsg_flags) | 279 | if (inet->cmsg_flags) |
280 | ip_cmsg_recv(msg, skb); | 280 | ip_cmsg_recv(msg, skb); |
281 | } else { | 281 | } else { |
282 | if (np->rxopt.all) | 282 | if (np->rxopt.all) |
283 | datagram_recv_ctl(sk, msg, skb); | 283 | datagram_recv_ctl(sk, msg, skb); |
284 | } | 284 | } |
285 | 285 | ||
286 | err = copied; | 286 | err = copied; |
287 | if (flags & MSG_TRUNC) | 287 | if (flags & MSG_TRUNC) |
288 | err = ulen; | 288 | err = ulen; |
289 | 289 | ||
290 | out_free: | 290 | out_free: |
291 | lock_sock(sk); | 291 | lock_sock(sk); |
292 | skb_free_datagram(sk, skb); | 292 | skb_free_datagram(sk, skb); |
293 | release_sock(sk); | 293 | release_sock(sk); |
294 | out: | 294 | out: |
295 | return err; | 295 | return err; |
296 | 296 | ||
297 | csum_copy_err: | 297 | csum_copy_err: |
298 | lock_sock(sk); | 298 | lock_sock(sk); |
299 | if (!skb_kill_datagram(sk, skb, flags)) { | 299 | if (!skb_kill_datagram(sk, skb, flags)) { |
300 | if (is_udp4) | 300 | if (is_udp4) |
301 | UDP_INC_STATS_USER(sock_net(sk), | 301 | UDP_INC_STATS_USER(sock_net(sk), |
302 | UDP_MIB_INERRORS, is_udplite); | 302 | UDP_MIB_INERRORS, is_udplite); |
303 | else | 303 | else |
304 | UDP6_INC_STATS_USER(sock_net(sk), | 304 | UDP6_INC_STATS_USER(sock_net(sk), |
305 | UDP_MIB_INERRORS, is_udplite); | 305 | UDP_MIB_INERRORS, is_udplite); |
306 | } | 306 | } |
307 | release_sock(sk); | 307 | release_sock(sk); |
308 | 308 | ||
309 | if (flags & MSG_DONTWAIT) | 309 | if (flags & MSG_DONTWAIT) |
310 | return -EAGAIN; | 310 | return -EAGAIN; |
311 | goto try_again; | 311 | goto try_again; |
312 | } | 312 | } |
313 | 313 | ||
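The tail of udpv6_recvmsg() is worth pausing on: with MSG_TRUNC set, the return value is ulen, the full datagram size, even when fewer bytes were copied. Combined with MSG_PEEK that makes a cheap "how big is the next datagram" probe; a helper sketch:

	#include <sys/types.h>
	#include <sys/socket.h>

	/* Size of the next queued datagram on fd, leaving it queued;
	 * returns -1 (EAGAIN) if the queue is empty. */
	ssize_t next_datagram_len(int fd)
	{
		char byte;

		return recv(fd, &byte, sizeof(byte),
			    MSG_PEEK | MSG_TRUNC | MSG_DONTWAIT);
	}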
314 | void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 314 | void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
315 | u8 type, u8 code, int offset, __be32 info, | 315 | u8 type, u8 code, int offset, __be32 info, |
316 | struct udp_table *udptable) | 316 | struct udp_table *udptable) |
317 | { | 317 | { |
318 | struct ipv6_pinfo *np; | 318 | struct ipv6_pinfo *np; |
319 | struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; | 319 | struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; |
320 | struct in6_addr *saddr = &hdr->saddr; | 320 | struct in6_addr *saddr = &hdr->saddr; |
321 | struct in6_addr *daddr = &hdr->daddr; | 321 | struct in6_addr *daddr = &hdr->daddr; |
322 | struct udphdr *uh = (struct udphdr*)(skb->data+offset); | 322 | struct udphdr *uh = (struct udphdr*)(skb->data+offset); |
323 | struct sock *sk; | 323 | struct sock *sk; |
324 | int err; | 324 | int err; |
325 | 325 | ||
326 | sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, | 326 | sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, |
327 | saddr, uh->source, inet6_iif(skb), udptable); | 327 | saddr, uh->source, inet6_iif(skb), udptable); |
328 | if (sk == NULL) | 328 | if (sk == NULL) |
329 | return; | 329 | return; |
330 | 330 | ||
331 | np = inet6_sk(sk); | 331 | np = inet6_sk(sk); |
332 | 332 | ||
333 | if (!icmpv6_err_convert(type, code, &err) && !np->recverr) | 333 | if (!icmpv6_err_convert(type, code, &err) && !np->recverr) |
334 | goto out; | 334 | goto out; |
335 | 335 | ||
336 | if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) | 336 | if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) |
337 | goto out; | 337 | goto out; |
338 | 338 | ||
339 | if (np->recverr) | 339 | if (np->recverr) |
340 | ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); | 340 | ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); |
341 | 341 | ||
342 | sk->sk_err = err; | 342 | sk->sk_err = err; |
343 | sk->sk_error_report(sk); | 343 | sk->sk_error_report(sk); |
344 | out: | 344 | out: |
345 | sock_put(sk); | 345 | sock_put(sk); |
346 | } | 346 | } |
347 | 347 | ||
348 | static __inline__ void udpv6_err(struct sk_buff *skb, | 348 | static __inline__ void udpv6_err(struct sk_buff *skb, |
349 | struct inet6_skb_parm *opt, u8 type, | 349 | struct inet6_skb_parm *opt, u8 type, |
350 | u8 code, int offset, __be32 info ) | 350 | u8 code, int offset, __be32 info ) |
351 | { | 351 | { |
352 | __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); | 352 | __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); |
353 | } | 353 | } |
354 | 354 | ||
355 | int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 355 | int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) |
356 | { | 356 | { |
357 | struct udp_sock *up = udp_sk(sk); | 357 | struct udp_sock *up = udp_sk(sk); |
358 | int rc; | 358 | int rc; |
359 | int is_udplite = IS_UDPLITE(sk); | 359 | int is_udplite = IS_UDPLITE(sk); |
360 | 360 | ||
361 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) | 361 | if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) |
362 | goto drop; | 362 | goto drop; |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). | 365 | * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). |
366 | */ | 366 | */ |
367 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { | 367 | if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { |
368 | 368 | ||
369 | if (up->pcrlen == 0) { /* full coverage was set */ | 369 | if (up->pcrlen == 0) { /* full coverage was set */ |
370 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" | 370 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" |
371 | " %d while full coverage %d requested\n", | 371 | " %d while full coverage %d requested\n", |
372 | UDP_SKB_CB(skb)->cscov, skb->len); | 372 | UDP_SKB_CB(skb)->cscov, skb->len); |
373 | goto drop; | 373 | goto drop; |
374 | } | 374 | } |
375 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { | 375 | if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { |
376 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " | 376 | LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " |
377 | "too small, need min %d\n", | 377 | "too small, need min %d\n", |
378 | UDP_SKB_CB(skb)->cscov, up->pcrlen); | 378 | UDP_SKB_CB(skb)->cscov, up->pcrlen); |
379 | goto drop; | 379 | goto drop; |
380 | } | 380 | } |
381 | } | 381 | } |
382 | 382 | ||
383 | if (sk->sk_filter) { | 383 | if (sk->sk_filter) { |
384 | if (udp_lib_checksum_complete(skb)) | 384 | if (udp_lib_checksum_complete(skb)) |
385 | goto drop; | 385 | goto drop; |
386 | } | 386 | } |
387 | 387 | ||
388 | if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { | 388 | if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { |
389 | /* Note that an ENOMEM error is charged twice */ | 389 | /* Note that an ENOMEM error is charged twice */ |
390 | if (rc == -ENOMEM) | 390 | if (rc == -ENOMEM) |
391 | UDP6_INC_STATS_BH(sock_net(sk), | 391 | UDP6_INC_STATS_BH(sock_net(sk), |
392 | UDP_MIB_RCVBUFERRORS, is_udplite); | 392 | UDP_MIB_RCVBUFERRORS, is_udplite); |
393 | goto drop; | 393 | goto drop_no_sk_drops_inc; |
394 | } | 394 | } |
395 | 395 | ||
396 | return 0; | 396 | return 0; |
397 | drop: | 397 | drop: |
| | 398 | atomic_inc(&sk->sk_drops); |
| | 399 | drop_no_sk_drops_inc: |
398 | UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 400 | UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
399 | kfree_skb(skb); | 401 | kfree_skb(skb); |
400 | return -1; | 402 | return -1; |
401 | } | 403 | } |
402 | 404 | ||
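The hunk above is the point of this patch for UDPv6: frames dropped by the XFRM policy check, the UDP-Lite coverage tests or a checksum failure now increment sk->sk_drops, while the sock_queue_rcv_skb() failure path jumps past the increment via drop_no_sk_drops_inc, presumably because that failure is already accounted for after part 1 of this series. The counter is user-visible as the trailing drops column that raw6_sock_seq_show() prints above; a trivial reader, sketched for /proc/net/raw6:

	#include <stdio.h>

	int main(void)
	{
		char line[512];
		FILE *f = fopen("/proc/net/raw6", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* last column is sk_drops */
		fclose(f);
		return 0;
	}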
403 | static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, | 405 | static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, |
404 | __be16 loc_port, struct in6_addr *loc_addr, | 406 | __be16 loc_port, struct in6_addr *loc_addr, |
405 | __be16 rmt_port, struct in6_addr *rmt_addr, | 407 | __be16 rmt_port, struct in6_addr *rmt_addr, |
406 | int dif) | 408 | int dif) |
407 | { | 409 | { |
408 | struct hlist_nulls_node *node; | 410 | struct hlist_nulls_node *node; |
409 | struct sock *s = sk; | 411 | struct sock *s = sk; |
410 | unsigned short num = ntohs(loc_port); | 412 | unsigned short num = ntohs(loc_port); |
411 | 413 | ||
412 | sk_nulls_for_each_from(s, node) { | 414 | sk_nulls_for_each_from(s, node) { |
413 | struct inet_sock *inet = inet_sk(s); | 415 | struct inet_sock *inet = inet_sk(s); |
414 | 416 | ||
415 | if (!net_eq(sock_net(s), net)) | 417 | if (!net_eq(sock_net(s), net)) |
416 | continue; | 418 | continue; |
417 | 419 | ||
418 | if (s->sk_hash == num && s->sk_family == PF_INET6) { | 420 | if (s->sk_hash == num && s->sk_family == PF_INET6) { |
419 | struct ipv6_pinfo *np = inet6_sk(s); | 421 | struct ipv6_pinfo *np = inet6_sk(s); |
420 | if (inet->inet_dport) { | 422 | if (inet->inet_dport) { |
421 | if (inet->inet_dport != rmt_port) | 423 | if (inet->inet_dport != rmt_port) |
422 | continue; | 424 | continue; |
423 | } | 425 | } |
424 | if (!ipv6_addr_any(&np->daddr) && | 426 | if (!ipv6_addr_any(&np->daddr) && |
425 | !ipv6_addr_equal(&np->daddr, rmt_addr)) | 427 | !ipv6_addr_equal(&np->daddr, rmt_addr)) |
426 | continue; | 428 | continue; |
427 | 429 | ||
428 | if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) | 430 | if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) |
429 | continue; | 431 | continue; |
430 | 432 | ||
431 | if (!ipv6_addr_any(&np->rcv_saddr)) { | 433 | if (!ipv6_addr_any(&np->rcv_saddr)) { |
432 | if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) | 434 | if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) |
433 | continue; | 435 | continue; |
434 | } | 436 | } |
435 | if (!inet6_mc_check(s, loc_addr, rmt_addr)) | 437 | if (!inet6_mc_check(s, loc_addr, rmt_addr)) |
436 | continue; | 438 | continue; |
437 | return s; | 439 | return s; |
438 | } | 440 | } |
439 | } | 441 | } |
440 | return NULL; | 442 | return NULL; |
441 | } | 443 | } |
442 | 444 | ||
443 | /* | 445 | /* |
444 | * Note: called only from the BH handler context, | 446 | * Note: called only from the BH handler context, |
445 | * so we don't need to lock the hashes. | 447 | * so we don't need to lock the hashes. |
446 | */ | 448 | */ |
447 | static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | 449 | static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, |
448 | struct in6_addr *saddr, struct in6_addr *daddr, | 450 | struct in6_addr *saddr, struct in6_addr *daddr, |
449 | struct udp_table *udptable) | 451 | struct udp_table *udptable) |
450 | { | 452 | { |
451 | struct sock *sk, *sk2; | 453 | struct sock *sk, *sk2; |
452 | const struct udphdr *uh = udp_hdr(skb); | 454 | const struct udphdr *uh = udp_hdr(skb); |
453 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); | 455 | struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); |
454 | int dif; | 456 | int dif; |
455 | 457 | ||
456 | spin_lock(&hslot->lock); | 458 | spin_lock(&hslot->lock); |
457 | sk = sk_nulls_head(&hslot->head); | 459 | sk = sk_nulls_head(&hslot->head); |
458 | dif = inet6_iif(skb); | 460 | dif = inet6_iif(skb); |
459 | sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); | 461 | sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); |
460 | if (!sk) { | 462 | if (!sk) { |
461 | kfree_skb(skb); | 463 | kfree_skb(skb); |
462 | goto out; | 464 | goto out; |
463 | } | 465 | } |
464 | 466 | ||
465 | sk2 = sk; | 467 | sk2 = sk; |
466 | while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, | 468 | while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, |
467 | uh->source, saddr, dif))) { | 469 | uh->source, saddr, dif))) { |
468 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); | 470 | struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); |
469 | if (buff) { | 471 | if (buff) { |
470 | bh_lock_sock(sk2); | 472 | bh_lock_sock(sk2); |
471 | if (!sock_owned_by_user(sk2)) | 473 | if (!sock_owned_by_user(sk2)) |
472 | udpv6_queue_rcv_skb(sk2, buff); | 474 | udpv6_queue_rcv_skb(sk2, buff); |
473 | else | 475 | else |
474 | sk_add_backlog(sk2, buff); | 476 | sk_add_backlog(sk2, buff); |
475 | bh_unlock_sock(sk2); | 477 | bh_unlock_sock(sk2); |
476 | } | 478 | } |
477 | } | 479 | } |
478 | bh_lock_sock(sk); | 480 | bh_lock_sock(sk); |
479 | if (!sock_owned_by_user(sk)) | 481 | if (!sock_owned_by_user(sk)) |
480 | udpv6_queue_rcv_skb(sk, skb); | 482 | udpv6_queue_rcv_skb(sk, skb); |
481 | else | 483 | else |
482 | sk_add_backlog(sk, skb); | 484 | sk_add_backlog(sk, skb); |
483 | bh_unlock_sock(sk); | 485 | bh_unlock_sock(sk); |
484 | out: | 486 | out: |
485 | spin_unlock(&hslot->lock); | 487 | spin_unlock(&hslot->lock); |
486 | return 0; | 488 | return 0; |
487 | } | 489 | } |
488 | 490 | ||
489 | static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, | 491 | static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, |
490 | int proto) | 492 | int proto) |
491 | { | 493 | { |
492 | int err; | 494 | int err; |
493 | 495 | ||
494 | UDP_SKB_CB(skb)->partial_cov = 0; | 496 | UDP_SKB_CB(skb)->partial_cov = 0; |
495 | UDP_SKB_CB(skb)->cscov = skb->len; | 497 | UDP_SKB_CB(skb)->cscov = skb->len; |
496 | 498 | ||
497 | if (proto == IPPROTO_UDPLITE) { | 499 | if (proto == IPPROTO_UDPLITE) { |
498 | err = udplite_checksum_init(skb, uh); | 500 | err = udplite_checksum_init(skb, uh); |
499 | if (err) | 501 | if (err) |
500 | return err; | 502 | return err; |
501 | } | 503 | } |
502 | 504 | ||
503 | if (uh->check == 0) { | 505 | if (uh->check == 0) { |
504 | /* RFC 2460 section 8.1 says that we SHOULD log | 506 | /* RFC 2460 section 8.1 says that we SHOULD log |
505 | this error. Well, it is reasonable. | 507 | this error. Well, it is reasonable. |
506 | */ | 508 | */ |
507 | LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); | 509 | LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); |
508 | return 1; | 510 | return 1; |
509 | } | 511 | } |
510 | if (skb->ip_summed == CHECKSUM_COMPLETE && | 512 | if (skb->ip_summed == CHECKSUM_COMPLETE && |
511 | !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, | 513 | !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, |
512 | skb->len, proto, skb->csum)) | 514 | skb->len, proto, skb->csum)) |
513 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 515 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
514 | 516 | ||
515 | if (!skb_csum_unnecessary(skb)) | 517 | if (!skb_csum_unnecessary(skb)) |
516 | skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, | 518 | skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, |
517 | &ipv6_hdr(skb)->daddr, | 519 | &ipv6_hdr(skb)->daddr, |
518 | skb->len, proto, 0)); | 520 | skb->len, proto, 0)); |
519 | 521 | ||
520 | return 0; | 522 | return 0; |
521 | } | 523 | } |
522 | 524 | ||
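One asymmetry worth noting in udp6_csum_init(): a checksum of zero is legal for UDP over IPv4 (it means "not computed") but forbidden for IPv6 by RFC 2460 section 8.1, which is why uh->check == 0 makes the function return nonzero and sends the caller off to discard the frame.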
523 | int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, | 525 | int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, |
524 | int proto) | 526 | int proto) |
525 | { | 527 | { |
526 | struct sock *sk; | 528 | struct sock *sk; |
527 | struct udphdr *uh; | 529 | struct udphdr *uh; |
528 | struct net_device *dev = skb->dev; | 530 | struct net_device *dev = skb->dev; |
529 | struct in6_addr *saddr, *daddr; | 531 | struct in6_addr *saddr, *daddr; |
530 | u32 ulen = 0; | 532 | u32 ulen = 0; |
531 | struct net *net = dev_net(skb->dev); | 533 | struct net *net = dev_net(skb->dev); |
532 | 534 | ||
533 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) | 535 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
534 | goto short_packet; | 536 | goto short_packet; |
535 | 537 | ||
536 | saddr = &ipv6_hdr(skb)->saddr; | 538 | saddr = &ipv6_hdr(skb)->saddr; |
537 | daddr = &ipv6_hdr(skb)->daddr; | 539 | daddr = &ipv6_hdr(skb)->daddr; |
538 | uh = udp_hdr(skb); | 540 | uh = udp_hdr(skb); |
539 | 541 | ||
540 | ulen = ntohs(uh->len); | 542 | ulen = ntohs(uh->len); |
541 | if (ulen > skb->len) | 543 | if (ulen > skb->len) |
542 | goto short_packet; | 544 | goto short_packet; |
543 | 545 | ||
544 | if (proto == IPPROTO_UDP) { | 546 | if (proto == IPPROTO_UDP) { |
545 | /* UDP validates ulen. */ | 547 | /* UDP validates ulen. */ |
546 | 548 | ||
547 | /* Check for jumbo payload */ | 549 | /* Check for jumbo payload */ |
548 | if (ulen == 0) | 550 | if (ulen == 0) |
549 | ulen = skb->len; | 551 | ulen = skb->len; |
550 | 552 | ||
551 | if (ulen < sizeof(*uh)) | 553 | if (ulen < sizeof(*uh)) |
552 | goto short_packet; | 554 | goto short_packet; |
553 | 555 | ||
554 | if (ulen < skb->len) { | 556 | if (ulen < skb->len) { |
555 | if (pskb_trim_rcsum(skb, ulen)) | 557 | if (pskb_trim_rcsum(skb, ulen)) |
556 | goto short_packet; | 558 | goto short_packet; |
557 | saddr = &ipv6_hdr(skb)->saddr; | 559 | saddr = &ipv6_hdr(skb)->saddr; |
558 | daddr = &ipv6_hdr(skb)->daddr; | 560 | daddr = &ipv6_hdr(skb)->daddr; |
559 | uh = udp_hdr(skb); | 561 | uh = udp_hdr(skb); |
560 | } | 562 | } |
561 | } | 563 | } |
562 | 564 | ||
563 | if (udp6_csum_init(skb, uh, proto)) | 565 | if (udp6_csum_init(skb, uh, proto)) |
564 | goto discard; | 566 | goto discard; |
565 | 567 | ||
566 | /* | 568 | /* |
567 | * Multicast receive code | 569 | * Multicast receive code |
568 | */ | 570 | */ |
569 | if (ipv6_addr_is_multicast(daddr)) | 571 | if (ipv6_addr_is_multicast(daddr)) |
570 | return __udp6_lib_mcast_deliver(net, skb, | 572 | return __udp6_lib_mcast_deliver(net, skb, |
571 | saddr, daddr, udptable); | 573 | saddr, daddr, udptable); |
572 | 574 | ||
573 | /* Unicast */ | 575 | /* Unicast */ |
574 | 576 | ||
575 | /* | 577 | /* |
576 | * check socket cache ... must talk to Alan about his plans | 578 | * check socket cache ... must talk to Alan about his plans |
577 | * for sock caches... I'll skip this for now. | 579 | * for sock caches... I'll skip this for now. |
578 | */ | 580 | */ |
579 | sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); | 581 | sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); |
580 | 582 | ||
581 | if (sk == NULL) { | 583 | if (sk == NULL) { |
582 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) | 584 | if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) |
583 | goto discard; | 585 | goto discard; |
584 | 586 | ||
585 | if (udp_lib_checksum_complete(skb)) | 587 | if (udp_lib_checksum_complete(skb)) |
586 | goto discard; | 588 | goto discard; |
587 | UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, | 589 | UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, |
588 | proto == IPPROTO_UDPLITE); | 590 | proto == IPPROTO_UDPLITE); |
589 | 591 | ||
590 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); | 592 | icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); |
591 | 593 | ||
592 | kfree_skb(skb); | 594 | kfree_skb(skb); |
593 | return 0; | 595 | return 0; |
594 | } | 596 | } |
595 | 597 | ||
596 | /* deliver */ | 598 | /* deliver */ |
597 | 599 | ||
598 | bh_lock_sock(sk); | 600 | bh_lock_sock(sk); |
599 | if (!sock_owned_by_user(sk)) | 601 | if (!sock_owned_by_user(sk)) |
600 | udpv6_queue_rcv_skb(sk, skb); | 602 | udpv6_queue_rcv_skb(sk, skb); |
601 | else | 603 | else |
602 | sk_add_backlog(sk, skb); | 604 | sk_add_backlog(sk, skb); |
603 | bh_unlock_sock(sk); | 605 | bh_unlock_sock(sk); |
604 | sock_put(sk); | 606 | sock_put(sk); |
605 | return 0; | 607 | return 0; |
606 | 608 | ||
607 | short_packet: | 609 | short_packet: |
608 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", | 610 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", |
609 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 611 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
610 | ulen, skb->len); | 612 | ulen, skb->len); |
611 | 613 | ||
612 | discard: | 614 | discard: |
613 | UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); | 615 | UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); |
614 | kfree_skb(skb); | 616 | kfree_skb(skb); |
615 | return 0; | 617 | return 0; |
616 | } | 618 | } |
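/* Editor's sketch (userspace C, hypothetical helper): the same ulen sanity
 * order __udp6_lib_rcv() applies -- pull the header, treat a zero length as
 * an IPv6 jumbogram, reject anything shorter than the header or longer than
 * the bytes actually present, and let the caller trim the tail. caplen
 * stands in for skb->len. */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct udp_hdr_local { uint16_t source, dest, len, check; };

static int udp6_validate_len(const uint8_t *pkt, size_t caplen, size_t *ulen)
{
	struct udp_hdr_local uh;

	if (caplen < sizeof(uh))	/* "short_packet" */
		return -1;
	memcpy(&uh, pkt, sizeof(uh));
	*ulen = ntohs(uh.len);
	if (*ulen == 0)			/* jumbo payload: take the IP length */
		*ulen = caplen;
	if (*ulen < sizeof(uh) || *ulen > caplen)
		return -1;
	return 0;			/* caller trims caplen down to *ulen */
}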
617 | 619 | ||
618 | static __inline__ int udpv6_rcv(struct sk_buff *skb) | 620 | static __inline__ int udpv6_rcv(struct sk_buff *skb) |
619 | { | 621 | { |
620 | return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); | 622 | return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); |
621 | } | 623 | } |
622 | 624 | ||
623 | /* | 625 | /* |
624 | * Throw away all pending data and cancel the corking. Socket is locked. | 626 | * Throw away all pending data and cancel the corking. Socket is locked. |
625 | */ | 627 | */ |
626 | static void udp_v6_flush_pending_frames(struct sock *sk) | 628 | static void udp_v6_flush_pending_frames(struct sock *sk) |
627 | { | 629 | { |
628 | struct udp_sock *up = udp_sk(sk); | 630 | struct udp_sock *up = udp_sk(sk); |
629 | 631 | ||
630 | if (up->pending == AF_INET) | 632 | if (up->pending == AF_INET) |
631 | udp_flush_pending_frames(sk); | 633 | udp_flush_pending_frames(sk); |
632 | else if (up->pending) { | 634 | else if (up->pending) { |
633 | up->len = 0; | 635 | up->len = 0; |
634 | up->pending = 0; | 636 | up->pending = 0; |
635 | ip6_flush_pending_frames(sk); | 637 | ip6_flush_pending_frames(sk); |
636 | } | 638 | } |
637 | } | 639 | } |
638 | 640 | ||
639 | /** | 641 | /** |
640 | * udp6_hwcsum_outgoing - handle outgoing HW checksumming | 642 | * udp6_hwcsum_outgoing - handle outgoing HW checksumming |
641 | * @sk: socket we are sending on | 643 | * @sk: socket we are sending on |
642 | * @skb: sk_buff containing the filled-in UDP header | 644 | * @skb: sk_buff containing the filled-in UDP header |
643 | * (checksum field must be zeroed out) | 645 | * (checksum field must be zeroed out) |
644 | */ | 646 | */ |
645 | static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 647 | static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, |
646 | const struct in6_addr *saddr, | 648 | const struct in6_addr *saddr, |
647 | const struct in6_addr *daddr, int len) | 649 | const struct in6_addr *daddr, int len) |
648 | { | 650 | { |
649 | unsigned int offset; | 651 | unsigned int offset; |
650 | struct udphdr *uh = udp_hdr(skb); | 652 | struct udphdr *uh = udp_hdr(skb); |
651 | __wsum csum = 0; | 653 | __wsum csum = 0; |
652 | 654 | ||
653 | if (skb_queue_len(&sk->sk_write_queue) == 1) { | 655 | if (skb_queue_len(&sk->sk_write_queue) == 1) { |
654 | /* Only one fragment on the socket. */ | 656 | /* Only one fragment on the socket. */ |
655 | skb->csum_start = skb_transport_header(skb) - skb->head; | 657 | skb->csum_start = skb_transport_header(skb) - skb->head; |
656 | skb->csum_offset = offsetof(struct udphdr, check); | 658 | skb->csum_offset = offsetof(struct udphdr, check); |
657 | uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0); | 659 | uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0); |
658 | } else { | 660 | } else { |
659 | /* | 661 | /* |
660 | * HW checksum won't work: there are two or more | 662 | * HW checksum won't work: there are two or more |
661 | * fragments on the socket, so the csums of all sk_buffs | 663 | * fragments on the socket, so the csums of all sk_buffs |
662 | * have to be summed up together | 664 | * have to be summed up together |
663 | */ | 665 | */ |
664 | offset = skb_transport_offset(skb); | 666 | offset = skb_transport_offset(skb); |
665 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); | 667 | skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); |
666 | 668 | ||
667 | skb->ip_summed = CHECKSUM_NONE; | 669 | skb->ip_summed = CHECKSUM_NONE; |
668 | 670 | ||
669 | skb_queue_walk(&sk->sk_write_queue, skb) { | 671 | skb_queue_walk(&sk->sk_write_queue, skb) { |
670 | csum = csum_add(csum, skb->csum); | 672 | csum = csum_add(csum, skb->csum); |
671 | } | 673 | } |
672 | 674 | ||
673 | uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, | 675 | uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, |
674 | csum); | 676 | csum); |
675 | if (uh->check == 0) | 677 | if (uh->check == 0) |
676 | uh->check = CSUM_MANGLED_0; | 678 | uh->check = CSUM_MANGLED_0; |
677 | } | 679 | } |
678 | } | 680 | } |
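/* Editor's note (hedged): the single-skb branch programs checksum offload
 * (the NIC finishes the sum starting at csum_start and stores it at
 * csum_offset, seeded with the negated pseudo-header sum), while the
 * multi-skb branch adds the per-fragment sums in software. A userspace
 * analogue of that csum_add() folding, names local to this example: */
#include <stdint.h>

static uint32_t csum_add32(uint32_t a, uint32_t b)
{
	uint32_t s = a + b;	/* one's-complement add: wrap the carry */
	return s + (s < a);
}
/* The skb_queue_walk() above is conceptually just:
 * for each fragment f on the write queue: total = csum_add32(total, f->csum); */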
679 | 681 | ||
680 | /* | 682 | /* |
681 | * Sending | 683 | * Sending |
682 | */ | 684 | */ |
683 | 685 | ||
684 | static int udp_v6_push_pending_frames(struct sock *sk) | 686 | static int udp_v6_push_pending_frames(struct sock *sk) |
685 | { | 687 | { |
686 | struct sk_buff *skb; | 688 | struct sk_buff *skb; |
687 | struct udphdr *uh; | 689 | struct udphdr *uh; |
688 | struct udp_sock *up = udp_sk(sk); | 690 | struct udp_sock *up = udp_sk(sk); |
689 | struct inet_sock *inet = inet_sk(sk); | 691 | struct inet_sock *inet = inet_sk(sk); |
690 | struct flowi *fl = &inet->cork.fl; | 692 | struct flowi *fl = &inet->cork.fl; |
691 | int err = 0; | 693 | int err = 0; |
692 | int is_udplite = IS_UDPLITE(sk); | 694 | int is_udplite = IS_UDPLITE(sk); |
693 | __wsum csum = 0; | 695 | __wsum csum = 0; |
694 | 696 | ||
695 | /* Grab the skbuff where UDP header space exists. */ | 697 | /* Grab the skbuff where UDP header space exists. */ |
696 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) | 698 | if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) |
697 | goto out; | 699 | goto out; |
698 | 700 | ||
699 | /* | 701 | /* |
700 | * Create a UDP header | 702 | * Create a UDP header |
701 | */ | 703 | */ |
702 | uh = udp_hdr(skb); | 704 | uh = udp_hdr(skb); |
703 | uh->source = fl->fl_ip_sport; | 705 | uh->source = fl->fl_ip_sport; |
704 | uh->dest = fl->fl_ip_dport; | 706 | uh->dest = fl->fl_ip_dport; |
705 | uh->len = htons(up->len); | 707 | uh->len = htons(up->len); |
706 | uh->check = 0; | 708 | uh->check = 0; |
707 | 709 | ||
708 | if (is_udplite) | 710 | if (is_udplite) |
709 | csum = udplite_csum_outgoing(sk, skb); | 711 | csum = udplite_csum_outgoing(sk, skb); |
710 | else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 712 | else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
711 | udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, | 713 | udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, |
712 | up->len); | 714 | up->len); |
713 | goto send; | 715 | goto send; |
714 | } else | 716 | } else |
715 | csum = udp_csum_outgoing(sk, skb); | 717 | csum = udp_csum_outgoing(sk, skb); |
716 | 718 | ||
717 | /* add protocol-dependent pseudo-header */ | 719 | /* add protocol-dependent pseudo-header */ |
718 | uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, | 720 | uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, |
719 | up->len, fl->proto, csum); | 721 | up->len, fl->proto, csum); |
720 | if (uh->check == 0) | 722 | if (uh->check == 0) |
721 | uh->check = CSUM_MANGLED_0; | 723 | uh->check = CSUM_MANGLED_0; |
722 | 724 | ||
723 | send: | 725 | send: |
724 | err = ip6_push_pending_frames(sk); | 726 | err = ip6_push_pending_frames(sk); |
725 | if (err) { | 727 | if (err) { |
726 | if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { | 728 | if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { |
727 | UDP6_INC_STATS_USER(sock_net(sk), | 729 | UDP6_INC_STATS_USER(sock_net(sk), |
728 | UDP_MIB_SNDBUFERRORS, is_udplite); | 730 | UDP_MIB_SNDBUFERRORS, is_udplite); |
729 | err = 0; | 731 | err = 0; |
730 | } | 732 | } |
731 | } else | 733 | } else |
732 | UDP6_INC_STATS_USER(sock_net(sk), | 734 | UDP6_INC_STATS_USER(sock_net(sk), |
733 | UDP_MIB_OUTDATAGRAMS, is_udplite); | 735 | UDP_MIB_OUTDATAGRAMS, is_udplite); |
734 | out: | 736 | out: |
735 | up->len = 0; | 737 | up->len = 0; |
736 | up->pending = 0; | 738 | up->pending = 0; |
737 | return err; | 739 | return err; |
738 | } | 740 | } |
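/* Editor's usage sketch (hedged; port and address are arbitrary example
 * values): this push is what a plain send() chain with MSG_MORE ends in.
 * Each MSG_MORE send keeps up->pending set; the first send without it
 * triggers udp_v6_push_pending_frames(). */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

int send_corked_example(void)
{
	struct sockaddr_in6 dst;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	int ok;

	if (fd < 0)
		return -1;
	memset(&dst, 0, sizeof(dst));
	dst.sin6_family = AF_INET6;
	dst.sin6_port = htons(4242);		/* arbitrary example port */
	dst.sin6_addr = in6addr_loopback;
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		close(fd);
		return -1;
	}
	send(fd, "part1-", 6, MSG_MORE);	/* corked: queued, not sent */
	send(fd, "part2-", 6, MSG_MORE);	/* still corked */
	ok = send(fd, "part3", 5, 0) == 5;	/* one 17-byte datagram leaves */
	close(fd);
	return ok ? 0 : -1;
}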
739 | 741 | ||
740 | int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, | 742 | int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, |
741 | struct msghdr *msg, size_t len) | 743 | struct msghdr *msg, size_t len) |
742 | { | 744 | { |
743 | struct ipv6_txoptions opt_space; | 745 | struct ipv6_txoptions opt_space; |
744 | struct udp_sock *up = udp_sk(sk); | 746 | struct udp_sock *up = udp_sk(sk); |
745 | struct inet_sock *inet = inet_sk(sk); | 747 | struct inet_sock *inet = inet_sk(sk); |
746 | struct ipv6_pinfo *np = inet6_sk(sk); | 748 | struct ipv6_pinfo *np = inet6_sk(sk); |
747 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; | 749 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; |
748 | struct in6_addr *daddr, *final_p = NULL, final; | 750 | struct in6_addr *daddr, *final_p = NULL, final; |
749 | struct ipv6_txoptions *opt = NULL; | 751 | struct ipv6_txoptions *opt = NULL; |
750 | struct ip6_flowlabel *flowlabel = NULL; | 752 | struct ip6_flowlabel *flowlabel = NULL; |
751 | struct flowi fl; | 753 | struct flowi fl; |
752 | struct dst_entry *dst; | 754 | struct dst_entry *dst; |
753 | int addr_len = msg->msg_namelen; | 755 | int addr_len = msg->msg_namelen; |
754 | int ulen = len; | 756 | int ulen = len; |
755 | int hlimit = -1; | 757 | int hlimit = -1; |
756 | int tclass = -1; | 758 | int tclass = -1; |
757 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; | 759 | int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; |
758 | int err; | 760 | int err; |
759 | int connected = 0; | 761 | int connected = 0; |
760 | int is_udplite = IS_UDPLITE(sk); | 762 | int is_udplite = IS_UDPLITE(sk); |
761 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); | 763 | int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); |
762 | 764 | ||
763 | /* destination address check */ | 765 | /* destination address check */ |
764 | if (sin6) { | 766 | if (sin6) { |
765 | if (addr_len < offsetof(struct sockaddr, sa_data)) | 767 | if (addr_len < offsetof(struct sockaddr, sa_data)) |
766 | return -EINVAL; | 768 | return -EINVAL; |
767 | 769 | ||
768 | switch (sin6->sin6_family) { | 770 | switch (sin6->sin6_family) { |
769 | case AF_INET6: | 771 | case AF_INET6: |
770 | if (addr_len < SIN6_LEN_RFC2133) | 772 | if (addr_len < SIN6_LEN_RFC2133) |
771 | return -EINVAL; | 773 | return -EINVAL; |
772 | daddr = &sin6->sin6_addr; | 774 | daddr = &sin6->sin6_addr; |
773 | break; | 775 | break; |
774 | case AF_INET: | 776 | case AF_INET: |
775 | goto do_udp_sendmsg; | 777 | goto do_udp_sendmsg; |
776 | case AF_UNSPEC: | 778 | case AF_UNSPEC: |
777 | msg->msg_name = sin6 = NULL; | 779 | msg->msg_name = sin6 = NULL; |
778 | msg->msg_namelen = addr_len = 0; | 780 | msg->msg_namelen = addr_len = 0; |
779 | daddr = NULL; | 781 | daddr = NULL; |
780 | break; | 782 | break; |
781 | default: | 783 | default: |
782 | return -EINVAL; | 784 | return -EINVAL; |
783 | } | 785 | } |
784 | } else if (!up->pending) { | 786 | } else if (!up->pending) { |
785 | if (sk->sk_state != TCP_ESTABLISHED) | 787 | if (sk->sk_state != TCP_ESTABLISHED) |
786 | return -EDESTADDRREQ; | 788 | return -EDESTADDRREQ; |
787 | daddr = &np->daddr; | 789 | daddr = &np->daddr; |
788 | } else | 790 | } else |
789 | daddr = NULL; | 791 | daddr = NULL; |
790 | 792 | ||
791 | if (daddr) { | 793 | if (daddr) { |
792 | if (ipv6_addr_v4mapped(daddr)) { | 794 | if (ipv6_addr_v4mapped(daddr)) { |
793 | struct sockaddr_in sin; | 795 | struct sockaddr_in sin; |
794 | sin.sin_family = AF_INET; | 796 | sin.sin_family = AF_INET; |
795 | sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport; | 797 | sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport; |
796 | sin.sin_addr.s_addr = daddr->s6_addr32[3]; | 798 | sin.sin_addr.s_addr = daddr->s6_addr32[3]; |
797 | msg->msg_name = &sin; | 799 | msg->msg_name = &sin; |
798 | msg->msg_namelen = sizeof(sin); | 800 | msg->msg_namelen = sizeof(sin); |
799 | do_udp_sendmsg: | 801 | do_udp_sendmsg: |
800 | if (__ipv6_only_sock(sk)) | 802 | if (__ipv6_only_sock(sk)) |
801 | return -ENETUNREACH; | 803 | return -ENETUNREACH; |
802 | return udp_sendmsg(iocb, sk, msg, len); | 804 | return udp_sendmsg(iocb, sk, msg, len); |
803 | } | 805 | } |
804 | } | 806 | } |
805 | 807 | ||
806 | if (up->pending == AF_INET) | 808 | if (up->pending == AF_INET) |
807 | return udp_sendmsg(iocb, sk, msg, len); | 809 | return udp_sendmsg(iocb, sk, msg, len); |
808 | 810 | ||
809 | /* Rough check on arithmetic overflow, | 811 | /* Rough check on arithmetic overflow, |
810 | * a better check is made in ip6_append_data(). | 812 | * a better check is made in ip6_append_data(). |
811 | */ | 813 | */ |
812 | if (len > INT_MAX - sizeof(struct udphdr)) | 814 | if (len > INT_MAX - sizeof(struct udphdr)) |
813 | return -EMSGSIZE; | 815 | return -EMSGSIZE; |
814 | 816 | ||
815 | if (up->pending) { | 817 | if (up->pending) { |
816 | /* | 818 | /* |
817 | * There are pending frames. | 819 | * There are pending frames. |
818 | * The socket lock must be held while it's corked. | 820 | * The socket lock must be held while it's corked. |
819 | */ | 821 | */ |
820 | lock_sock(sk); | 822 | lock_sock(sk); |
821 | if (likely(up->pending)) { | 823 | if (likely(up->pending)) { |
822 | if (unlikely(up->pending != AF_INET6)) { | 824 | if (unlikely(up->pending != AF_INET6)) { |
823 | release_sock(sk); | 825 | release_sock(sk); |
824 | return -EAFNOSUPPORT; | 826 | return -EAFNOSUPPORT; |
825 | } | 827 | } |
826 | dst = NULL; | 828 | dst = NULL; |
827 | goto do_append_data; | 829 | goto do_append_data; |
828 | } | 830 | } |
829 | release_sock(sk); | 831 | release_sock(sk); |
830 | } | 832 | } |
831 | ulen += sizeof(struct udphdr); | 833 | ulen += sizeof(struct udphdr); |
832 | 834 | ||
833 | memset(&fl, 0, sizeof(fl)); | 835 | memset(&fl, 0, sizeof(fl)); |
834 | 836 | ||
835 | if (sin6) { | 837 | if (sin6) { |
836 | if (sin6->sin6_port == 0) | 838 | if (sin6->sin6_port == 0) |
837 | return -EINVAL; | 839 | return -EINVAL; |
838 | 840 | ||
839 | fl.fl_ip_dport = sin6->sin6_port; | 841 | fl.fl_ip_dport = sin6->sin6_port; |
840 | daddr = &sin6->sin6_addr; | 842 | daddr = &sin6->sin6_addr; |
841 | 843 | ||
842 | if (np->sndflow) { | 844 | if (np->sndflow) { |
843 | fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; | 845 | fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; |
844 | if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { | 846 | if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { |
845 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | 847 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); |
846 | if (flowlabel == NULL) | 848 | if (flowlabel == NULL) |
847 | return -EINVAL; | 849 | return -EINVAL; |
848 | daddr = &flowlabel->dst; | 850 | daddr = &flowlabel->dst; |
849 | } | 851 | } |
850 | } | 852 | } |
851 | 853 | ||
852 | /* | 854 | /* |
853 | * Otherwise it will be difficult to maintain | 855 | * Otherwise it will be difficult to maintain |
854 | * sk->sk_dst_cache. | 856 | * sk->sk_dst_cache. |
855 | */ | 857 | */ |
856 | if (sk->sk_state == TCP_ESTABLISHED && | 858 | if (sk->sk_state == TCP_ESTABLISHED && |
857 | ipv6_addr_equal(daddr, &np->daddr)) | 859 | ipv6_addr_equal(daddr, &np->daddr)) |
858 | daddr = &np->daddr; | 860 | daddr = &np->daddr; |
859 | 861 | ||
860 | if (addr_len >= sizeof(struct sockaddr_in6) && | 862 | if (addr_len >= sizeof(struct sockaddr_in6) && |
861 | sin6->sin6_scope_id && | 863 | sin6->sin6_scope_id && |
862 | ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) | 864 | ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) |
863 | fl.oif = sin6->sin6_scope_id; | 865 | fl.oif = sin6->sin6_scope_id; |
864 | } else { | 866 | } else { |
865 | if (sk->sk_state != TCP_ESTABLISHED) | 867 | if (sk->sk_state != TCP_ESTABLISHED) |
866 | return -EDESTADDRREQ; | 868 | return -EDESTADDRREQ; |
867 | 869 | ||
868 | fl.fl_ip_dport = inet->inet_dport; | 870 | fl.fl_ip_dport = inet->inet_dport; |
869 | daddr = &np->daddr; | 871 | daddr = &np->daddr; |
870 | fl.fl6_flowlabel = np->flow_label; | 872 | fl.fl6_flowlabel = np->flow_label; |
871 | connected = 1; | 873 | connected = 1; |
872 | } | 874 | } |
873 | 875 | ||
874 | if (!fl.oif) | 876 | if (!fl.oif) |
875 | fl.oif = sk->sk_bound_dev_if; | 877 | fl.oif = sk->sk_bound_dev_if; |
876 | 878 | ||
877 | if (!fl.oif) | 879 | if (!fl.oif) |
878 | fl.oif = np->sticky_pktinfo.ipi6_ifindex; | 880 | fl.oif = np->sticky_pktinfo.ipi6_ifindex; |
879 | 881 | ||
880 | fl.mark = sk->sk_mark; | 882 | fl.mark = sk->sk_mark; |
881 | 883 | ||
882 | if (msg->msg_controllen) { | 884 | if (msg->msg_controllen) { |
883 | opt = &opt_space; | 885 | opt = &opt_space; |
884 | memset(opt, 0, sizeof(struct ipv6_txoptions)); | 886 | memset(opt, 0, sizeof(struct ipv6_txoptions)); |
885 | opt->tot_len = sizeof(*opt); | 887 | opt->tot_len = sizeof(*opt); |
886 | 888 | ||
887 | err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); | 889 | err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); |
888 | if (err < 0) { | 890 | if (err < 0) { |
889 | fl6_sock_release(flowlabel); | 891 | fl6_sock_release(flowlabel); |
890 | return err; | 892 | return err; |
891 | } | 893 | } |
892 | if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { | 894 | if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { |
893 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); | 895 | flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); |
894 | if (flowlabel == NULL) | 896 | if (flowlabel == NULL) |
895 | return -EINVAL; | 897 | return -EINVAL; |
896 | } | 898 | } |
897 | if (!(opt->opt_nflen|opt->opt_flen)) | 899 | if (!(opt->opt_nflen|opt->opt_flen)) |
898 | opt = NULL; | 900 | opt = NULL; |
899 | connected = 0; | 901 | connected = 0; |
900 | } | 902 | } |
901 | if (opt == NULL) | 903 | if (opt == NULL) |
902 | opt = np->opt; | 904 | opt = np->opt; |
903 | if (flowlabel) | 905 | if (flowlabel) |
904 | opt = fl6_merge_options(&opt_space, flowlabel, opt); | 906 | opt = fl6_merge_options(&opt_space, flowlabel, opt); |
905 | opt = ipv6_fixup_options(&opt_space, opt); | 907 | opt = ipv6_fixup_options(&opt_space, opt); |
906 | 908 | ||
907 | fl.proto = sk->sk_protocol; | 909 | fl.proto = sk->sk_protocol; |
908 | if (!ipv6_addr_any(daddr)) | 910 | if (!ipv6_addr_any(daddr)) |
909 | ipv6_addr_copy(&fl.fl6_dst, daddr); | 911 | ipv6_addr_copy(&fl.fl6_dst, daddr); |
910 | else | 912 | else |
911 | fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ | 913 | fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ |
912 | if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) | 914 | if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) |
913 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); | 915 | ipv6_addr_copy(&fl.fl6_src, &np->saddr); |
914 | fl.fl_ip_sport = inet->inet_sport; | 916 | fl.fl_ip_sport = inet->inet_sport; |
915 | 917 | ||
916 | /* merge ip6_build_xmit from ip6_output */ | 918 | /* merge ip6_build_xmit from ip6_output */ |
917 | if (opt && opt->srcrt) { | 919 | if (opt && opt->srcrt) { |
918 | struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; | 920 | struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; |
919 | ipv6_addr_copy(&final, &fl.fl6_dst); | 921 | ipv6_addr_copy(&final, &fl.fl6_dst); |
920 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); | 922 | ipv6_addr_copy(&fl.fl6_dst, rt0->addr); |
921 | final_p = &final; | 923 | final_p = &final; |
922 | connected = 0; | 924 | connected = 0; |
923 | } | 925 | } |
924 | 926 | ||
925 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { | 927 | if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { |
926 | fl.oif = np->mcast_oif; | 928 | fl.oif = np->mcast_oif; |
927 | connected = 0; | 929 | connected = 0; |
928 | } | 930 | } |
929 | 931 | ||
930 | security_sk_classify_flow(sk, &fl); | 932 | security_sk_classify_flow(sk, &fl); |
931 | 933 | ||
932 | err = ip6_sk_dst_lookup(sk, &dst, &fl); | 934 | err = ip6_sk_dst_lookup(sk, &dst, &fl); |
933 | if (err) | 935 | if (err) |
934 | goto out; | 936 | goto out; |
935 | if (final_p) | 937 | if (final_p) |
936 | ipv6_addr_copy(&fl.fl6_dst, final_p); | 938 | ipv6_addr_copy(&fl.fl6_dst, final_p); |
937 | 939 | ||
938 | err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); | 940 | err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); |
939 | if (err < 0) { | 941 | if (err < 0) { |
940 | if (err == -EREMOTE) | 942 | if (err == -EREMOTE) |
941 | err = ip6_dst_blackhole(sk, &dst, &fl); | 943 | err = ip6_dst_blackhole(sk, &dst, &fl); |
942 | if (err < 0) | 944 | if (err < 0) |
943 | goto out; | 945 | goto out; |
944 | } | 946 | } |
945 | 947 | ||
946 | if (hlimit < 0) { | 948 | if (hlimit < 0) { |
947 | if (ipv6_addr_is_multicast(&fl.fl6_dst)) | 949 | if (ipv6_addr_is_multicast(&fl.fl6_dst)) |
948 | hlimit = np->mcast_hops; | 950 | hlimit = np->mcast_hops; |
949 | else | 951 | else |
950 | hlimit = np->hop_limit; | 952 | hlimit = np->hop_limit; |
951 | if (hlimit < 0) | 953 | if (hlimit < 0) |
952 | hlimit = ip6_dst_hoplimit(dst); | 954 | hlimit = ip6_dst_hoplimit(dst); |
953 | } | 955 | } |
954 | 956 | ||
955 | if (tclass < 0) | 957 | if (tclass < 0) |
956 | tclass = np->tclass; | 958 | tclass = np->tclass; |
957 | 959 | ||
958 | if (msg->msg_flags&MSG_CONFIRM) | 960 | if (msg->msg_flags&MSG_CONFIRM) |
959 | goto do_confirm; | 961 | goto do_confirm; |
960 | back_from_confirm: | 962 | back_from_confirm: |
961 | 963 | ||
962 | lock_sock(sk); | 964 | lock_sock(sk); |
963 | if (unlikely(up->pending)) { | 965 | if (unlikely(up->pending)) { |
964 | /* The socket is already corked while preparing it. */ | 966 | /* The socket is already corked while preparing it. */ |
965 | /* ... which is an evident application bug. --ANK */ | 967 | /* ... which is an evident application bug. --ANK */ |
966 | release_sock(sk); | 968 | release_sock(sk); |
967 | 969 | ||
968 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); | 970 | LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); |
969 | err = -EINVAL; | 971 | err = -EINVAL; |
970 | goto out; | 972 | goto out; |
971 | } | 973 | } |
972 | 974 | ||
973 | up->pending = AF_INET6; | 975 | up->pending = AF_INET6; |
974 | 976 | ||
975 | do_append_data: | 977 | do_append_data: |
976 | up->len += ulen; | 978 | up->len += ulen; |
977 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; | 979 | getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; |
978 | err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, | 980 | err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, |
979 | sizeof(struct udphdr), hlimit, tclass, opt, &fl, | 981 | sizeof(struct udphdr), hlimit, tclass, opt, &fl, |
980 | (struct rt6_info *)dst, | 982 | (struct rt6_info *)dst, |
981 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); | 983 | corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); |
982 | if (err) | 984 | if (err) |
983 | udp_v6_flush_pending_frames(sk); | 985 | udp_v6_flush_pending_frames(sk); |
984 | else if (!corkreq) | 986 | else if (!corkreq) |
985 | err = udp_v6_push_pending_frames(sk); | 987 | err = udp_v6_push_pending_frames(sk); |
986 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) | 988 | else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) |
987 | up->pending = 0; | 989 | up->pending = 0; |
988 | 990 | ||
989 | if (dst) { | 991 | if (dst) { |
990 | if (connected) { | 992 | if (connected) { |
991 | ip6_dst_store(sk, dst, | 993 | ip6_dst_store(sk, dst, |
992 | ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? | 994 | ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? |
993 | &np->daddr : NULL, | 995 | &np->daddr : NULL, |
994 | #ifdef CONFIG_IPV6_SUBTREES | 996 | #ifdef CONFIG_IPV6_SUBTREES |
995 | ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? | 997 | ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? |
996 | &np->saddr : | 998 | &np->saddr : |
997 | #endif | 999 | #endif |
998 | NULL); | 1000 | NULL); |
999 | } else { | 1001 | } else { |
1000 | dst_release(dst); | 1002 | dst_release(dst); |
1001 | } | 1003 | } |
1002 | dst = NULL; | 1004 | dst = NULL; |
1003 | } | 1005 | } |
1004 | 1006 | ||
1005 | if (err > 0) | 1007 | if (err > 0) |
1006 | err = np->recverr ? net_xmit_errno(err) : 0; | 1008 | err = np->recverr ? net_xmit_errno(err) : 0; |
1007 | release_sock(sk); | 1009 | release_sock(sk); |
1008 | out: | 1010 | out: |
1009 | dst_release(dst); | 1011 | dst_release(dst); |
1010 | fl6_sock_release(flowlabel); | 1012 | fl6_sock_release(flowlabel); |
1011 | if (!err) | 1013 | if (!err) |
1012 | return len; | 1014 | return len; |
1013 | /* | 1015 | /* |
1014 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting | 1016 | * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting |
1015 | * ENOBUFS might not be good (it's not tunable per se), but otherwise | 1017 | * ENOBUFS might not be good (it's not tunable per se), but otherwise |
1016 | * we don't have a good statistic (IpOutDiscards, but it can mean too many | 1018 | * we don't have a good statistic (IpOutDiscards, but it can mean too many |
1017 | * things). We could add another new stat but at least for now that | 1019 | * things). We could add another new stat but at least for now that |
1018 | * seems like overkill. | 1020 | * seems like overkill. |
1019 | */ | 1021 | */ |
1020 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { | 1022 | if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { |
1021 | UDP6_INC_STATS_USER(sock_net(sk), | 1023 | UDP6_INC_STATS_USER(sock_net(sk), |
1022 | UDP_MIB_SNDBUFERRORS, is_udplite); | 1024 | UDP_MIB_SNDBUFERRORS, is_udplite); |
1023 | } | 1025 | } |
1024 | return err; | 1026 | return err; |
1025 | 1027 | ||
1026 | do_confirm: | 1028 | do_confirm: |
1027 | dst_confirm(dst); | 1029 | dst_confirm(dst); |
1028 | if (!(msg->msg_flags&MSG_PROBE) || len) | 1030 | if (!(msg->msg_flags&MSG_PROBE) || len) |
1029 | goto back_from_confirm; | 1031 | goto back_from_confirm; |
1030 | err = 0; | 1032 | err = 0; |
1031 | goto out; | 1033 | goto out; |
1032 | } | 1034 | } |
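/* Editor's usage sketch (hedged; example values throughout): the
 * do_udp_sendmsg branch above is what a v4-mapped destination takes, and
 * the __ipv6_only_sock() check is what an IPV6_V6ONLY socket trips over. */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int send_v4mapped_example(void)
{
	struct sockaddr_in6 dst;
	int on = 1, ret;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&dst, 0, sizeof(dst));
	dst.sin6_family = AF_INET6;
	dst.sin6_port = htons(4242);	/* arbitrary example port */
	inet_pton(AF_INET6, "::ffff:127.0.0.1", &dst.sin6_addr);

	/* No V6ONLY: handed off to udp_sendmsg(), i.e. the IPv4 path. */
	sendto(fd, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));

	/* V6ONLY set: the same sendto() now fails with ENETUNREACH. */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
	ret = sendto(fd, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));
	close(fd);
	return ret;	/* expected: -1 with errno == ENETUNREACH */
}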
1033 | 1035 | ||
1034 | void udpv6_destroy_sock(struct sock *sk) | 1036 | void udpv6_destroy_sock(struct sock *sk) |
1035 | { | 1037 | { |
1036 | lock_sock(sk); | 1038 | lock_sock(sk); |
1037 | udp_v6_flush_pending_frames(sk); | 1039 | udp_v6_flush_pending_frames(sk); |
1038 | release_sock(sk); | 1040 | release_sock(sk); |
1039 | 1041 | ||
1040 | inet6_destroy_sock(sk); | 1042 | inet6_destroy_sock(sk); |
1041 | } | 1043 | } |
1042 | 1044 | ||
1043 | /* | 1045 | /* |
1044 | * Socket option code for UDP | 1046 | * Socket option code for UDP |
1045 | */ | 1047 | */ |
1046 | int udpv6_setsockopt(struct sock *sk, int level, int optname, | 1048 | int udpv6_setsockopt(struct sock *sk, int level, int optname, |
1047 | char __user *optval, unsigned int optlen) | 1049 | char __user *optval, unsigned int optlen) |
1048 | { | 1050 | { |
1049 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1051 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1050 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | 1052 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1051 | udp_v6_push_pending_frames); | 1053 | udp_v6_push_pending_frames); |
1052 | return ipv6_setsockopt(sk, level, optname, optval, optlen); | 1054 | return ipv6_setsockopt(sk, level, optname, optval, optlen); |
1053 | } | 1055 | } |
1054 | 1056 | ||
1055 | #ifdef CONFIG_COMPAT | 1057 | #ifdef CONFIG_COMPAT |
1056 | int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, | 1058 | int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, |
1057 | char __user *optval, unsigned int optlen) | 1059 | char __user *optval, unsigned int optlen) |
1058 | { | 1060 | { |
1059 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1061 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1060 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, | 1062 | return udp_lib_setsockopt(sk, level, optname, optval, optlen, |
1061 | udp_v6_push_pending_frames); | 1063 | udp_v6_push_pending_frames); |
1062 | return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); | 1064 | return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); |
1063 | } | 1065 | } |
1064 | #endif | 1066 | #endif |
1065 | 1067 | ||
1066 | int udpv6_getsockopt(struct sock *sk, int level, int optname, | 1068 | int udpv6_getsockopt(struct sock *sk, int level, int optname, |
1067 | char __user *optval, int __user *optlen) | 1069 | char __user *optval, int __user *optlen) |
1068 | { | 1070 | { |
1069 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1071 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1070 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | 1072 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1071 | return ipv6_getsockopt(sk, level, optname, optval, optlen); | 1073 | return ipv6_getsockopt(sk, level, optname, optval, optlen); |
1072 | } | 1074 | } |
1073 | 1075 | ||
1074 | #ifdef CONFIG_COMPAT | 1076 | #ifdef CONFIG_COMPAT |
1075 | int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, | 1077 | int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, |
1076 | char __user *optval, int __user *optlen) | 1078 | char __user *optval, int __user *optlen) |
1077 | { | 1079 | { |
1078 | if (level == SOL_UDP || level == SOL_UDPLITE) | 1080 | if (level == SOL_UDP || level == SOL_UDPLITE) |
1079 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); | 1081 | return udp_lib_getsockopt(sk, level, optname, optval, optlen); |
1080 | return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); | 1082 | return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); |
1081 | } | 1083 | } |
1082 | #endif | 1084 | #endif |
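/* Editor's usage sketch (hedged): the SOL_UDPLITE level routed to
 * udp_lib_setsockopt() above is what services a checksum-coverage request
 * like this one. Constants are spelled out locally in case the libc headers
 * lack them; the coverage of 20 is an arbitrary example value. */
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE		136
#endif
#ifndef SOL_UDPLITE
#define SOL_UDPLITE		136	/* option level == protocol number */
#endif
#ifndef UDPLITE_SEND_CSCOV
#define UDPLITE_SEND_CSCOV	10	/* sender checksum coverage */
#endif

int udplite_cscov_example(void)
{
	int cov = 20;	/* checksum covers only the first 20 bytes */
	int ret;
	int fd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE);

	if (fd < 0)
		return -1;
	ret = setsockopt(fd, SOL_UDPLITE, UDPLITE_SEND_CSCOV,
			 &cov, sizeof(cov));
	close(fd);
	return ret;
}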
1083 | 1085 | ||
1084 | static int udp6_ufo_send_check(struct sk_buff *skb) | 1086 | static int udp6_ufo_send_check(struct sk_buff *skb) |
1085 | { | 1087 | { |
1086 | struct ipv6hdr *ipv6h; | 1088 | struct ipv6hdr *ipv6h; |
1087 | struct udphdr *uh; | 1089 | struct udphdr *uh; |
1088 | 1090 | ||
1089 | if (!pskb_may_pull(skb, sizeof(*uh))) | 1091 | if (!pskb_may_pull(skb, sizeof(*uh))) |
1090 | return -EINVAL; | 1092 | return -EINVAL; |
1091 | 1093 | ||
1092 | ipv6h = ipv6_hdr(skb); | 1094 | ipv6h = ipv6_hdr(skb); |
1093 | uh = udp_hdr(skb); | 1095 | uh = udp_hdr(skb); |
1094 | 1096 | ||
1095 | uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, | 1097 | uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, |
1096 | IPPROTO_UDP, 0); | 1098 | IPPROTO_UDP, 0); |
1097 | skb->csum_start = skb_transport_header(skb) - skb->head; | 1099 | skb->csum_start = skb_transport_header(skb) - skb->head; |
1098 | skb->csum_offset = offsetof(struct udphdr, check); | 1100 | skb->csum_offset = offsetof(struct udphdr, check); |
1099 | skb->ip_summed = CHECKSUM_PARTIAL; | 1101 | skb->ip_summed = CHECKSUM_PARTIAL; |
1100 | return 0; | 1102 | return 0; |
1101 | } | 1103 | } |
1102 | 1104 | ||
1103 | static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) | 1105 | static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) |
1104 | { | 1106 | { |
1105 | struct sk_buff *segs = ERR_PTR(-EINVAL); | 1107 | struct sk_buff *segs = ERR_PTR(-EINVAL); |
1106 | unsigned int mss; | 1108 | unsigned int mss; |
1107 | unsigned int unfrag_ip6hlen, unfrag_len; | 1109 | unsigned int unfrag_ip6hlen, unfrag_len; |
1108 | struct frag_hdr *fptr; | 1110 | struct frag_hdr *fptr; |
1109 | u8 *mac_start, *prevhdr; | 1111 | u8 *mac_start, *prevhdr; |
1110 | u8 nexthdr; | 1112 | u8 nexthdr; |
1111 | u8 frag_hdr_sz = sizeof(struct frag_hdr); | 1113 | u8 frag_hdr_sz = sizeof(struct frag_hdr); |
1112 | int offset; | 1114 | int offset; |
1113 | __wsum csum; | 1115 | __wsum csum; |
1114 | 1116 | ||
1115 | mss = skb_shinfo(skb)->gso_size; | 1117 | mss = skb_shinfo(skb)->gso_size; |
1116 | if (unlikely(skb->len <= mss)) | 1118 | if (unlikely(skb->len <= mss)) |
1117 | goto out; | 1119 | goto out; |
1118 | 1120 | ||
1119 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | 1121 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { |
1120 | /* Packet is from an untrusted source, reset gso_segs. */ | 1122 | /* Packet is from an untrusted source, reset gso_segs. */ |
1121 | int type = skb_shinfo(skb)->gso_type; | 1123 | int type = skb_shinfo(skb)->gso_type; |
1122 | 1124 | ||
1123 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || | 1125 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || |
1124 | !(type & (SKB_GSO_UDP)))) | 1126 | !(type & (SKB_GSO_UDP)))) |
1125 | goto out; | 1127 | goto out; |
1126 | 1128 | ||
1127 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | 1129 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); |
1128 | 1130 | ||
1129 | segs = NULL; | 1131 | segs = NULL; |
1130 | goto out; | 1132 | goto out; |
1131 | } | 1133 | } |
1132 | 1134 | ||
1133 | /* Do software UFO. Complete and fill in the UDP checksum, as HW cannot | 1135 | /* Do software UFO. Complete and fill in the UDP checksum, as HW cannot |
1134 | * checksum UDP packets sent as multiple IP fragments. | 1136 | * checksum UDP packets sent as multiple IP fragments. |
1135 | */ | 1137 | */ |
1136 | offset = skb->csum_start - skb_headroom(skb); | 1138 | offset = skb->csum_start - skb_headroom(skb); |
1137 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | 1139 | csum = skb_checksum(skb, offset, skb->len - offset, 0); |
1138 | offset += skb->csum_offset; | 1140 | offset += skb->csum_offset; |
1139 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | 1141 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); |
1140 | skb->ip_summed = CHECKSUM_NONE; | 1142 | skb->ip_summed = CHECKSUM_NONE; |
1141 | 1143 | ||
1142 | /* Check if there is enough headroom to insert fragment header. */ | 1144 | /* Check if there is enough headroom to insert fragment header. */ |
1143 | if ((skb_headroom(skb) < frag_hdr_sz) && | 1145 | if ((skb_headroom(skb) < frag_hdr_sz) && |
1144 | pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) | 1146 | pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) |
1145 | goto out; | 1147 | goto out; |
1146 | 1148 | ||
1147 | /* Find the unfragmentable header and shift it left by frag_hdr_sz | 1149 | /* Find the unfragmentable header and shift it left by frag_hdr_sz |
1148 | * bytes to insert the fragment header. | 1150 | * bytes to insert the fragment header. |
1149 | */ | 1151 | */ |
1150 | unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); | 1152 | unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); |
1151 | nexthdr = *prevhdr; | 1153 | nexthdr = *prevhdr; |
1152 | *prevhdr = NEXTHDR_FRAGMENT; | 1154 | *prevhdr = NEXTHDR_FRAGMENT; |
1153 | unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + | 1155 | unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + |
1154 | unfrag_ip6hlen; | 1156 | unfrag_ip6hlen; |
1155 | mac_start = skb_mac_header(skb); | 1157 | mac_start = skb_mac_header(skb); |
1156 | memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); | 1158 | memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); |
1157 | 1159 | ||
1158 | skb->mac_header -= frag_hdr_sz; | 1160 | skb->mac_header -= frag_hdr_sz; |
1159 | skb->network_header -= frag_hdr_sz; | 1161 | skb->network_header -= frag_hdr_sz; |
1160 | 1162 | ||
1161 | fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); | 1163 | fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); |
1162 | fptr->nexthdr = nexthdr; | 1164 | fptr->nexthdr = nexthdr; |
1163 | fptr->reserved = 0; | 1165 | fptr->reserved = 0; |
1164 | ipv6_select_ident(fptr); | 1166 | ipv6_select_ident(fptr); |
1165 | 1167 | ||
1166 | /* Fragment the skb. The ipv6 header and the remaining fields of the | 1168 | /* Fragment the skb. The ipv6 header and the remaining fields of the |
1167 | * fragment header are updated in ipv6_gso_segment() | 1169 | * fragment header are updated in ipv6_gso_segment() |
1168 | */ | 1170 | */ |
1169 | segs = skb_segment(skb, features); | 1171 | segs = skb_segment(skb, features); |
1170 | 1172 | ||
1171 | out: | 1173 | out: |
1172 | return segs; | 1174 | return segs; |
1173 | } | 1175 | } |
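/* Editor's note (hedged local sketch): the memmove above opens an 8-byte
 * gap between the unfragmentable part and the rest so an IPv6 fragment
 * header (RFC 2460 section 4.5) can be dropped in. Its on-the-wire layout: */
#include <stdint.h>

struct frag_hdr_local {
	uint8_t  nexthdr;	/* header type following the fragments */
	uint8_t  reserved;	/* zeroed, as fptr->reserved = 0 above */
	uint16_t frag_off;	/* big-endian: offset/8 in bits 15..3,
				 * M ("more fragments") flag in bit 0 */
	uint32_t identification; /* filled by ipv6_select_ident() above */
};	/* 8 bytes total == frag_hdr_sz above */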
1174 | 1176 | ||
1175 | static const struct inet6_protocol udpv6_protocol = { | 1177 | static const struct inet6_protocol udpv6_protocol = { |
1176 | .handler = udpv6_rcv, | 1178 | .handler = udpv6_rcv, |
1177 | .err_handler = udpv6_err, | 1179 | .err_handler = udpv6_err, |
1178 | .gso_send_check = udp6_ufo_send_check, | 1180 | .gso_send_check = udp6_ufo_send_check, |
1179 | .gso_segment = udp6_ufo_fragment, | 1181 | .gso_segment = udp6_ufo_fragment, |
1180 | .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, | 1182 | .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, |
1181 | }; | 1183 | }; |
1182 | 1184 | ||
1183 | /* ------------------------------------------------------------------------ */ | 1185 | /* ------------------------------------------------------------------------ */ |
1184 | #ifdef CONFIG_PROC_FS | 1186 | #ifdef CONFIG_PROC_FS |
1185 | 1187 | ||
1186 | static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) | 1188 | static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) |
1187 | { | 1189 | { |
1188 | struct inet_sock *inet = inet_sk(sp); | 1190 | struct inet_sock *inet = inet_sk(sp); |
1189 | struct ipv6_pinfo *np = inet6_sk(sp); | 1191 | struct ipv6_pinfo *np = inet6_sk(sp); |
1190 | struct in6_addr *dest, *src; | 1192 | struct in6_addr *dest, *src; |
1191 | __u16 destp, srcp; | 1193 | __u16 destp, srcp; |
1192 | 1194 | ||
1193 | dest = &np->daddr; | 1195 | dest = &np->daddr; |
1194 | src = &np->rcv_saddr; | 1196 | src = &np->rcv_saddr; |
1195 | destp = ntohs(inet->inet_dport); | 1197 | destp = ntohs(inet->inet_dport); |
1196 | srcp = ntohs(inet->inet_sport); | 1198 | srcp = ntohs(inet->inet_sport); |
1197 | seq_printf(seq, | 1199 | seq_printf(seq, |
1198 | "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " | 1200 | "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " |
1199 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", | 1201 | "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", |
1200 | bucket, | 1202 | bucket, |
1201 | src->s6_addr32[0], src->s6_addr32[1], | 1203 | src->s6_addr32[0], src->s6_addr32[1], |
1202 | src->s6_addr32[2], src->s6_addr32[3], srcp, | 1204 | src->s6_addr32[2], src->s6_addr32[3], srcp, |
1203 | dest->s6_addr32[0], dest->s6_addr32[1], | 1205 | dest->s6_addr32[0], dest->s6_addr32[1], |
1204 | dest->s6_addr32[2], dest->s6_addr32[3], destp, | 1206 | dest->s6_addr32[2], dest->s6_addr32[3], destp, |
1205 | sp->sk_state, | 1207 | sp->sk_state, |
1206 | sk_wmem_alloc_get(sp), | 1208 | sk_wmem_alloc_get(sp), |
1207 | sk_rmem_alloc_get(sp), | 1209 | sk_rmem_alloc_get(sp), |
1208 | 0, 0L, 0, | 1210 | 0, 0L, 0, |
1209 | sock_i_uid(sp), 0, | 1211 | sock_i_uid(sp), 0, |
1210 | sock_i_ino(sp), | 1212 | sock_i_ino(sp), |
1211 | atomic_read(&sp->sk_refcnt), sp, | 1213 | atomic_read(&sp->sk_refcnt), sp, |
1212 | atomic_read(&sp->sk_drops)); | 1214 | atomic_read(&sp->sk_drops)); |
1213 | } | 1215 | } |
1214 | 1216 | ||
1215 | int udp6_seq_show(struct seq_file *seq, void *v) | 1217 | int udp6_seq_show(struct seq_file *seq, void *v) |
1216 | { | 1218 | { |
1217 | if (v == SEQ_START_TOKEN) | 1219 | if (v == SEQ_START_TOKEN) |
1218 | seq_printf(seq, | 1220 | seq_printf(seq, |
1219 | " sl " | 1221 | " sl " |
1220 | "local_address " | 1222 | "local_address " |
1221 | "remote_address " | 1223 | "remote_address " |
1222 | "st tx_queue rx_queue tr tm->when retrnsmt" | 1224 | "st tx_queue rx_queue tr tm->when retrnsmt" |
1223 | " uid timeout inode ref pointer drops\n"); | 1225 | " uid timeout inode ref pointer drops\n"); |
1224 | else | 1226 | else |
1225 | udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); | 1227 | udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); |
1226 | return 0; | 1228 | return 0; |
1227 | } | 1229 | } |
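/* Editor's usage sketch (hedged, crude parsing): reading back the table
 * this seq_show emits. The final whitespace-separated field of each
 * /proc/net/udp6 row is the sk_drops value surfaced by this patch series. */
#include <stdio.h>
#include <string.h>

int dump_udp6_drops(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/udp6", "r");

	if (!f)
		return -1;
	if (!fgets(line, sizeof(line), f)) {	/* skip the header row */
		fclose(f);
		return -1;
	}
	while (fgets(line, sizeof(line), f)) {
		char *drops;

		line[strcspn(line, "\n")] = '\0';
		drops = strrchr(line, ' ');	/* last field: drops */
		if (drops)
			printf("drops=%s\n", drops + 1);
	}
	fclose(f);
	return 0;
}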
1228 | 1230 | ||
1229 | static struct udp_seq_afinfo udp6_seq_afinfo = { | 1231 | static struct udp_seq_afinfo udp6_seq_afinfo = { |
1230 | .name = "udp6", | 1232 | .name = "udp6", |
1231 | .family = AF_INET6, | 1233 | .family = AF_INET6, |
1232 | .udp_table = &udp_table, | 1234 | .udp_table = &udp_table, |
1233 | .seq_fops = { | 1235 | .seq_fops = { |
1234 | .owner = THIS_MODULE, | 1236 | .owner = THIS_MODULE, |
1235 | }, | 1237 | }, |
1236 | .seq_ops = { | 1238 | .seq_ops = { |
1237 | .show = udp6_seq_show, | 1239 | .show = udp6_seq_show, |
1238 | }, | 1240 | }, |
1239 | }; | 1241 | }; |
1240 | 1242 | ||
1241 | int udp6_proc_init(struct net *net) | 1243 | int udp6_proc_init(struct net *net) |
1242 | { | 1244 | { |
1243 | return udp_proc_register(net, &udp6_seq_afinfo); | 1245 | return udp_proc_register(net, &udp6_seq_afinfo); |
1244 | } | 1246 | } |
1245 | 1247 | ||
1246 | void udp6_proc_exit(struct net *net) { | 1248 | void udp6_proc_exit(struct net *net) { |
1247 | udp_proc_unregister(net, &udp6_seq_afinfo); | 1249 | udp_proc_unregister(net, &udp6_seq_afinfo); |
1248 | } | 1250 | } |
1249 | #endif /* CONFIG_PROC_FS */ | 1251 | #endif /* CONFIG_PROC_FS */ |
1250 | 1252 | ||
1251 | /* ------------------------------------------------------------------------ */ | 1253 | /* ------------------------------------------------------------------------ */ |
1252 | 1254 | ||
1253 | struct proto udpv6_prot = { | 1255 | struct proto udpv6_prot = { |
1254 | .name = "UDPv6", | 1256 | .name = "UDPv6", |
1255 | .owner = THIS_MODULE, | 1257 | .owner = THIS_MODULE, |
1256 | .close = udp_lib_close, | 1258 | .close = udp_lib_close, |
1257 | .connect = ip6_datagram_connect, | 1259 | .connect = ip6_datagram_connect, |
1258 | .disconnect = udp_disconnect, | 1260 | .disconnect = udp_disconnect, |
1259 | .ioctl = udp_ioctl, | 1261 | .ioctl = udp_ioctl, |
1260 | .destroy = udpv6_destroy_sock, | 1262 | .destroy = udpv6_destroy_sock, |
1261 | .setsockopt = udpv6_setsockopt, | 1263 | .setsockopt = udpv6_setsockopt, |
1262 | .getsockopt = udpv6_getsockopt, | 1264 | .getsockopt = udpv6_getsockopt, |
1263 | .sendmsg = udpv6_sendmsg, | 1265 | .sendmsg = udpv6_sendmsg, |
1264 | .recvmsg = udpv6_recvmsg, | 1266 | .recvmsg = udpv6_recvmsg, |
1265 | .backlog_rcv = udpv6_queue_rcv_skb, | 1267 | .backlog_rcv = udpv6_queue_rcv_skb, |
1266 | .hash = udp_lib_hash, | 1268 | .hash = udp_lib_hash, |
1267 | .unhash = udp_lib_unhash, | 1269 | .unhash = udp_lib_unhash, |
1268 | .get_port = udp_v6_get_port, | 1270 | .get_port = udp_v6_get_port, |
1269 | .memory_allocated = &udp_memory_allocated, | 1271 | .memory_allocated = &udp_memory_allocated, |
1270 | .sysctl_mem = sysctl_udp_mem, | 1272 | .sysctl_mem = sysctl_udp_mem, |
1271 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1273 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1272 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1274 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1273 | .obj_size = sizeof(struct udp6_sock), | 1275 | .obj_size = sizeof(struct udp6_sock), |
1274 | .slab_flags = SLAB_DESTROY_BY_RCU, | 1276 | .slab_flags = SLAB_DESTROY_BY_RCU, |
1275 | .h.udp_table = &udp_table, | 1277 | .h.udp_table = &udp_table, |
1276 | #ifdef CONFIG_COMPAT | 1278 | #ifdef CONFIG_COMPAT |
1277 | .compat_setsockopt = compat_udpv6_setsockopt, | 1279 | .compat_setsockopt = compat_udpv6_setsockopt, |
1278 | .compat_getsockopt = compat_udpv6_getsockopt, | 1280 | .compat_getsockopt = compat_udpv6_getsockopt, |
1279 | #endif | 1281 | #endif |
1280 | }; | 1282 | }; |
1281 | 1283 | ||
1282 | static struct inet_protosw udpv6_protosw = { | 1284 | static struct inet_protosw udpv6_protosw = { |
1283 | .type = SOCK_DGRAM, | 1285 | .type = SOCK_DGRAM, |
1284 | .protocol = IPPROTO_UDP, | 1286 | .protocol = IPPROTO_UDP, |
1285 | .prot = &udpv6_prot, | 1287 | .prot = &udpv6_prot, |
1286 | .ops = &inet6_dgram_ops, | 1288 | .ops = &inet6_dgram_ops, |
1287 | .capability = -1, | 1289 | .capability = -1, |
1288 | .no_check = UDP_CSUM_DEFAULT, | 1290 | .no_check = UDP_CSUM_DEFAULT, |
1289 | .flags = INET_PROTOSW_PERMANENT, | 1291 | .flags = INET_PROTOSW_PERMANENT, |
1290 | }; | 1292 | }; |
1291 | 1293 | ||
1292 | 1294 | ||
1293 | int __init udpv6_init(void) | 1295 | int __init udpv6_init(void) |
1294 | { | 1296 | { |
1295 | int ret; | 1297 | int ret; |
1296 | 1298 | ||
1297 | ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); | 1299 | ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); |
1298 | if (ret) | 1300 | if (ret) |
1299 | goto out; | 1301 | goto out; |
1300 | 1302 | ||
1301 | ret = inet6_register_protosw(&udpv6_protosw); | 1303 | ret = inet6_register_protosw(&udpv6_protosw); |
1302 | if (ret) | 1304 | if (ret) |
1303 | goto out_udpv6_protocol; | 1305 | goto out_udpv6_protocol; |
1304 | out: | 1306 | out: |
1305 | return ret; | 1307 | return ret; |
1306 | 1308 | ||
1307 | out_udpv6_protocol: | 1309 | out_udpv6_protocol: |
1308 | inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); | 1310 | inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); |
1309 | goto out; | 1311 | goto out; |
1310 | } | 1312 | } |
1311 | 1313 | ||
1312 | void udpv6_exit(void) | 1314 | void udpv6_exit(void) |
1313 | { | 1315 | { |
1314 | inet6_unregister_protosw(&udpv6_protosw); | 1316 | inet6_unregister_protosw(&udpv6_protosw); |
1315 | inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); | 1317 | inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); |
1316 | } | 1318 | } |
1317 | 1319 |