Blame view

net/core/datagram.c 18.2 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
  /*
   *	SUCS NET3:
   *
   *	Generic datagram handling routines. These are generic for all
   *	protocols. Possibly a generic IP version on top of these would
   *	make sense. Not tonight however 8-).
   *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
   *	NetROM layer all have identical poll code and mostly
   *	identical recvmsg() code. So we share it here. The poll was
   *	shared before but buried in udp.c so I moved it.
   *
   *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
   *						     udp.c code)
   *
   *	Fixes:
   *		Alan Cox	:	NULL return from skb_peek_copy()
   *					understood
   *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
   *					skb_peek_copy stuff.
   *		Alan Cox	:	Added support for SOCK_SEQPACKET.
   *					IPX can no longer use the SO_TYPE hack
   *					but AX.25 now works right, and SPX is
   *					feasible.
   *		Alan Cox	:	Fixed write poll of non IP protocol
   *					crash.
   *		Florian  La Roche:	Changed for my new skbuff handling.
   *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
   *		Linus Torvalds	:	BSD semantic fixes.
   *		Alan Cox	:	Datagram iovec handling
   *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
   *		Alan Cox	:	POSIXisms
   *		Pete Wyckoff    :       Unconnected accept() fix.
   *
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <linux/mm.h>
  #include <linux/interrupt.h>
  #include <linux/errno.h>
  #include <linux/sched.h>
  #include <linux/inet.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46
47
48
49
  #include <linux/netdevice.h>
  #include <linux/rtnetlink.h>
  #include <linux/poll.h>
  #include <linux/highmem.h>
3305b80c2   Herbert Xu   [IP]: Simplify an...
50
  #include <linux/spinlock.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
51
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
53
54
  
  #include <net/protocol.h>
  #include <linux/skbuff.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
55

c752f0739   Arnaldo Carvalho de Melo   [TCP]: Move the t...
56
57
58
  #include <net/checksum.h>
  #include <net/sock.h>
  #include <net/tcp_states.h>
e9b3cc1b3   Neil Horman   net: skb ftracer ...
59
  #include <trace/events/skb.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
60
61
62
63
64
65
66
67
  
  /*
   *	Is a socket 'connection oriented' ?
   */
  static inline int connection_based(struct sock *sk)
  {
  	/* Treated as connection oriented: SOCK_SEQPACKET and SOCK_STREAM. */
  	switch (sk->sk_type) {
  	case SOCK_SEQPACKET:
  	case SOCK_STREAM:
  		return 1;
  	default:
  		return 0;
  	}
  }
bf368e4e7   Eric Dumazet   net: Avoid extra ...
68
69
70
71
72
73
74
75
76
77
78
79
  static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
  				  void *key)
  {
  	const unsigned long events = (unsigned long)key;
  	const unsigned long wanted = POLLIN | POLLERR;

  	/*
  	 * Wake the waiter when the key carries no event bits at all, or
  	 * when it carries at least one of the bits a receiver cares about.
  	 * Any other key is ignored without a wakeup.
  	 */
  	if (events == 0 || (events & wanted) != 0)
  		return autoremove_wake_function(wait, mode, sync, key);

  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
81
82
83
84
85
  /*
   * Wait for a packet..
   */
  static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
  {
  	int error;
bf368e4e7   Eric Dumazet   net: Avoid extra ...
86
  	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87

aa3951451   Eric Dumazet   net: sk_sleep() h...
88
  	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  
  	/* Socket errors? */
  	error = sock_error(sk);
  	if (error)
  		goto out_err;
  
  	if (!skb_queue_empty(&sk->sk_receive_queue))
  		goto out;
  
  	/* Socket shut down? */
  	if (sk->sk_shutdown & RCV_SHUTDOWN)
  		goto out_noerr;
  
  	/* Sequenced packets can come disconnected.
  	 * If so we report the problem
  	 */
  	error = -ENOTCONN;
  	if (connection_based(sk) &&
  	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
  		goto out_err;
  
  	/* handle signals */
  	if (signal_pending(current))
  		goto interrupted;
  
  	error = 0;
  	*timeo_p = schedule_timeout(*timeo_p);
  out:
aa3951451   Eric Dumazet   net: sk_sleep() h...
117
  	finish_wait(sk_sleep(sk), &wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
118
119
120
121
122
123
124
125
126
127
128
129
130
  	return error;
  interrupted:
  	error = sock_intr_errno(*timeo_p);
  out_err:
  	*err = error;
  	goto out;
  out_noerr:
  	*err = 0;
  	error = 1;
  	goto out;
  }
  
  /**
a59322be0   Herbert Xu   [UDP]: Only incre...
131
   *	__skb_recv_datagram - Receive a datagram skbuff
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
132
133
   *	@sk: socket
   *	@flags: MSG_ flags
a59322be0   Herbert Xu   [UDP]: Only incre...
134
   *	@peeked: returns non-zero if this packet has been seen before
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
135
   *	@err: error code returned
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
   *
   *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
   *	and possible races. This replaces identical code in packet, raw and
   *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
   *	the long standing peek and read race for datagram sockets. If you
   *	alter this routine remember it must be re-entrant.
   *
   *	This function will lock the socket if a skb is returned, so the caller
   *	needs to unlock the socket in that case (usually by calling
   *	skb_free_datagram)
   *
   *	* It does not lock socket since today. This function is
   *	* free of race conditions. This measure should/can improve
   *	* significantly datagram socket latencies at high loads,
   *	* when data copying to user space takes lots of time.
   *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
   *	*  8) Great win.)
   *	*			                    --ANK (980729)
   *
   *	The order of the tests when we find no data waiting are specified
   *	quite explicitly by POSIX 1003.1g, don't change them without having
   *	the standard around please.
   */
a59322be0   Herbert Xu   [UDP]: Only incre...
159
160
  struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
  				    int *peeked, int *err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
161
162
163
164
165
166
167
168
169
170
  {
  	struct sk_buff *skb;
  	long timeo;
  	/*
  	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
  	 */
  	int error = sock_error(sk);
  
  	if (error)
  		goto no_packet;
a59322be0   Herbert Xu   [UDP]: Only incre...
171
  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
174
175
176
177
  
  	do {
  		/* Again only user level code calls this function, so nothing
  		 * interrupt level will suddenly eat the receive_queue.
  		 *
  		 * Look at current nfs client by the way...
8917a3c0b   David Shwatrz   Fix a typo in dat...
178
  		 * However, this function was correct in any case. 8)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
179
  		 */
a59322be0   Herbert Xu   [UDP]: Only incre...
180
181
182
183
184
185
186
187
  		unsigned long cpu_flags;
  
  		spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
  		skb = skb_peek(&sk->sk_receive_queue);
  		if (skb) {
  			*peeked = skb->peeked;
  			if (flags & MSG_PEEK) {
  				skb->peeked = 1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188
  				atomic_inc(&skb->users);
a59322be0   Herbert Xu   [UDP]: Only incre...
189
190
191
192
  			} else
  				__skb_unlink(skb, &sk->sk_receive_queue);
  		}
  		spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
  
  		if (skb)
  			return skb;
  
  		/* User doesn't want to wait */
  		error = -EAGAIN;
  		if (!timeo)
  			goto no_packet;
  
  	} while (!wait_for_packet(sk, err, &timeo));
  
  	return NULL;
  
  no_packet:
  	*err = error;
  	return NULL;
  }
a59322be0   Herbert Xu   [UDP]: Only incre...
210
211
212
213
214
215
216
217
218
219
  EXPORT_SYMBOL(__skb_recv_datagram);
  
  struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
  				  int noblock, int *err)
  {
  	/* Required by __skb_recv_datagram(); the value is not used here. */
  	int peeked;

  	/* A non-zero @noblock is expressed as MSG_DONTWAIT in the flags. */
  	if (noblock)
  		flags |= MSG_DONTWAIT;

  	return __skb_recv_datagram(sk, flags, &peeked, err);
  }
9e34a5b51   Eric Dumazet   net/core: EXPORT_...
220
  EXPORT_SYMBOL(skb_recv_datagram);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
221
222
223
  
  /*
   * Release a datagram skb: hand it to consume_skb() and then call
   * sk_mem_reclaim_partial() on the owning socket.
   */
  void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  {
  	consume_skb(skb);
  	sk_mem_reclaim_partial(sk);
  }
9d410c796   Eric Dumazet   net: fix sk_forwa...
227
228
229
230
  EXPORT_SYMBOL(skb_free_datagram);
  
  /*
   * Drop one reference on @skb and, if it was the last one, orphan the skb
   * and reclaim socket memory under lock_sock_fast() before freeing it.
   *
   * Returns early, without touching the socket, when other references to
   * the skb remain after the decrement.
   */
  void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
  {
  	bool slow;

  	/*
  	 * A single remaining user skips the atomic decrement and only
  	 * issues a read barrier; otherwise decrement and stop unless
  	 * the count reached zero.
  	 */
  	if (likely(atomic_read(&skb->users) == 1))
  		smp_rmb();
  	else if (likely(!atomic_dec_and_test(&skb->users)))
  		return;

  	slow = lock_sock_fast(sk);
  	skb_orphan(skb);
  	sk_mem_reclaim_partial(sk);
  	unlock_sock_fast(sk, slow);

  	/* skb is now orphaned, can be freed outside of locked section */
  	trace_kfree_skb(skb, skb_free_datagram_locked);
  	__kfree_skb(skb);
  }
  EXPORT_SYMBOL(skb_free_datagram_locked);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
247
  
  /**
3305b80c2   Herbert Xu   [IP]: Simplify an...
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
   *	skb_kill_datagram - Free a datagram skbuff forcibly
   *	@sk: socket
   *	@skb: datagram skbuff
   *	@flags: MSG_ flags
   *
   *	This function frees a datagram skbuff that was received by
   *	skb_recv_datagram.  The flags argument must match the one
   *	used for skb_recv_datagram.
   *
   *	If the MSG_PEEK flag is set, and the packet is still on the
   *	receive queue of the socket, it will be taken off the queue
   *	before it is freed.
   *
   *	This function currently only disables BH when acquiring the
   *	sk_receive_queue lock.  Therefore it must not be used in a
   *	context where that lock is acquired in an IRQ context.
27ab25686   Herbert Xu   [UDP]: Avoid repe...
264
265
   *
   *	It returns 0 if the packet was removed by us.
3305b80c2   Herbert Xu   [IP]: Simplify an...
266
   */
27ab25686   Herbert Xu   [UDP]: Avoid repe...
267
  int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
3305b80c2   Herbert Xu   [IP]: Simplify an...
268
  {
27ab25686   Herbert Xu   [UDP]: Avoid repe...
269
  	int err = 0;
3305b80c2   Herbert Xu   [IP]: Simplify an...
270
  	if (flags & MSG_PEEK) {
27ab25686   Herbert Xu   [UDP]: Avoid repe...
271
  		err = -ENOENT;
3305b80c2   Herbert Xu   [IP]: Simplify an...
272
273
274
275
  		spin_lock_bh(&sk->sk_receive_queue.lock);
  		if (skb == skb_peek(&sk->sk_receive_queue)) {
  			__skb_unlink(skb, &sk->sk_receive_queue);
  			atomic_dec(&skb->users);
27ab25686   Herbert Xu   [UDP]: Avoid repe...
276
  			err = 0;
3305b80c2   Herbert Xu   [IP]: Simplify an...
277
278
279
  		}
  		spin_unlock_bh(&sk->sk_receive_queue.lock);
  	}
61de71c67   John Dykstra   Network Drop Moni...
280
  	kfree_skb(skb);
8edf19c2f   Eric Dumazet   net: sk_drops con...
281
  	atomic_inc(&sk->sk_drops);
61de71c67   John Dykstra   Network Drop Moni...
282
  	sk_mem_reclaim_partial(sk);
27ab25686   Herbert Xu   [UDP]: Avoid repe...
283
  	return err;
3305b80c2   Herbert Xu   [IP]: Simplify an...
284
  }
3305b80c2   Herbert Xu   [IP]: Simplify an...
285
286
287
  EXPORT_SYMBOL(skb_kill_datagram);
  
  /**
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
288
   *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
289
290
   *	@skb: buffer to copy
   *	@offset: offset in the buffer to start copying from
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
291
   *	@to: io vector to copy to
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
292
   *	@len: amount of data to copy from buffer to iovec
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
294
295
296
297
298
   *
   *	Note: the iovec is modified during the copy.
   */
  int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
  			    struct iovec *to, int len)
  {
1a028e507   David S. Miller   [NET]: Revert sk_...
299
300
  	int start = skb_headlen(skb);
  	int i, copy = start - offset;
5b1a002ad   David S. Miller   datagram: Use fra...
301
  	struct sk_buff *frag_iter;
c75d721c7   Herbert Xu   [NET]: Fix zero-s...
302

e9b3cc1b3   Neil Horman   net: skb ftracer ...
303
  	trace_skb_copy_datagram_iovec(skb, len);
b4d9eda02   David S. Miller   [NET]: Revert skb...
304
305
306
307
308
309
310
311
312
313
  	/* Copy header. */
  	if (copy > 0) {
  		if (copy > len)
  			copy = len;
  		if (memcpy_toiovec(to, skb->data + offset, copy))
  			goto fault;
  		if ((len -= copy) == 0)
  			return 0;
  		offset += copy;
  	}
c75d721c7   Herbert Xu   [NET]: Fix zero-s...
314

b4d9eda02   David S. Miller   [NET]: Revert skb...
315
316
  	/* Copy paged appendix. Hmm... why does this look so complicated? */
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1a028e507   David S. Miller   [NET]: Revert sk_...
317
  		int end;
9e903e085   Eric Dumazet   net: add skb frag...
318
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
319

547b792ca   Ilpo Järvinen   net: convert BUG_...
320
  		WARN_ON(start > offset + len);
1a028e507   David S. Miller   [NET]: Revert sk_...
321

9e903e085   Eric Dumazet   net: add skb frag...
322
  		end = start + skb_frag_size(frag);
b4d9eda02   David S. Miller   [NET]: Revert skb...
323
324
325
  		if ((copy = end - offset) > 0) {
  			int err;
  			u8  *vaddr;
ea2ab6937   Ian Campbell   net: convert core...
326
  			struct page *page = skb_frag_page(frag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
328
329
  
  			if (copy > len)
  				copy = len;
b4d9eda02   David S. Miller   [NET]: Revert skb...
330
  			vaddr = kmap(page);
1a028e507   David S. Miller   [NET]: Revert sk_...
331
332
  			err = memcpy_toiovec(to, vaddr + frag->page_offset +
  					     offset - start, copy);
b4d9eda02   David S. Miller   [NET]: Revert skb...
333
  			kunmap(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
335
336
337
338
339
  			if (err)
  				goto fault;
  			if (!(len -= copy))
  				return 0;
  			offset += copy;
  		}
1a028e507   David S. Miller   [NET]: Revert sk_...
340
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
341
  	}
b4d9eda02   David S. Miller   [NET]: Revert skb...
342

5b1a002ad   David S. Miller   datagram: Use fra...
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  	skb_walk_frags(skb, frag_iter) {
  		int end;
  
  		WARN_ON(start > offset + len);
  
  		end = start + frag_iter->len;
  		if ((copy = end - offset) > 0) {
  			if (copy > len)
  				copy = len;
  			if (skb_copy_datagram_iovec(frag_iter,
  						    offset - start,
  						    to, copy))
  				goto fault;
  			if ((len -= copy) == 0)
  				return 0;
  			offset += copy;
b4d9eda02   David S. Miller   [NET]: Revert skb...
359
  		}
5b1a002ad   David S. Miller   datagram: Use fra...
360
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
361
  	}
b4d9eda02   David S. Miller   [NET]: Revert skb...
362
363
  	if (!len)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
364
365
366
  fault:
  	return -EFAULT;
  }
9e34a5b51   Eric Dumazet   net/core: EXPORT_...
367
  EXPORT_SYMBOL(skb_copy_datagram_iovec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
368

db543c1f9   Rusty Russell   net: skb_copy_dat...
369
  /**
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
   *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
   *	@skb: buffer to copy
   *	@offset: offset in the buffer to start copying from
   *	@to: io vector to copy to
   *	@to_offset: offset in the io vector to start copying to
   *	@len: amount of data to copy from buffer to iovec
   *
   *	Returns 0 or -EFAULT.
   *	Note: the iovec is not modified during the copy.
   */
  int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
  				  const struct iovec *to, int to_offset,
  				  int len)
  {
  	int start = skb_headlen(skb);
  	int i, copy = start - offset;
5b1a002ad   David S. Miller   datagram: Use fra...
386
  	struct sk_buff *frag_iter;
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
  
  	/* Copy header. */
  	if (copy > 0) {
  		if (copy > len)
  			copy = len;
  		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
  			goto fault;
  		if ((len -= copy) == 0)
  			return 0;
  		offset += copy;
  		to_offset += copy;
  	}
  
  	/* Copy paged appendix. Hmm... why does this look so complicated? */
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  		int end;
9e903e085   Eric Dumazet   net: add skb frag...
403
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
404
405
  
  		WARN_ON(start > offset + len);
9e903e085   Eric Dumazet   net: add skb frag...
406
  		end = start + skb_frag_size(frag);
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
407
408
409
  		if ((copy = end - offset) > 0) {
  			int err;
  			u8  *vaddr;
ea2ab6937   Ian Campbell   net: convert core...
410
  			struct page *page = skb_frag_page(frag);
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
  
  			if (copy > len)
  				copy = len;
  			vaddr = kmap(page);
  			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
  						offset - start, to_offset, copy);
  			kunmap(page);
  			if (err)
  				goto fault;
  			if (!(len -= copy))
  				return 0;
  			offset += copy;
  			to_offset += copy;
  		}
  		start = end;
  	}
5b1a002ad   David S. Miller   datagram: Use fra...
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
  	skb_walk_frags(skb, frag_iter) {
  		int end;
  
  		WARN_ON(start > offset + len);
  
  		end = start + frag_iter->len;
  		if ((copy = end - offset) > 0) {
  			if (copy > len)
  				copy = len;
  			if (skb_copy_datagram_const_iovec(frag_iter,
  							  offset - start,
  							  to, to_offset,
  							  copy))
  				goto fault;
  			if ((len -= copy) == 0)
  				return 0;
  			offset += copy;
  			to_offset += copy;
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
445
  		}
5b1a002ad   David S. Miller   datagram: Use fra...
446
  		start = end;
0a1ec07a6   Michael S. Tsirkin   net: skb_copy_dat...
447
448
449
450
451
452
453
454
455
456
  	}
  	if (!len)
  		return 0;
  
  fault:
  	return -EFAULT;
  }
  EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
  
  /**
db543c1f9   Rusty Russell   net: skb_copy_dat...
457
458
459
460
   *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
   *	@skb: buffer to copy
   *	@offset: offset in the buffer to start copying to
   *	@from: io vector to copy to
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
461
   *	@from_offset: offset in the io vector to start copying from
db543c1f9   Rusty Russell   net: skb_copy_dat...
462
463
464
   *	@len: amount of data to copy to buffer from iovec
   *
   *	Returns 0 or -EFAULT.
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
465
   *	Note: the iovec is not modified during the copy.
db543c1f9   Rusty Russell   net: skb_copy_dat...
466
467
   */
  int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
468
469
  				 const struct iovec *from, int from_offset,
  				 int len)
db543c1f9   Rusty Russell   net: skb_copy_dat...
470
471
472
  {
  	int start = skb_headlen(skb);
  	int i, copy = start - offset;
5b1a002ad   David S. Miller   datagram: Use fra...
473
  	struct sk_buff *frag_iter;
db543c1f9   Rusty Russell   net: skb_copy_dat...
474
475
476
477
478
  
  	/* Copy header. */
  	if (copy > 0) {
  		if (copy > len)
  			copy = len;
d2d27bfd1   Sridhar Samudrala   net: Fix skb_copy...
479
480
  		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
  					copy))
db543c1f9   Rusty Russell   net: skb_copy_dat...
481
482
483
484
  			goto fault;
  		if ((len -= copy) == 0)
  			return 0;
  		offset += copy;
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
485
  		from_offset += copy;
db543c1f9   Rusty Russell   net: skb_copy_dat...
486
487
488
489
490
  	}
  
  	/* Copy paged appendix. Hmm... why does this look so complicated? */
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  		int end;
9e903e085   Eric Dumazet   net: add skb frag...
491
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
db543c1f9   Rusty Russell   net: skb_copy_dat...
492
493
  
  		WARN_ON(start > offset + len);
9e903e085   Eric Dumazet   net: add skb frag...
494
  		end = start + skb_frag_size(frag);
db543c1f9   Rusty Russell   net: skb_copy_dat...
495
496
497
  		if ((copy = end - offset) > 0) {
  			int err;
  			u8  *vaddr;
ea2ab6937   Ian Campbell   net: convert core...
498
  			struct page *page = skb_frag_page(frag);
db543c1f9   Rusty Russell   net: skb_copy_dat...
499
500
501
502
  
  			if (copy > len)
  				copy = len;
  			vaddr = kmap(page);
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
503
504
505
  			err = memcpy_fromiovecend(vaddr + frag->page_offset +
  						  offset - start,
  						  from, from_offset, copy);
db543c1f9   Rusty Russell   net: skb_copy_dat...
506
507
508
509
510
511
512
  			kunmap(page);
  			if (err)
  				goto fault;
  
  			if (!(len -= copy))
  				return 0;
  			offset += copy;
6f26c9a75   Michael S. Tsirkin   tun: fix tun_chr_...
513
  			from_offset += copy;
db543c1f9   Rusty Russell   net: skb_copy_dat...
514
515
516
  		}
  		start = end;
  	}
5b1a002ad   David S. Miller   datagram: Use fra...
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
  	skb_walk_frags(skb, frag_iter) {
  		int end;
  
  		WARN_ON(start > offset + len);
  
  		end = start + frag_iter->len;
  		if ((copy = end - offset) > 0) {
  			if (copy > len)
  				copy = len;
  			if (skb_copy_datagram_from_iovec(frag_iter,
  							 offset - start,
  							 from,
  							 from_offset,
  							 copy))
  				goto fault;
  			if ((len -= copy) == 0)
  				return 0;
  			offset += copy;
  			from_offset += copy;
db543c1f9   Rusty Russell   net: skb_copy_dat...
536
  		}
5b1a002ad   David S. Miller   datagram: Use fra...
537
  		start = end;
db543c1f9   Rusty Russell   net: skb_copy_dat...
538
539
540
541
542
543
544
545
  	}
  	if (!len)
  		return 0;
  
  fault:
  	return -EFAULT;
  }
  EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
546
547
  static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
  				      u8 __user *to, int len,
5084205fa   Al Viro   [NET]: Annotate c...
548
  				      __wsum *csump)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
549
  {
1a028e507   David S. Miller   [NET]: Revert sk_...
550
  	int start = skb_headlen(skb);
1a028e507   David S. Miller   [NET]: Revert sk_...
551
  	int i, copy = start - offset;
5b1a002ad   David S. Miller   datagram: Use fra...
552
553
  	struct sk_buff *frag_iter;
  	int pos = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
  
  	/* Copy header. */
  	if (copy > 0) {
  		int err = 0;
  		if (copy > len)
  			copy = len;
  		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
  					       *csump, &err);
  		if (err)
  			goto fault;
  		if ((len -= copy) == 0)
  			return 0;
  		offset += copy;
  		to += copy;
  		pos = copy;
  	}
  
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1a028e507   David S. Miller   [NET]: Revert sk_...
572
  		int end;
9e903e085   Eric Dumazet   net: add skb frag...
573
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
574

547b792ca   Ilpo Järvinen   net: convert BUG_...
575
  		WARN_ON(start > offset + len);
1a028e507   David S. Miller   [NET]: Revert sk_...
576

9e903e085   Eric Dumazet   net: add skb frag...
577
  		end = start + skb_frag_size(frag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578
  		if ((copy = end - offset) > 0) {
5084205fa   Al Viro   [NET]: Annotate c...
579
  			__wsum csum2;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580
581
  			int err = 0;
  			u8  *vaddr;
ea2ab6937   Ian Campbell   net: convert core...
582
  			struct page *page = skb_frag_page(frag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
583
584
585
586
587
  
  			if (copy > len)
  				copy = len;
  			vaddr = kmap(page);
  			csum2 = csum_and_copy_to_user(vaddr +
1a028e507   David S. Miller   [NET]: Revert sk_...
588
589
  							frag->page_offset +
  							offset - start,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
591
592
593
594
595
596
597
598
599
600
  						      to, copy, 0, &err);
  			kunmap(page);
  			if (err)
  				goto fault;
  			*csump = csum_block_add(*csump, csum2, pos);
  			if (!(len -= copy))
  				return 0;
  			offset += copy;
  			to += copy;
  			pos += copy;
  		}
1a028e507   David S. Miller   [NET]: Revert sk_...
601
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602
  	}
5b1a002ad   David S. Miller   datagram: Use fra...
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
  	skb_walk_frags(skb, frag_iter) {
  		int end;
  
  		WARN_ON(start > offset + len);
  
  		end = start + frag_iter->len;
  		if ((copy = end - offset) > 0) {
  			__wsum csum2 = 0;
  			if (copy > len)
  				copy = len;
  			if (skb_copy_and_csum_datagram(frag_iter,
  						       offset - start,
  						       to, copy,
  						       &csum2))
  				goto fault;
  			*csump = csum_block_add(*csump, csum2, pos);
  			if ((len -= copy) == 0)
  				return 0;
  			offset += copy;
  			to += copy;
  			pos += copy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
624
  		}
5b1a002ad   David S. Miller   datagram: Use fra...
625
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
626
627
628
629
630
631
632
  	}
  	if (!len)
  		return 0;
  
  fault:
  	return -EFAULT;
  }
759e5d006   Herbert Xu   [UDP]: Clean up U...
633
  __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
fb286bb29   Herbert Xu   [NET]: Detect har...
634
  {
d3bc23e7e   Al Viro   [NET]: Annotate c...
635
  	__sum16 sum;
fb286bb29   Herbert Xu   [NET]: Detect har...
636

759e5d006   Herbert Xu   [UDP]: Clean up U...
637
  	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
fb286bb29   Herbert Xu   [NET]: Detect har...
638
  	if (likely(!sum)) {
84fa7933a   Patrick McHardy   [NET]: Replace CH...
639
  		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
fb286bb29   Herbert Xu   [NET]: Detect har...
640
641
642
643
644
  			netdev_rx_csum_fault(skb->dev);
  		skb->ip_summed = CHECKSUM_UNNECESSARY;
  	}
  	return sum;
  }
759e5d006   Herbert Xu   [UDP]: Clean up U...
645
646
647
648
649
650
  EXPORT_SYMBOL(__skb_checksum_complete_head);
  
  __sum16 __skb_checksum_complete(struct sk_buff *skb)
  {
  	/* Same check as __skb_checksum_complete_head(), over all of skb->len. */
  	const int whole_len = skb->len;

  	return __skb_checksum_complete_head(skb, whole_len);
  }
fb286bb29   Herbert Xu   [NET]: Detect har...
651
  EXPORT_SYMBOL(__skb_checksum_complete);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
652
653
  /**
   *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
654
655
   *	@skb: skbuff
   *	@hlen: hardware length
67be2dd1b   Martin Waitz   [PATCH] DocBook: ...
656
   *	@iov: io vector
4ec93edb1   YOSHIFUJI Hideaki   [NET] CORE: Fix w...
657
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658
659
660
661
662
663
664
   *	Caller _must_ check that skb will fit to this iovec.
   *
   *	Returns: 0       - success.
   *		 -EINVAL - checksum failure.
   *		 -EFAULT - fault during copy. Beware, in this case iovec
   *			   can be modified!
   */
fb286bb29   Herbert Xu   [NET]: Detect har...
665
  int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
667
  				     int hlen, struct iovec *iov)
  {
d3bc23e7e   Al Viro   [NET]: Annotate c...
668
  	__wsum csum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
669
  	int chunk = skb->len - hlen;
ef8aef55c   Herbert Xu   [NET]: Do not der...
670
671
  	if (!chunk)
  		return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
672
673
674
675
676
677
678
  	/* Skip filled elements.
  	 * Pretty silly, look at memcpy_toiovec, though 8)
  	 */
  	while (!iov->iov_len)
  		iov++;
  
  	if (iov->iov_len < chunk) {
fb286bb29   Herbert Xu   [NET]: Detect har...
679
  		if (__skb_checksum_complete(skb))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
680
681
682
683
684
685
686
687
  			goto csum_error;
  		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
  			goto fault;
  	} else {
  		csum = csum_partial(skb->data, hlen, skb->csum);
  		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
  					       chunk, &csum))
  			goto fault;
d3bc23e7e   Al Viro   [NET]: Annotate c...
688
  		if (csum_fold(csum))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
689
  			goto csum_error;
84fa7933a   Patrick McHardy   [NET]: Replace CH...
690
  		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
fb286bb29   Herbert Xu   [NET]: Detect har...
691
  			netdev_rx_csum_fault(skb->dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
693
694
695
696
697
698
699
700
  		iov->iov_len -= chunk;
  		iov->iov_base += chunk;
  	}
  	return 0;
  csum_error:
  	return -EINVAL;
  fault:
  	return -EFAULT;
  }
9e34a5b51   Eric Dumazet   net/core: EXPORT_...
701
  EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
702
703
704
  
  /**
   * 	datagram_poll - generic datagram poll
   *	@file: file struct
   *	@sock: socket
   *	@wait: poll table
   *
   *	Datagram poll: Again totally generic. This also handles
   *	sequenced packet sockets providing the socket receive queue
   *	is only ever holding data ready to receive.
   *
   *	Returns the POLL* event mask computed from the socket's error
   *	state, shutdown flags, receive queue and writeability. When the
   *	socket is not writeable, SOCK_ASYNC_NOSPACE is set on it. For a
   *	connection-based socket in TCP_SYN_SENT the writeability check is
   *	skipped altogether.
   *
   *	Note: when you _don't_ use this routine for this protocol,
   *	and you use a different write policy from sock_writeable()
   *	then please supply your own write_space callback.
   */
  unsigned int datagram_poll(struct file *file, struct socket *sock,
  			   poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	unsigned int mask;

  	sock_poll_wait(file, sk_sleep(sk), wait);
  	mask = 0;

  	/* exceptional events? */
  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
  		mask |= POLLERR;
  	if (sk->sk_shutdown & RCV_SHUTDOWN)
  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
  	if (sk->sk_shutdown == SHUTDOWN_MASK)
  		mask |= POLLHUP;

  	/* readable? */
  	if (!skb_queue_empty(&sk->sk_receive_queue))
  		mask |= POLLIN | POLLRDNORM;

  	/* Connection-based need to check for termination and startup */
  	if (connection_based(sk)) {
  		if (sk->sk_state == TCP_CLOSE)
  			mask |= POLLHUP;
  		/* connection hasn't started yet? */
  		if (sk->sk_state == TCP_SYN_SENT)
  			return mask;
  	}

  	/* writable? */
  	if (sock_writeable(sk))
  		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
  	else
  		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

  	return mask;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
754
  EXPORT_SYMBOL(datagram_poll);