Blame view

net/core/datagram.c 19.3 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
  /*
   *	SUCS NET3:
   *
   *	Generic datagram handling routines. These are generic for all
   *	protocols. Possibly a generic IP version on top of these would
   *	make sense. Not tonight however 8-).
   *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
   *	NetROM layer all have identical poll code and mostly
   *	identical recvmsg() code. So we share it here. The poll was
   *	shared before but buried in udp.c so I moved it.
   *
113aa838e   Alan Cox   net: Rationalise ...
12
   *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
   *						     udp.c code)
   *
   *	Fixes:
   *		Alan Cox	:	NULL return from skb_peek_copy()
   *					understood
   *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
   *					skb_peek_copy stuff.
   *		Alan Cox	:	Added support for SOCK_SEQPACKET.
   *					IPX can no longer use the SO_TYPE hack
   *					but AX.25 now works right, and SPX is
   *					feasible.
   *		Alan Cox	:	Fixed write poll of non IP protocol
   *					crash.
   *		Florian  La Roche:	Changed for my new skbuff handling.
   *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
   *		Linus Torvalds	:	BSD semantic fixes.
   *		Alan Cox	:	Datagram iovec handling
   *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
   *		Alan Cox	:	POSIXisms
   *		Pete Wyckoff    :       Unconnected accept() fix.
   *
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <asm/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
41
42
43
44
  #include <linux/mm.h>
  #include <linux/interrupt.h>
  #include <linux/errno.h>
  #include <linux/sched.h>
  #include <linux/inet.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
  #include <linux/netdevice.h>
  #include <linux/rtnetlink.h>
  #include <linux/poll.h>
  #include <linux/highmem.h>
3305b80c2   Herbert Xu   [IP]: Simplify an...
49
  #include <linux/spinlock.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
50
  #include <linux/slab.h>
0433547aa   Jason Wang   net: use release_...
51
  #include <linux/pagemap.h>
a8f820aa4   Herbert Xu   inet: Add skb_cop...
52
  #include <linux/uio.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
55
  
  #include <net/protocol.h>
  #include <linux/skbuff.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
56

c752f0739   Arnaldo Carvalho de Melo   [TCP]: Move the t...
57
58
59
  #include <net/checksum.h>
  #include <net/sock.h>
  #include <net/tcp_states.h>
e9b3cc1b3   Neil Horman   net: skb ftracer ...
60
  #include <trace/events/skb.h>
076bb0c82   Eliezer Tamir   net: rename inclu...
61
  #include <net/busy_poll.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
62
63
64
65
66
67
68
69
  
  /*
   *	Report whether a socket is 'connection oriented', i.e. whether its
   *	type is SOCK_SEQPACKET or SOCK_STREAM. Returns 1 if so, 0 otherwise.
   */
  static inline int connection_based(struct sock *sk)
  {
  	switch (sk->sk_type) {
  	case SOCK_SEQPACKET:
  	case SOCK_STREAM:
  		return 1;
  	default:
  		return 0;
  	}
  }
95c961747   Eric Dumazet   net: cleanup unsi...
70
  /*
   * Wake callback used by __skb_wait_for_more_packets(). @key is treated
   * as a poll event mask; the sleeper is only woken (and removed from the
   * wait queue via autoremove_wake_function()) when the mask is empty or
   * contains POLLIN or POLLERR.
   */
  static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
  				  void *key)
  {
  	unsigned long events = (unsigned long)key;
  
  	/*
  	 * An empty mask carries no filter information, so wake up; a
  	 * non-empty one must name an event a receiver cares about.
  	 */
  	if (!events || (events & (POLLIN | POLLERR)))
  		return autoremove_wake_function(wait, mode, sync, key);
  
  	/* Avoid a wakeup if event not interesting for us */
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82
  /*
39cc86130   Benjamin Poirier   unix/dgram: fix p...
83
   * Wait for the last received packet to be different from skb
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
84
   */
ea3793ee2   Rainer Weikusat   core: enable more...
85
86
  int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
  				const struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
87
88
  {
  	int error;
bf368e4e7   Eric Dumazet   net: Avoid extra ...
89
  	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90

aa3951451   Eric Dumazet   net: sk_sleep() h...
91
  	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
92
93
94
95
96
  
  	/* Socket errors? */
  	error = sock_error(sk);
  	if (error)
  		goto out_err;
39cc86130   Benjamin Poirier   unix/dgram: fix p...
97
  	if (sk->sk_receive_queue.prev != skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  		goto out;
  
  	/* Socket shut down? */
  	if (sk->sk_shutdown & RCV_SHUTDOWN)
  		goto out_noerr;
  
  	/* Sequenced packets can come disconnected.
  	 * If so we report the problem
  	 */
  	error = -ENOTCONN;
  	if (connection_based(sk) &&
  	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
  		goto out_err;
  
  	/* handle signals */
  	if (signal_pending(current))
  		goto interrupted;
  
  	error = 0;
  	*timeo_p = schedule_timeout(*timeo_p);
  out:
aa3951451   Eric Dumazet   net: sk_sleep() h...
119
  	finish_wait(sk_sleep(sk), &wait);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
120
121
122
123
124
125
126
127
128
129
130
  	return error;
  interrupted:
  	error = sock_intr_errno(*timeo_p);
  out_err:
  	*err = error;
  	goto out;
  out_noerr:
  	*err = 0;
  	error = 1;
  	goto out;
  }
ea3793ee2   Rainer Weikusat   core: enable more...
131
  EXPORT_SYMBOL(__skb_wait_for_more_packets);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
132

a0a2a6602   Herbert Xu   net: Fix skb_set_...
133
  static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
738ac1ebb   Herbert Xu   net: Clone skb be...
134
135
136
137
  {
  	struct sk_buff *nskb;
  
  	if (skb->peeked)
a0a2a6602   Herbert Xu   net: Fix skb_set_...
138
  		return skb;
738ac1ebb   Herbert Xu   net: Clone skb be...
139
140
141
142
143
144
145
  
  	/* We have to unshare an skb before modifying it. */
  	if (!skb_shared(skb))
  		goto done;
  
  	nskb = skb_clone(skb, GFP_ATOMIC);
  	if (!nskb)
a0a2a6602   Herbert Xu   net: Fix skb_set_...
146
  		return ERR_PTR(-ENOMEM);
738ac1ebb   Herbert Xu   net: Clone skb be...
147
148
149
150
151
152
153
154
155
156
157
  
  	skb->prev->next = nskb;
  	skb->next->prev = nskb;
  	nskb->prev = skb->prev;
  	nskb->next = skb->next;
  
  	consume_skb(skb);
  	skb = nskb;
  
  done:
  	skb->peeked = 1;
a0a2a6602   Herbert Xu   net: Fix skb_set_...
158
  	return skb;
738ac1ebb   Herbert Xu   net: Clone skb be...
159
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
160
  /**
ea3793ee2   Rainer Weikusat   core: enable more...
161
   *	__skb_try_recv_datagram - Receive a datagram skbuff
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
162
163
   *	@sk: socket
   *	@flags: MSG_ flags
39cc86130   Benjamin Poirier   unix/dgram: fix p...
164
   *	@peeked: returns non-zero if this packet has been seen before
3f518bf74   Pavel Emelyanov   datagram: Add off...
165
166
   *	@off: an offset in bytes to peek skb from. Returns an offset
   *	      within an skb where data actually starts
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
167
   *	@err: error code returned
ea3793ee2   Rainer Weikusat   core: enable more...
168
169
   *	@last: set to last peeked message to inform the wait function
   *	       what to look for when peeking
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
170
171
172
173
174
175
176
   *
   *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
   *	and possible races. This replaces identical code in packet, raw and
   *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
   *	the long standing peek and read race for datagram sockets. If you
   *	alter this routine remember it must be re-entrant.
   *
ea3793ee2   Rainer Weikusat   core: enable more...
177
178
179
180
181
   *	This function does not lock the socket. A returned skb must be
   *	released by the caller (usually by calling skb_free_datagram);
   *	with MSG_PEEK the skb stays on the receive queue and the caller
   *	holds an extra reference to it. Returns NULL with *err set to
   *	-EAGAIN if no data was available or to some other value if an
   *	error was detected.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
182
183
184
185
186
187
188
189
190
191
192
193
194
   *
   *	* It does not lock socket since today. This function is
   *	* free of race conditions. This measure should/can improve
   *	* significantly datagram socket latencies at high loads,
   *	* when data copying to user space takes lots of time.
   *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
   *	*  8) Great win.)
   *	*			                    --ANK (980729)
   *
   *	The order of the tests when we find no data waiting are specified
   *	quite explicitly by POSIX 1003.1g, don't change them without having
   *	the standard around please.
   */
ea3793ee2   Rainer Weikusat   core: enable more...
195
196
197
  struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
  					int *peeked, int *off, int *err,
  					struct sk_buff **last)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
198
  {
738ac1ebb   Herbert Xu   net: Clone skb be...
199
  	struct sk_buff_head *queue = &sk->sk_receive_queue;
ea3793ee2   Rainer Weikusat   core: enable more...
200
  	struct sk_buff *skb;
738ac1ebb   Herbert Xu   net: Clone skb be...
201
  	unsigned long cpu_flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
202
203
204
205
206
207
208
  	/*
  	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
  	 */
  	int error = sock_error(sk);
  
  	if (error)
  		goto no_packet;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
210
211
212
213
  	do {
  		/* Again only user level code calls this function, so nothing
  		 * interrupt level will suddenly eat the receive_queue.
  		 *
  		 * Look at current nfs client by the way...
8917a3c0b   David Shwatrz   Fix a typo in dat...
214
  		 * However, this function was correct in any case. 8)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215
  		 */
39cc86130   Benjamin Poirier   unix/dgram: fix p...
216
  		int _off = *off;
a59322be0   Herbert Xu   [UDP]: Only incre...
217

ea3793ee2   Rainer Weikusat   core: enable more...
218
  		*last = (struct sk_buff *)queue;
4934b0329   Pavel Emelyanov   datagram: Factor ...
219
  		spin_lock_irqsave(&queue->lock, cpu_flags);
3f518bf74   Pavel Emelyanov   datagram: Add off...
220
  		skb_queue_walk(queue, skb) {
ea3793ee2   Rainer Weikusat   core: enable more...
221
  			*last = skb;
a59322be0   Herbert Xu   [UDP]: Only incre...
222
223
  			*peeked = skb->peeked;
  			if (flags & MSG_PEEK) {
39cc86130   Benjamin Poirier   unix/dgram: fix p...
224
  				if (_off >= skb->len && (skb->len || _off ||
add05ad4e   Benjamin Poirier   unix/dgram: peek ...
225
  							 skb->peeked)) {
39cc86130   Benjamin Poirier   unix/dgram: fix p...
226
  					_off -= skb->len;
3f518bf74   Pavel Emelyanov   datagram: Add off...
227
228
  					continue;
  				}
738ac1ebb   Herbert Xu   net: Clone skb be...
229

a0a2a6602   Herbert Xu   net: Fix skb_set_...
230
231
  				skb = skb_set_peeked(skb);
  				error = PTR_ERR(skb);
ea3793ee2   Rainer Weikusat   core: enable more...
232
233
234
235
236
  				if (IS_ERR(skb)) {
  					spin_unlock_irqrestore(&queue->lock,
  							       cpu_flags);
  					goto no_packet;
  				}
738ac1ebb   Herbert Xu   net: Clone skb be...
237

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
238
  				atomic_inc(&skb->users);
a59322be0   Herbert Xu   [UDP]: Only incre...
239
  			} else
4934b0329   Pavel Emelyanov   datagram: Factor ...
240
  				__skb_unlink(skb, queue);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
241

3f518bf74   Pavel Emelyanov   datagram: Add off...
242
  			spin_unlock_irqrestore(&queue->lock, cpu_flags);
39cc86130   Benjamin Poirier   unix/dgram: fix p...
243
  			*off = _off;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244
  			return skb;
3f518bf74   Pavel Emelyanov   datagram: Add off...
245
  		}
ea3793ee2   Rainer Weikusat   core: enable more...
246

3f518bf74   Pavel Emelyanov   datagram: Add off...
247
  		spin_unlock_irqrestore(&queue->lock, cpu_flags);
ea3793ee2   Rainer Weikusat   core: enable more...
248
249
  	} while (sk_can_busy_loop(sk) &&
  		 sk_busy_loop(sk, flags & MSG_DONTWAIT));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
250

ea3793ee2   Rainer Weikusat   core: enable more...
251
  	error = -EAGAIN;
a5b50476f   Eliezer Tamir   udp: add low late...
252

ea3793ee2   Rainer Weikusat   core: enable more...
253
254
255
256
257
  no_packet:
  	*err = error;
  	return NULL;
  }
  EXPORT_SYMBOL(__skb_try_recv_datagram);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
258

ea3793ee2   Rainer Weikusat   core: enable more...
259
260
261
262
263
  struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
  				    int *peeked, int *off, int *err)
  {
  	struct sk_buff *skb, *last;
  	long timeo;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264

ea3793ee2   Rainer Weikusat   core: enable more...
265
266
267
268
269
270
271
  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  
  	do {
  		skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
  					      &last);
  		if (skb)
  			return skb;
760a43224   Rainer Weikusat   net: Fix inverted...
272
  		if (*err != -EAGAIN)
ea3793ee2   Rainer Weikusat   core: enable more...
273
274
275
  			break;
  	} while (timeo &&
  		!__skb_wait_for_more_packets(sk, err, &timeo, last));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
276

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277
278
  	return NULL;
  }
a59322be0   Herbert Xu   [UDP]: Only incre...
279
  EXPORT_SYMBOL(__skb_recv_datagram);
95c961747   Eric Dumazet   net: cleanup unsi...
280
  struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
a59322be0   Herbert Xu   [UDP]: Only incre...
281
282
  				  int noblock, int *err)
  {
3f518bf74   Pavel Emelyanov   datagram: Add off...
283
  	int peeked, off = 0;
a59322be0   Herbert Xu   [UDP]: Only incre...
284
285
  
  	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
3f518bf74   Pavel Emelyanov   datagram: Add off...
286
  				   &peeked, &off, err);
a59322be0   Herbert Xu   [UDP]: Only incre...
287
  }
9e34a5b51   Eric Dumazet   net/core: EXPORT_...
288
  EXPORT_SYMBOL(skb_recv_datagram);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
289
290
291
  
  /*
   * Release a datagram skbuff: drop the caller's reference with
   * consume_skb(), then call sk_mem_reclaim_partial() on the socket.
   */
  void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  {
  	/* consume_skb() rather than kfree_skb(): see skb_kill_datagram()
  	 * for the variant that also counts a drop.
  	 */
  	consume_skb(skb);
  
  	sk_mem_reclaim_partial(sk);
  }
9d410c796   Eric Dumazet   net: fix sk_forwa...
295
  EXPORT_SYMBOL(skb_free_datagram);
627d2d6b5   samanthakumar   udp: enable MSG_P...
296
  void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
9d410c796   Eric Dumazet   net: fix sk_forwa...
297
  {
8a74ad60a   Eric Dumazet   net: fix lock_soc...
298
  	bool slow;
93bb64eac   Eric Dumazet   net: skb_free_dat...
299
300
  	if (likely(atomic_read(&skb->users) == 1))
  		smp_rmb();
627d2d6b5   samanthakumar   udp: enable MSG_P...
301
302
  	else if (likely(!atomic_dec_and_test(&skb->users))) {
  		sk_peek_offset_bwd(sk, len);
93bb64eac   Eric Dumazet   net: skb_free_dat...
303
  		return;
627d2d6b5   samanthakumar   udp: enable MSG_P...
304
  	}
93bb64eac   Eric Dumazet   net: skb_free_dat...
305

8a74ad60a   Eric Dumazet   net: fix lock_soc...
306
  	slow = lock_sock_fast(sk);
627d2d6b5   samanthakumar   udp: enable MSG_P...
307
  	sk_peek_offset_bwd(sk, len);
4b0b72f7d   Eric Dumazet   net: speedup udp ...
308
309
  	skb_orphan(skb);
  	sk_mem_reclaim_partial(sk);
8a74ad60a   Eric Dumazet   net: fix lock_soc...
310
  	unlock_sock_fast(sk, slow);
4b0b72f7d   Eric Dumazet   net: speedup udp ...
311

93bb64eac   Eric Dumazet   net: skb_free_dat...
312
313
  	/* skb is now orphaned, can be freed outside of locked section */
  	__kfree_skb(skb);
9d410c796   Eric Dumazet   net: fix sk_forwa...
314
  }
627d2d6b5   samanthakumar   udp: enable MSG_P...
315
  EXPORT_SYMBOL(__skb_free_datagram_locked);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
316
317
  
  /**
3305b80c2   Herbert Xu   [IP]: Simplify an...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
   *	skb_kill_datagram - Free a datagram skbuff forcibly
   *	@sk: socket
   *	@skb: datagram skbuff
   *	@flags: MSG_ flags
   *
   *	This function frees a datagram skbuff that was received by
   *	skb_recv_datagram.  The flags argument must match the one
   *	used for skb_recv_datagram.
   *
   *	If the MSG_PEEK flag is set, and the packet is still on the
   *	receive queue of the socket, it will be taken off the queue
   *	before it is freed.
   *
   *	This function currently only disables BH when acquiring the
   *	sk_receive_queue lock.  Therefore it must not be used in a
   *	context where that lock is acquired in an IRQ context.
27ab25686   Herbert Xu   [UDP]: Avoid repe...
334
335
   *
   *	It returns 0 if the packet was removed by us.
3305b80c2   Herbert Xu   [IP]: Simplify an...
336
   */
27ab25686   Herbert Xu   [UDP]: Avoid repe...
337
  int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
3305b80c2   Herbert Xu   [IP]: Simplify an...
338
  {
27ab25686   Herbert Xu   [UDP]: Avoid repe...
339
  	int err = 0;
3305b80c2   Herbert Xu   [IP]: Simplify an...
340
  	if (flags & MSG_PEEK) {
27ab25686   Herbert Xu   [UDP]: Avoid repe...
341
  		err = -ENOENT;
3305b80c2   Herbert Xu   [IP]: Simplify an...
342
343
344
345
  		spin_lock_bh(&sk->sk_receive_queue.lock);
  		if (skb == skb_peek(&sk->sk_receive_queue)) {
  			__skb_unlink(skb, &sk->sk_receive_queue);
  			atomic_dec(&skb->users);
27ab25686   Herbert Xu   [UDP]: Avoid repe...
346
  			err = 0;
3305b80c2   Herbert Xu   [IP]: Simplify an...
347
348
349
  		}
  		spin_unlock_bh(&sk->sk_receive_queue.lock);
  	}
61de71c67   John Dykstra   Network Drop Moni...
350
  	kfree_skb(skb);
8edf19c2f   Eric Dumazet   net: sk_drops con...
351
  	atomic_inc(&sk->sk_drops);
61de71c67   John Dykstra   Network Drop Moni...
352
  	sk_mem_reclaim_partial(sk);
27ab25686   Herbert Xu   [UDP]: Avoid repe...
353
  	return err;
3305b80c2   Herbert Xu   [IP]: Simplify an...
354
  }
3305b80c2   Herbert Xu   [IP]: Simplify an...
355
356
357
  EXPORT_SYMBOL(skb_kill_datagram);
  
  /**
a8f820aa4   Herbert Xu   inet: Add skb_cop...
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
   *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
   *	@skb: buffer to copy
   *	@offset: offset in the buffer to start copying from
   *	@to: iovec iterator to copy to
   *	@len: amount of data to copy from buffer to iovec
   */
  int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
  			   struct iov_iter *to, int len)
  {
  	/* [seg_start, seg_end) is the byte range of the current segment. */
  	int seg_start = skb_headlen(skb);
  	int chunk = seg_start - offset;
  	struct sk_buff *frag_skb;
  	int i;
  
  	trace_skb_copy_datagram_iovec(skb, len);
  
  	/* Linear header area, if @offset falls inside it. */
  	if (chunk > 0) {
  		if (chunk > len)
  			chunk = len;
  		if (copy_to_iter(skb->data + offset, chunk, to) != chunk)
  			goto short_copy;
  		len -= chunk;
  		if (len == 0)
  			return 0;
  		offset += chunk;
  	}
  
  	/* Page fragments attached to this skb. */
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  		int seg_end;
  
  		WARN_ON(seg_start > offset + len);
  
  		seg_end = seg_start + skb_frag_size(frag);
  		chunk = seg_end - offset;
  		if (chunk > 0) {
  			if (chunk > len)
  				chunk = len;
  			if (copy_page_to_iter(skb_frag_page(frag),
  					      frag->page_offset + offset -
  					      seg_start, chunk, to) != chunk)
  				goto short_copy;
  			len -= chunk;
  			if (len == 0)
  				return 0;
  			offset += chunk;
  		}
  		seg_start = seg_end;
  	}
  
  	/* Chained skbs on the frag list: recurse into each one. */
  	skb_walk_frags(skb, frag_skb) {
  		int seg_end;
  
  		WARN_ON(seg_start > offset + len);
  
  		seg_end = seg_start + frag_skb->len;
  		chunk = seg_end - offset;
  		if (chunk > 0) {
  			if (chunk > len)
  				chunk = len;
  			if (skb_copy_datagram_iter(frag_skb,
  						   offset - seg_start,
  						   to, chunk))
  				return -EFAULT;
  			len -= chunk;
  			if (len == 0)
  				return 0;
  			offset += chunk;
  		}
  		seg_start = seg_end;
  	}
  
  	if (!len)
  		return 0;
  
  	/* This is not really a user copy fault, but rather someone
  	 * gave us a bogus length on the skb.  We should probably
  	 * print a warning here as it may indicate a kernel bug.
  	 */
  	return -EFAULT;
  
  short_copy:
  	/* A short copy is a fault only if the iterator still reports
  	 * bytes remaining.
  	 */
  	return iov_iter_count(to) ? -EFAULT : 0;
  }
  EXPORT_SYMBOL(skb_copy_datagram_iter);
  
  /**
8feb2fb2b   Al Viro   switch AF_PACKET ...
444
   *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
db543c1f9   Rusty Russell   net: skb_copy_dat...
445
446
   *	@skb: buffer to copy
   *	@offset: offset in the buffer to start copying to
8feb2fb2b   Al Viro   switch AF_PACKET ...
447
   *	@from: the copy source
db543c1f9   Rusty Russell   net: skb_copy_dat...
448
449
450
   *	@len: amount of data to copy to buffer from iovec
   *
   *	Returns 0 or -EFAULT.
db543c1f9   Rusty Russell   net: skb_copy_dat...
451
   */
3a654f975   Al Viro   new helpers: skb_...
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
  int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
  				 struct iov_iter *from,
  				 int len)
  {
  	/* [seg_start, seg_end) is the byte range of the current segment. */
  	int seg_start = skb_headlen(skb);
  	int chunk = seg_start - offset;
  	struct sk_buff *frag_skb;
  	int i;
  
  	/* Linear header area, if @offset falls inside it. */
  	if (chunk > 0) {
  		if (chunk > len)
  			chunk = len;
  		if (copy_from_iter(skb->data + offset, chunk, from) != chunk)
  			return -EFAULT;
  		len -= chunk;
  		if (len == 0)
  			return 0;
  		offset += chunk;
  	}
  
  	/* Page fragments attached to this skb. */
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  		int seg_end;
  
  		WARN_ON(seg_start > offset + len);
  
  		seg_end = seg_start + skb_frag_size(frag);
  		chunk = seg_end - offset;
  		if (chunk > 0) {
  			size_t copied;
  
  			if (chunk > len)
  				chunk = len;
  			copied = copy_page_from_iter(skb_frag_page(frag),
  					  frag->page_offset + offset - seg_start,
  					  chunk, from);
  			if (copied != chunk)
  				return -EFAULT;
  
  			len -= chunk;
  			if (len == 0)
  				return 0;
  			offset += chunk;
  		}
  		seg_start = seg_end;
  	}
  
  	/* Chained skbs on the frag list: recurse into each one. */
  	skb_walk_frags(skb, frag_skb) {
  		int seg_end;
  
  		WARN_ON(seg_start > offset + len);
  
  		seg_end = seg_start + frag_skb->len;
  		chunk = seg_end - offset;
  		if (chunk > 0) {
  			if (chunk > len)
  				chunk = len;
  			if (skb_copy_datagram_from_iter(frag_skb,
  							offset - seg_start,
  							from, chunk))
  				return -EFAULT;
  			len -= chunk;
  			if (len == 0)
  				return 0;
  			offset += chunk;
  		}
  		seg_start = seg_end;
  	}
  
  	/* Any length left over here could not be placed in the skb. */
  	return len ? -EFAULT : 0;
  }
  EXPORT_SYMBOL(skb_copy_datagram_from_iter);
c3bdeb5c7   Jason Wang   net: move zerocop...
523
  /**
195e952d0   Al Viro   kill zerocopy_sg_...
524
   *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
c3bdeb5c7   Jason Wang   net: move zerocop...
525
   *	@skb: buffer to copy
195e952d0   Al Viro   kill zerocopy_sg_...
526
   *	@from: the source to copy from
c3bdeb5c7   Jason Wang   net: move zerocop...
527
528
529
530
531
   *
   *	The function will first copy up to headlen, and then pin the userspace
   *	pages and build frags through them.
   *
   *	Returns 0, -EFAULT or -EMSGSIZE.
c3bdeb5c7   Jason Wang   net: move zerocop...
532
   */
3a654f975   Al Viro   new helpers: skb_...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
  int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
  {
  	int total = iov_iter_count(from);
  	int head = min_t(int, skb_headlen(skb), total);
  	int frag = 0;	/* next free slot in the skb's frag array */
  
  	/* copy up to skb headlen */
  	if (skb_copy_datagram_from_iter(skb, 0, from, head))
  		return -EFAULT;
  
  	/* Attach whatever is left in the iterator as page fragments. */
  	while (iov_iter_count(from)) {
  		struct page *pages[MAX_SKB_FRAGS];
  		unsigned long truesize;
  		ssize_t copied;
  		size_t start;
  		int n;
  
  		/* Data remains but every frag slot is already used. */
  		if (frag == MAX_SKB_FRAGS)
  			return -EMSGSIZE;
  
  		/* Ask for at most as many pages as there are free slots. */
  		copied = iov_iter_get_pages(from, pages, ~0U,
  					    MAX_SKB_FRAGS - frag, &start);
  		if (copied < 0)
  			return -EFAULT;
  
  		iov_iter_advance(from, copied);
  
  		/* Account the new bytes to the skb and to its socket. */
  		truesize = PAGE_ALIGN(copied + start);
  		skb->data_len += copied;
  		skb->len += copied;
  		skb->truesize += truesize;
  		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
  
  		/* One frag per page; only the first page starts at a
  		 * non-zero offset.
  		 */
  		for (n = 0; copied; n++) {
  			int size = min_t(int, copied, PAGE_SIZE - start);
  
  			skb_fill_page_desc(skb, frag++, pages[n], start, size);
  			start = 0;
  			copied -= size;
  		}
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL(zerocopy_sg_from_iter);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
  static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
e5a4b0bb8   Al Viro   switch memcpy_to_...
577
  				      struct iov_iter *to, int len,
5084205fa   Al Viro   [NET]: Annotate c...
578
  				      __wsum *csump)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
579
  {
1a028e507   David S. Miller   [NET]: Revert sk_...
580
  	int start = skb_headlen(skb);
1a028e507   David S. Miller   [NET]: Revert sk_...
581
  	int i, copy = start - offset;
5b1a002ad   David S. Miller   datagram: Use fra...
582
583
  	struct sk_buff *frag_iter;
  	int pos = 0;
e5a4b0bb8   Al Viro   switch memcpy_to_...
584
  	int n;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585
586
587
  
  	/* Copy header. */
  	if (copy > 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
588
589
  		if (copy > len)
  			copy = len;
e5a4b0bb8   Al Viro   switch memcpy_to_...
590
591
  		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
  		if (n != copy)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
592
593
594
595
  			goto fault;
  		if ((len -= copy) == 0)
  			return 0;
  		offset += copy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
596
597
598
599
  		pos = copy;
  	}
  
  	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1a028e507   David S. Miller   [NET]: Revert sk_...
600
  		int end;
9e903e085   Eric Dumazet   net: add skb frag...
601
  		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602

547b792ca   Ilpo Järvinen   net: convert BUG_...
603
  		WARN_ON(start > offset + len);
1a028e507   David S. Miller   [NET]: Revert sk_...
604

9e903e085   Eric Dumazet   net: add skb frag...
605
  		end = start + skb_frag_size(frag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
606
  		if ((copy = end - offset) > 0) {
e5a4b0bb8   Al Viro   switch memcpy_to_...
607
  			__wsum csum2 = 0;
ea2ab6937   Ian Campbell   net: convert core...
608
  			struct page *page = skb_frag_page(frag);
e5a4b0bb8   Al Viro   switch memcpy_to_...
609
  			u8  *vaddr = kmap(page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
610
611
612
  
  			if (copy > len)
  				copy = len;
e5a4b0bb8   Al Viro   switch memcpy_to_...
613
614
615
  			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
  						  offset - start, copy,
  						  &csum2, to);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
616
  			kunmap(page);
e5a4b0bb8   Al Viro   switch memcpy_to_...
617
  			if (n != copy)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
619
620
621
622
  				goto fault;
  			*csump = csum_block_add(*csump, csum2, pos);
  			if (!(len -= copy))
  				return 0;
  			offset += copy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
624
  			pos += copy;
  		}
1a028e507   David S. Miller   [NET]: Revert sk_...
625
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
626
  	}
5b1a002ad   David S. Miller   datagram: Use fra...
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
  	skb_walk_frags(skb, frag_iter) {
  		int end;
  
  		WARN_ON(start > offset + len);
  
  		end = start + frag_iter->len;
  		if ((copy = end - offset) > 0) {
  			__wsum csum2 = 0;
  			if (copy > len)
  				copy = len;
  			if (skb_copy_and_csum_datagram(frag_iter,
  						       offset - start,
  						       to, copy,
  						       &csum2))
  				goto fault;
  			*csump = csum_block_add(*csump, csum2, pos);
  			if ((len -= copy) == 0)
  				return 0;
  			offset += copy;
5b1a002ad   David S. Miller   datagram: Use fra...
646
  			pos += copy;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
647
  		}
5b1a002ad   David S. Miller   datagram: Use fra...
648
  		start = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
649
650
651
652
653
654
655
  	}
  	if (!len)
  		return 0;
  
  fault:
  	return -EFAULT;
  }
759e5d006   Herbert Xu   [UDP]: Clean up U...
656
  __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
fb286bb29   Herbert Xu   [NET]: Detect har...
657
  {
d3bc23e7e   Al Viro   [NET]: Annotate c...
658
  	__sum16 sum;
fb286bb29   Herbert Xu   [NET]: Detect har...
659

759e5d006   Herbert Xu   [UDP]: Clean up U...
660
  	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
46fb51eb9   Tom Herbert   net: Fix save sof...
661
662
663
664
665
  	if (likely(!sum)) {
  		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
  		    !skb->csum_complete_sw)
  			netdev_rx_csum_fault(skb->dev);
  	}
89c22d8c3   Herbert Xu   net: Fix skb csum...
666
667
  	if (!skb_shared(skb))
  		skb->csum_valid = !sum;
fb286bb29   Herbert Xu   [NET]: Detect har...
668
669
  	return sum;
  }
759e5d006   Herbert Xu   [UDP]: Clean up U...
670
671
672
673
  EXPORT_SYMBOL(__skb_checksum_complete_head);
  
  __sum16 __skb_checksum_complete(struct sk_buff *skb)
  {
46fb51eb9   Tom Herbert   net: Fix save sof...
674
675
676
677
678
679
680
681
682
683
684
685
  	__wsum csum;
  	__sum16 sum;
  
  	csum = skb_checksum(skb, 0, skb->len, 0);
  
  	/* skb->csum holds pseudo checksum */
  	sum = csum_fold(csum_add(skb->csum, csum));
  	if (likely(!sum)) {
  		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
  		    !skb->csum_complete_sw)
  			netdev_rx_csum_fault(skb->dev);
  	}
89c22d8c3   Herbert Xu   net: Fix skb csum...
686
687
688
689
690
691
692
  	if (!skb_shared(skb)) {
  		/* Save full packet checksum */
  		skb->csum = csum;
  		skb->ip_summed = CHECKSUM_COMPLETE;
  		skb->csum_complete_sw = 1;
  		skb->csum_valid = !sum;
  	}
46fb51eb9   Tom Herbert   net: Fix save sof...
693
694
  
  	return sum;
759e5d006   Herbert Xu   [UDP]: Clean up U...
695
  }
fb286bb29   Herbert Xu   [NET]: Detect har...
696
  EXPORT_SYMBOL(__skb_checksum_complete);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
  /**
e5a4b0bb8   Al Viro   switch memcpy_to_...
698
   *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
4dc3b16ba   Pavel Pisa   [PATCH] DocBook: ...
699
700
   *	@skb: skbuff
   *	@hlen: hardware length
e5a4b0bb8   Al Viro   switch memcpy_to_...
701
   *	@msg: destination
4ec93edb1   YOSHIFUJI Hideaki   [NET] CORE: Fix w...
702
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
704
705
706
   *	Caller _must_ check that skb will fit to this iovec.
   *
   *	Returns: 0       - success.
   *		 -EINVAL - checksum failure.
e5a4b0bb8   Al Viro   switch memcpy_to_...
707
   *		 -EFAULT - fault during copy.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
   */
e5a4b0bb8   Al Viro   switch memcpy_to_...
709
710
  int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
  				   int hlen, struct msghdr *msg)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
711
  {
d3bc23e7e   Al Viro   [NET]: Annotate c...
712
  	__wsum csum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
713
  	int chunk = skb->len - hlen;
ef8aef55c   Herbert Xu   [NET]: Do not der...
714
715
  	if (!chunk)
  		return 0;
01e97e651   Al Viro   new helper: msg_d...
716
  	if (msg_data_left(msg) < chunk) {
fb286bb29   Herbert Xu   [NET]: Detect har...
717
  		if (__skb_checksum_complete(skb))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
  			goto csum_error;
e5a4b0bb8   Al Viro   switch memcpy_to_...
719
  		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
720
721
722
  			goto fault;
  	} else {
  		csum = csum_partial(skb->data, hlen, skb->csum);
e5a4b0bb8   Al Viro   switch memcpy_to_...
723
  		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
725
  					       chunk, &csum))
  			goto fault;
d3bc23e7e   Al Viro   [NET]: Annotate c...
726
  		if (csum_fold(csum))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
727
  			goto csum_error;
84fa7933a   Patrick McHardy   [NET]: Replace CH...
728
  		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
fb286bb29   Herbert Xu   [NET]: Detect har...
729
  			netdev_rx_csum_fault(skb->dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
730
731
732
733
734
735
736
  	}
  	return 0;
  csum_error:
  	return -EINVAL;
  fault:
  	return -EFAULT;
  }
e5a4b0bb8   Al Viro   switch memcpy_to_...
737
  EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
738
739
740
  
  /**
   * 	datagram_poll - generic datagram poll
   *	@file: file struct
   *	@sock: socket
   *	@wait: poll table
   *
   *	Protocol-independent poll for datagram sockets. It also serves
   *	sequenced packet sockets, as long as the socket receive queue
   *	only ever holds data that is ready to be received.
   *
   *	Note: a protocol that does _not_ use this routine and applies a
   *	write policy other than sock_writeable() should supply its own
   *	write_space callback.
   *
   *	Returns: the mask of POLL* events currently pending on the socket.
   */
  unsigned int datagram_poll(struct file *file, struct socket *sock,
  			   poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	unsigned int events = 0;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  
  	/* exceptional events? */
  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) {
  		events |= POLLERR;
  		if (sock_flag(sk, SOCK_SELECT_ERR_QUEUE))
  			events |= POLLPRI;
  	}
  
  	/* receive side shut down: reported as readable too */
  	if (sk->sk_shutdown & RCV_SHUTDOWN)
  		events |= POLLRDHUP | POLLIN | POLLRDNORM;
  	if (sk->sk_shutdown == SHUTDOWN_MASK)
  		events |= POLLHUP;
  
  	/* readable? */
  	if (!skb_queue_empty(&sk->sk_receive_queue))
  		events |= POLLIN | POLLRDNORM;
  
  	/* Connection-based need to check for termination and startup */
  	if (connection_based(sk)) {
  		if (sk->sk_state == TCP_CLOSE)
  			events |= POLLHUP;
  		/* connection hasn't started yet? then skip the write check */
  		if (sk->sk_state == TCP_SYN_SENT)
  			return events;
  	}
  
  	/* writable? if not, flag the socket as out of space */
  	if (!sock_writeable(sk)) {
  		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  		return events;
  	}
  
  	return events | POLLOUT | POLLWRNORM | POLLWRBAND;
  }
  EXPORT_SYMBOL(datagram_poll);