net/core/skbuff.c

  /*
   *	Routines having to do with the 'struct sk_buff' memory handlers.
   *
   *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
   *			Florian La Roche <rzsfl@rz.uni-sb.de>
   *
   *	Fixes:
   *		Alan Cox	:	Fixed the worst of the load
   *					balancer bugs.
   *		Dave Platt	:	Interrupt stacking fix.
   *	Richard Kooijman	:	Timestamp fixes.
   *		Alan Cox	:	Changed buffer format.
   *		Alan Cox	:	destructor hook for AF_UNIX etc.
   *		Linus Torvalds	:	Better skb_clone.
   *		Alan Cox	:	Added skb_copy.
   *		Alan Cox	:	Added all the changed routines Linus
   *					only put in the headers
   *		Ray VanTassle	:	Fixed --skb->lock in free
   *		Alan Cox	:	skb_copy copy arp field
   *		Andi Kleen	:	slabified it.
   *		Robert Olsson	:	Removed skb_head_pool
   *
   *	NOTE:
   *		The __skb_ routines should be called with interrupts
   *	disabled, or you better be *real* sure that the operation is atomic
   *	with respect to whatever list is being frobbed (e.g. via lock_sock()
   *	or via disabling bottom half handlers, etc).
   *
   *	This program is free software; you can redistribute it and/or
   *	modify it under the terms of the GNU General Public License
   *	as published by the Free Software Foundation; either version
   *	2 of the License, or (at your option) any later version.
   */
  
  /*
   *	The functions in this file will not compile correctly with gcc 2.4.x
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/interrupt.h>
  #include <linux/in.h>
  #include <linux/inet.h>
  #include <linux/slab.h>
  #include <linux/tcp.h>
  #include <linux/udp.h>
  #include <linux/sctp.h>
  #include <linux/netdevice.h>
  #ifdef CONFIG_NET_CLS_ACT
  #include <net/pkt_sched.h>
  #endif
  #include <linux/string.h>
  #include <linux/skbuff.h>
  #include <linux/splice.h>
  #include <linux/cache.h>
  #include <linux/rtnetlink.h>
  #include <linux/init.h>
  #include <linux/scatterlist.h>
  #include <linux/errqueue.h>
  #include <linux/prefetch.h>
  #include <linux/if_vlan.h>
  
  #include <net/protocol.h>
  #include <net/dst.h>
  #include <net/sock.h>
  #include <net/checksum.h>
  #include <net/ip6_checksum.h>
  #include <net/xfrm.h>
  #include <linux/uaccess.h>
  #include <trace/events/skb.h>
  #include <linux/highmem.h>
  #include <linux/capability.h>
  #include <linux/user_namespace.h>

  struct kmem_cache *skbuff_head_cache __read_mostly;
  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
  int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
  EXPORT_SYMBOL(sysctl_max_skb_frags);

  /**
   *	skb_panic - private function for out-of-line support
   *	@skb:	buffer
   *	@sz:	size
   *	@addr:	address
   *	@msg:	skb_over_panic or skb_under_panic
   *
   *	Out-of-line support for skb_put() and skb_push().
   *	Called via the wrapper skb_over_panic() or skb_under_panic().
   *	Keep out of line to prevent kernel bloat.
   *	__builtin_return_address is not used because it is not always reliable.
   */
  static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
  		      const char msg[])
  {
  	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
  		 msg, addr, skb->len, sz, skb->head, skb->data,
  		 (unsigned long)skb->tail, (unsigned long)skb->end,
  		 skb->dev ? skb->dev->name : "<NULL>");
  	BUG();
  }
  static void skb_over_panic(struct sk_buff *skb, unsigned int sz, void *addr)
  {
  	skb_panic(skb, sz, addr, __func__);
  }
  static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
  {
  	skb_panic(skb, sz, addr, __func__);
  }
  
  /*
   * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
   * the caller if emergency pfmemalloc reserves are being used. If it is and
   * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
   * may be used. Otherwise, the packet data may be discarded until enough
   * memory is free
   */
  #define kmalloc_reserve(size, gfp, node, pfmemalloc) \
  	 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
  
  static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
  			       unsigned long ip, bool *pfmemalloc)
  {
  	void *obj;
  	bool ret_pfmemalloc = false;
  
  	/*
  	 * Try a regular allocation, when that fails and we're not entitled
  	 * to the reserves, fail.
  	 */
  	obj = kmalloc_node_track_caller(size,
  					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
  					node);
  	if (obj || !(gfp_pfmemalloc_allowed(flags)))
  		goto out;
  
  	/* Try again but now we are using pfmemalloc reserves */
  	ret_pfmemalloc = true;
  	obj = kmalloc_node_track_caller(size, flags, node);
  
  out:
  	if (pfmemalloc)
  		*pfmemalloc = ret_pfmemalloc;
  
  	return obj;
  }
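
  /* Caller-side sketch (added for illustration, not in the original file).
   * This mirrors what __alloc_skb() below does with the helper: request the
   * buffer, then record whether pfmemalloc reserves were tapped:
   *
   *	bool pfmemalloc;
   *	void *data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
   *
   *	if (data)
   *		skb->pfmemalloc = pfmemalloc;
   */
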
  /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   *	'private' fields and also do memory statistics to find all the
   *	[BEEP] leaks.
   *
   */
  
  /**
   *	__alloc_skb	-	allocate a network buffer
   *	@size: size to allocate
   *	@gfp_mask: allocation mask
   *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
   *		instead of head cache and allocate a cloned (child) skb.
   *		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
   *		allocations in case the data is required for writeback
   *	@node: numa node to allocate memory on
   *
   *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   *	tail room of at least size bytes. The object has a reference count
   *	of one. The return is the buffer. On a failure the return is %NULL.
   *
   *	Buffers may only be allocated from interrupts using a @gfp_mask of
   *	%GFP_ATOMIC.
   */
  struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
  			    int flags, int node)
  {
  	struct kmem_cache *cache;
  	struct skb_shared_info *shinfo;
  	struct sk_buff *skb;
  	u8 *data;
  	bool pfmemalloc;

  	cache = (flags & SKB_ALLOC_FCLONE)
  		? skbuff_fclone_cache : skbuff_head_cache;
  
  	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
  		gfp_mask |= __GFP_MEMALLOC;

  	/* Get the HEAD */
  	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
  	if (!skb)
  		goto out;
  	prefetchw(skb);

  	/* We do our best to align skb_shared_info on a separate cache
  	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
  	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
  	 * Both skb->head and skb_shared_info are cache line aligned.
  	 */
  	size = SKB_DATA_ALIGN(size);
  	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
  	if (!data)
  		goto nodata;
  	/* kmalloc(size) might give us more room than requested.
  	 * Put skb_shared_info exactly at the end of allocated zone,
  	 * to allow max possible filling before reallocation.
  	 */
  	size = SKB_WITH_OVERHEAD(ksize(data));
  	prefetchw(data + size);

  	/*
  	 * Only clear those fields we need to clear, not those that we will
  	 * actually initialise below. Hence, don't put any more fields after
  	 * the tail pointer in struct sk_buff!
  	 */
  	memset(skb, 0, offsetof(struct sk_buff, tail));
  	/* Account for allocated memory : skb + skb->head */
  	skb->truesize = SKB_TRUESIZE(size);
  	skb->pfmemalloc = pfmemalloc;
  	refcount_set(&skb->users, 1);
  	skb->head = data;
  	skb->data = data;
  	skb_reset_tail_pointer(skb);
  	skb->end = skb->tail + size;
  	skb->mac_header = (typeof(skb->mac_header))~0U;
  	skb->transport_header = (typeof(skb->transport_header))~0U;

  	/* make sure we initialize shinfo sequentially */
  	shinfo = skb_shinfo(skb);
  	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
  	atomic_set(&shinfo->dataref, 1);

  	if (flags & SKB_ALLOC_FCLONE) {
  		struct sk_buff_fclones *fclones;

  		fclones = container_of(skb, struct sk_buff_fclones, skb1);
  		skb->fclone = SKB_FCLONE_ORIG;
  		refcount_set(&fclones->fclone_ref, 1);

  		fclones->skb2.fclone = SKB_FCLONE_CLONE;
  	}
  out:
  	return skb;
  nodata:
  	kmem_cache_free(cache, skb);
  	skb = NULL;
  	goto out;
  }
  EXPORT_SYMBOL(__alloc_skb);
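
  /* Usage sketch (illustrative only; headroom, payload and len are
   * hypothetical): typical callers go through the alloc_skb() wrapper,
   * reserve headroom, then fill the linear area:
   *
   *	struct sk_buff *skb = alloc_skb(headroom + len, GFP_KERNEL);
   *
   *	if (!skb)
   *		return -ENOMEM;
   *	skb_reserve(skb, headroom);
   *	skb_put_data(skb, payload, len);
   */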

  /**
   * __build_skb - build a network buffer
   * @data: data buffer provided by caller
   * @frag_size: size of data, or 0 if head was kmalloced
   *
   * Allocate a new &sk_buff. Caller provides space holding head and
   * skb_shared_info. @data must have been allocated by kmalloc() only if
   * @frag_size is 0, otherwise data should come from the page allocator
   *  or vmalloc()
   * The return is the new skb buffer.
   * On a failure the return is %NULL, and @data is not freed.
   * Notes :
   *  Before IO, the driver allocates only the data buffer where the NIC
   *  puts the incoming frame.
   *  The driver should add room at head (NET_SKB_PAD) and
   *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info)).
   *  After IO, the driver calls build_skb() to allocate the sk_buff and
   *  populate it before giving the packet to the stack.
   *  RX rings only contain data buffers, not full skbs.
   */
  struct sk_buff *__build_skb(void *data, unsigned int frag_size)
  {
  	struct skb_shared_info *shinfo;
  	struct sk_buff *skb;
  	unsigned int size = frag_size ? : ksize(data);
  
  	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
  	if (!skb)
  		return NULL;
  	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  
  	memset(skb, 0, offsetof(struct sk_buff, tail));
  	skb->truesize = SKB_TRUESIZE(size);
  	refcount_set(&skb->users, 1);
  	skb->head = data;
  	skb->data = data;
  	skb_reset_tail_pointer(skb);
  	skb->end = skb->tail + size;
  	skb->mac_header = (typeof(skb->mac_header))~0U;
  	skb->transport_header = (typeof(skb->transport_header))~0U;
  
  	/* make sure we initialize shinfo sequentially */
  	shinfo = skb_shinfo(skb);
  	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
  	atomic_set(&shinfo->dataref, 1);
  
  	return skb;
  }
  
  /* build_skb() is a wrapper over __build_skb() that additionally
   * takes care of skb->head_frag and skb->pfmemalloc.
   * This means that if @frag_size is not zero, then @data must be backed
   * by a page fragment, not kmalloc() or vmalloc().
   */
  struct sk_buff *build_skb(void *data, unsigned int frag_size)
  {
  	struct sk_buff *skb = __build_skb(data, frag_size);
  
  	if (skb && frag_size) {
  		skb->head_frag = 1;
  		if (page_is_pfmemalloc(virt_to_head_page(data)))
  			skb->pfmemalloc = 1;
  	}
  	return skb;
  }
  EXPORT_SYMBOL(build_skb);
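
  /* Driver RX sketch (illustrative; buf, pkt_len and truesize are
   * hypothetical): per the notes on __build_skb() above, the buffer carries
   * NET_SKB_PAD headroom plus tail room for skb_shared_info, and the skb is
   * built only after the NIC has written the frame:
   *
   *	truesize = SKB_DATA_ALIGN(NET_SKB_PAD + pkt_len) +
   *		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
   *	buf = netdev_alloc_frag(truesize);
   *	... NIC DMAs the frame at buf + NET_SKB_PAD ...
   *	skb = build_skb(buf, truesize);
   *	if (skb) {
   *		skb_reserve(skb, NET_SKB_PAD);
   *		skb_put(skb, pkt_len);
   *	}
   */
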
  #define NAPI_SKB_CACHE_SIZE	64
  
  struct napi_alloc_cache {
  	struct page_frag_cache page;
  	unsigned int skb_count;
  	void *skb_cache[NAPI_SKB_CACHE_SIZE];
  };
  static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
  static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
  
  static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  {
  	struct page_frag_cache *nc;
  	unsigned long flags;
  	void *data;
  
  	local_irq_save(flags);
  	nc = this_cpu_ptr(&netdev_alloc_cache);
  	data = page_frag_alloc(nc, fragsz, gfp_mask);
  	local_irq_restore(flags);
  	return data;
  }
  
  /**
   * netdev_alloc_frag - allocate a page fragment
   * @fragsz: fragment size
   *
   * Allocates a frag from a page for receive buffer.
   * Uses GFP_ATOMIC allocations.
   */
  void *netdev_alloc_frag(unsigned int fragsz)
  {
  	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
  }
  EXPORT_SYMBOL(netdev_alloc_frag);
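
  /* Pairing note (sketch; hw_post_buffer() is hypothetical): a fragment
   * that never ends up attached to an skb must be released with
   * skb_free_frag(), not kfree(), because it comes from the page-fragment
   * allocator:
   *
   *	void *va = netdev_alloc_frag(fragsz);
   *
   *	if (va && hw_post_buffer(va) < 0)
   *		skb_free_frag(va);
   */
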
  static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
  {
  	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);

  	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
  }
  
  void *napi_alloc_frag(unsigned int fragsz)
  {
  	return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
  }
  EXPORT_SYMBOL(napi_alloc_frag);

  /**
   *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
   *	@dev: network device to receive on
   *	@len: length to allocate
   *	@gfp_mask: get_free_pages mask, passed to alloc_skb
   *
   *	Allocate a new &sk_buff and assign it a usage count of one. The
   *	buffer has NET_SKB_PAD headroom built in. Users should allocate
   *	the headroom they think they need without accounting for the
   *	built in space. The built in space is used for optimisations.
   *
   *	%NULL is returned if there is no free memory.
   */
  struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
  				   gfp_t gfp_mask)
  {
  	struct page_frag_cache *nc;
  	unsigned long flags;
  	struct sk_buff *skb;
  	bool pfmemalloc;
  	void *data;
  
  	len += NET_SKB_PAD;

  	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
  	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
  		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
  		if (!skb)
  			goto skb_fail;
  		goto skb_success;
  	}

  	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  	len = SKB_DATA_ALIGN(len);
  
  	if (sk_memalloc_socks())
  		gfp_mask |= __GFP_MEMALLOC;
  
  	local_irq_save(flags);
  
  	nc = this_cpu_ptr(&netdev_alloc_cache);
  	data = page_frag_alloc(nc, len, gfp_mask);
  	pfmemalloc = nc->pfmemalloc;
  
  	local_irq_restore(flags);
  
  	if (unlikely(!data))
  		return NULL;
  
  	skb = __build_skb(data, len);
  	if (unlikely(!skb)) {
  		skb_free_frag(data);
  		return NULL;
  	}

  	/* use OR instead of assignment to avoid clearing of bits in mask */
  	if (pfmemalloc)
  		skb->pfmemalloc = 1;
  	skb->head_frag = 1;
  skb_success:
  	skb_reserve(skb, NET_SKB_PAD);
  	skb->dev = dev;
  skb_fail:
  	return skb;
  }
  EXPORT_SYMBOL(__netdev_alloc_skb);
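
  /* Usage sketch (illustrative; dev and pkt_len are hypothetical): callers
   * normally use the netdev_alloc_skb() wrapper and get the NET_SKB_PAD
   * headroom implicitly, so only the payload length is requested:
   *
   *	struct sk_buff *skb = netdev_alloc_skb(dev, pkt_len);
   *
   *	if (skb) {
   *		... copy or DMA pkt_len bytes into skb->data ...
   *		skb_put(skb, pkt_len);
   *		skb->protocol = eth_type_trans(skb, dev);
   *	}
   */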

  /**
   *	__napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
   *	@napi: napi instance this buffer was allocated for
   *	@len: length to allocate
   *	@gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
   *
   *	Allocate a new sk_buff for use in NAPI receive.  This buffer will
   *	attempt to allocate the head from a special reserved region used
   *	only for NAPI Rx allocation.  By doing this we can save several
   *	CPU cycles by avoiding having to disable and re-enable IRQs.
   *
   *	%NULL is returned if there is no free memory.
   */
  struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
  				 gfp_t gfp_mask)
  {
  	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  	struct sk_buff *skb;
  	void *data;
  
  	len += NET_SKB_PAD + NET_IP_ALIGN;

  	if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
  	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
  		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
  		if (!skb)
  			goto skb_fail;
  		goto skb_success;
  	}
  
  	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  	len = SKB_DATA_ALIGN(len);
  
  	if (sk_memalloc_socks())
  		gfp_mask |= __GFP_MEMALLOC;

  	data = page_frag_alloc(&nc->page, len, gfp_mask);
  	if (unlikely(!data))
  		return NULL;
  
  	skb = __build_skb(data, len);
  	if (unlikely(!skb)) {
  		skb_free_frag(data);
  		return NULL;
  	}
  	/* use OR instead of assignment to avoid clearing of bits in mask */
  	if (nc->page.pfmemalloc)
  		skb->pfmemalloc = 1;
  	skb->head_frag = 1;
  skb_success:
  	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
  	skb->dev = napi->dev;
  skb_fail:
  	return skb;
  }
  EXPORT_SYMBOL(__napi_alloc_skb);
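
  /* NAPI poll sketch (illustrative; napi and pkt_len are hypothetical):
   * inside a poll callback the napi_alloc_skb() wrapper is the cheap
   * variant, since it takes fragments from the per-CPU NAPI cache without
   * touching IRQ state:
   *
   *	struct sk_buff *skb = napi_alloc_skb(napi, pkt_len);
   *
   *	if (skb) {
   *		... fill in the packet data ...
   *		napi_gro_receive(napi, skb);
   *	}
   */
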
  void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
  		     int size, unsigned int truesize)
  {
  	skb_fill_page_desc(skb, i, page, off, size);
  	skb->len += size;
  	skb->data_len += size;
  	skb->truesize += truesize;
  }
  EXPORT_SYMBOL(skb_add_rx_frag);
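
  /* Sketch (illustrative; page, off and len come from a hypothetical RX
   * ring): append a received page to the paged part of an skb, charging a
   * full page to truesize:
   *
   *	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off, len,
   *			PAGE_SIZE);
   */
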
  void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
  			  unsigned int truesize)
  {
  	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  
  	skb_frag_size_add(frag, size);
  	skb->len += size;
  	skb->data_len += size;
  	skb->truesize += truesize;
  }
  EXPORT_SYMBOL(skb_coalesce_rx_frag);

  static void skb_drop_list(struct sk_buff **listp)
  {
  	kfree_skb_list(*listp);
  	*listp = NULL;
  }

  static inline void skb_drop_fraglist(struct sk_buff *skb)
  {
  	skb_drop_list(&skb_shinfo(skb)->frag_list);
  }

  static void skb_clone_fraglist(struct sk_buff *skb)
  {
  	struct sk_buff *list;
  	skb_walk_frags(skb, list)
  		skb_get(list);
  }

  static void skb_free_head(struct sk_buff *skb)
  {
  	unsigned char *head = skb->head;
  	if (skb->head_frag)
  		skb_free_frag(head);
  	else
  		kfree(head);
  }

  static void skb_release_data(struct sk_buff *skb)
  {
  	struct skb_shared_info *shinfo = skb_shinfo(skb);
  	int i;

  	if (skb->cloned &&
  	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
  			      &shinfo->dataref))
  		return;

  	for (i = 0; i < shinfo->nr_frags; i++)
  		__skb_frag_unref(&shinfo->frags[i]);

  	if (shinfo->frag_list)
  		kfree_skb_list(shinfo->frag_list);
  	skb_zcopy_clear(skb, true);
  	skb_free_head(skb);
  }
  
  /*
   *	Free an skbuff by memory without cleaning the state.
   */
  static void kfree_skbmem(struct sk_buff *skb)
  {
  	struct sk_buff_fclones *fclones;

  	switch (skb->fclone) {
  	case SKB_FCLONE_UNAVAILABLE:
  		kmem_cache_free(skbuff_head_cache, skb);
  		return;
  
  	case SKB_FCLONE_ORIG:
  		fclones = container_of(skb, struct sk_buff_fclones, skb1);

  		/* We usually free the clone (TX completion) before original skb
  		 * This test would have no chance to be true for the clone,
  		 * while here, branch prediction will be good.
  		 */
  		if (refcount_read(&fclones->fclone_ref) == 1)
  			goto fastpath;
  		break;

  	default: /* SKB_FCLONE_CLONE */
  		fclones = container_of(skb, struct sk_buff_fclones, skb2);
  		break;
  	}
  	if (!refcount_dec_and_test(&fclones->fclone_ref))
  		return;
  fastpath:
  	kmem_cache_free(skbuff_fclone_cache, fclones);
  }

  void skb_release_head_state(struct sk_buff *skb)
  {
  	skb_dst_drop(skb);
  	secpath_reset(skb);
  	if (skb->destructor) {
  		WARN_ON(in_irq());
  		skb->destructor(skb);
  	}
  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
  	nf_conntrack_put(skb_nfct(skb));
  #endif
  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  	nf_bridge_put(skb->nf_bridge);
  #endif
  }
  
  /* Free everything but the sk_buff shell. */
  static void skb_release_all(struct sk_buff *skb)
  {
  	skb_release_head_state(skb);
  	if (likely(skb->head))
  		skb_release_data(skb);
  }
  
  /**
   *	__kfree_skb - private function
   *	@skb: buffer
   *
   *	Free an sk_buff. Release anything attached to the buffer.
   *	Clean the state. This is an internal helper function. Users should
   *	always call kfree_skb
   */
   */

  void __kfree_skb(struct sk_buff *skb)
  {
  	skb_release_all(skb);
  	kfree_skbmem(skb);
  }
  EXPORT_SYMBOL(__kfree_skb);
  
  /**
   *	kfree_skb - free an sk_buff
   *	@skb: buffer to free
   *
   *	Drop a reference to the buffer and free it if the usage count has
   *	hit zero.
   */
  void kfree_skb(struct sk_buff *skb)
  {
  	if (!skb_unref(skb))
  		return;

  	trace_kfree_skb(skb, __builtin_return_address(0));
  	__kfree_skb(skb);
  }
  EXPORT_SYMBOL(kfree_skb);

  void kfree_skb_list(struct sk_buff *segs)
  {
  	while (segs) {
  		struct sk_buff *next = segs->next;
  
  		kfree_skb(segs);
  		segs = next;
  	}
  }
  EXPORT_SYMBOL(kfree_skb_list);

  /**
   *	skb_tx_error - report an sk_buff xmit error
   *	@skb: buffer that triggered an error
   *
   *	Report xmit error if a device callback is tracking this skb.
   *	skb must be freed afterwards.
   */
  void skb_tx_error(struct sk_buff *skb)
  {
  	skb_zcopy_clear(skb, true);
  }
  EXPORT_SYMBOL(skb_tx_error);
  
  /**
   *	consume_skb - free an skbuff
   *	@skb: buffer to free
   *
   *	Drop a ref to the buffer and free it if the usage count has hit zero.
   *	Functions identically to kfree_skb, but kfree_skb assumes that the frame
   *	is being dropped after a failure and notes that.
   */
  void consume_skb(struct sk_buff *skb)
  {
  	if (!skb_unref(skb))
  		return;

  	trace_consume_skb(skb);
  	__kfree_skb(skb);
  }
  EXPORT_SYMBOL(consume_skb);
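
  /* Rule-of-thumb sketch (illustrative): kfree_skb() signals a drop to
   * drop monitors, consume_skb() a normal completion:
   *
   *	if (unlikely(err))
   *		kfree_skb(skb);		packet was dropped
   *	else
   *		consume_skb(skb);	packet finished its job
   */
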
  /**
   *	__consume_stateless_skb - free an skbuff, assuming it is stateless
   *	@skb: buffer to free
   *
   *	Like consume_skb(), but this variant assumes that this is the last
   *	skb reference and all the head states have already been dropped
   */
  void __consume_stateless_skb(struct sk_buff *skb)
  {
  	trace_consume_skb(skb);
  	skb_release_data(skb);
  	kfree_skbmem(skb);
  }

  void __kfree_skb_flush(void)
  {
  	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  
  	/* flush skb_cache if containing objects */
  	if (nc->skb_count) {
  		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
  				     nc->skb_cache);
  		nc->skb_count = 0;
  	}
  }

  static inline void _kfree_skb_defer(struct sk_buff *skb)
  {
  	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
  
  	/* drop skb->head and call any destructors for packet */
  	skb_release_all(skb);
  
  	/* record skb to CPU local list */
  	nc->skb_cache[nc->skb_count++] = skb;
  
  #ifdef CONFIG_SLUB
  	/* SLUB writes into objects when freeing */
  	prefetchw(skb);
  #endif
  
  	/* flush skb_cache if it is filled */
  	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
  		kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
  				     nc->skb_cache);
  		nc->skb_count = 0;
  	}
  }

  void __kfree_skb_defer(struct sk_buff *skb)
  {
  	_kfree_skb_defer(skb);
  }
  
  void napi_consume_skb(struct sk_buff *skb, int budget)
  {
  	if (unlikely(!skb))
  		return;
  	/* Zero budget indicates a non-NAPI context called us, like netpoll */
  	if (unlikely(!budget)) {
  		dev_consume_skb_any(skb);
  		return;
  	}
  	if (!skb_unref(skb))
  		return;

  	/* if reaching here SKB is ready to free */
  	trace_consume_skb(skb);
  
  	/* if SKB is a clone, don't handle this case */
  	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
  		__kfree_skb(skb);
  		return;
  	}
  	_kfree_skb_defer(skb);
  }
  EXPORT_SYMBOL(napi_consume_skb);
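
  /* TX-completion sketch (illustrative; get_completed_tx() is
   * hypothetical): a driver poll routine passes its NAPI budget through so
   * that the bulk-free path above is used only in true NAPI context
   * (budget == 0 falls back to dev_consume_skb_any()):
   *
   *	while ((skb = get_completed_tx(ring)) != NULL)
   *		napi_consume_skb(skb, budget);
   */
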
  /* Make sure a field is enclosed inside headers_start/headers_end section */
  #define CHECK_SKB_FIELD(field) \
  	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\
  		     offsetof(struct sk_buff, headers_start));	\
  	BUILD_BUG_ON(offsetof(struct sk_buff, field) >		\
  		     offsetof(struct sk_buff, headers_end));	\
  static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  {
  	new->tstamp		= old->tstamp;
  	/* We do not copy old->sk */
  	new->dev		= old->dev;
  	memcpy(new->cb, old->cb, sizeof(old->cb));
  	skb_dst_copy(new, old);
  #ifdef CONFIG_XFRM
  	new->sp			= secpath_get(old->sp);
  #endif
  	__nf_copy(new, old, false);
  
  	/* Note : this field could be in headers_start/headers_end section
  	 * It is not yet because we do not want to have a 16 bit hole
  	 */
  	new->queue_mapping = old->queue_mapping;
  
  	memcpy(&new->headers_start, &old->headers_start,
  	       offsetof(struct sk_buff, headers_end) -
  	       offsetof(struct sk_buff, headers_start));
  	CHECK_SKB_FIELD(protocol);
  	CHECK_SKB_FIELD(csum);
  	CHECK_SKB_FIELD(hash);
  	CHECK_SKB_FIELD(priority);
  	CHECK_SKB_FIELD(skb_iif);
  	CHECK_SKB_FIELD(vlan_proto);
  	CHECK_SKB_FIELD(vlan_tci);
  	CHECK_SKB_FIELD(transport_header);
  	CHECK_SKB_FIELD(network_header);
  	CHECK_SKB_FIELD(mac_header);
  	CHECK_SKB_FIELD(inner_protocol);
  	CHECK_SKB_FIELD(inner_transport_header);
  	CHECK_SKB_FIELD(inner_network_header);
  	CHECK_SKB_FIELD(inner_mac_header);
  	CHECK_SKB_FIELD(mark);
  #ifdef CONFIG_NETWORK_SECMARK
  	CHECK_SKB_FIELD(secmark);
  #endif
  #ifdef CONFIG_NET_RX_BUSY_POLL
  	CHECK_SKB_FIELD(napi_id);
  #endif
  #ifdef CONFIG_XPS
  	CHECK_SKB_FIELD(sender_cpu);
  #endif
  #ifdef CONFIG_NET_SCHED
  	CHECK_SKB_FIELD(tc_index);
  #endif

  }

  /*
   * You should not add any new code to this function.  Add it to
   * __copy_skb_header above instead.
   */
  static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
  {
  #define C(x) n->x = skb->x
  
  	n->next = n->prev = NULL;
  	n->sk = NULL;
  	__copy_skb_header(n, skb);
  	C(len);
  	C(data_len);
  	C(mac_len);
  	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
  	n->cloned = 1;
  	n->nohdr = 0;
  	n->peeked = 0;
  	C(pfmemalloc);
  	n->destructor = NULL;
  	C(tail);
  	C(end);
  	C(head);
  	C(head_frag);
  	C(data);
  	C(truesize);
  	refcount_set(&n->users, 1);
  
  	atomic_inc(&(skb_shinfo(skb)->dataref));
  	skb->cloned = 1;
  
  	return n;
  #undef C
  }
  
  /**
   *	skb_morph	-	morph one skb into another
   *	@dst: the skb to receive the contents
   *	@src: the skb to supply the contents
   *
   *	This is identical to skb_clone except that the target skb is
   *	supplied by the user.
   *
   *	The target skb is returned upon exit.
   */
  struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
  {
  	skb_release_all(dst);
  	return __skb_clone(dst, src);
  }
  EXPORT_SYMBOL_GPL(skb_morph);

  static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
  {
  	unsigned long max_pg, num_pg, new_pg, old_pg;
  	struct user_struct *user;
  
  	if (capable(CAP_IPC_LOCK) || !size)
  		return 0;
  
  	num_pg = (size >> PAGE_SHIFT) + 2;	/* worst case */
  	max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  	user = mmp->user ? : current_user();
  
  	do {
  		old_pg = atomic_long_read(&user->locked_vm);
  		new_pg = old_pg + num_pg;
  		if (new_pg > max_pg)
  			return -ENOBUFS;
  	} while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
  		 old_pg);
  
  	if (!mmp->user) {
  		mmp->user = get_uid(user);
  		mmp->num_pg = num_pg;
  	} else {
  		mmp->num_pg += num_pg;
  	}
  
  	return 0;
  }
  
  static void mm_unaccount_pinned_pages(struct mmpin *mmp)
  {
  	if (mmp->user) {
  		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
  		free_uid(mmp->user);
  	}
  }

  struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
  {
  	struct ubuf_info *uarg;
  	struct sk_buff *skb;
  
  	WARN_ON_ONCE(!in_task());
  
  	skb = sock_omalloc(sk, 0, GFP_KERNEL);
  	if (!skb)
  		return NULL;
  
  	BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
  	uarg = (void *)skb->cb;
  	uarg->mmp.user = NULL;
  
  	if (mm_account_pinned_pages(&uarg->mmp, size)) {
  		kfree_skb(skb);
  		return NULL;
  	}
  
  	uarg->callback = sock_zerocopy_callback;
  	uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
  	uarg->len = 1;
  	uarg->bytelen = size;
  	uarg->zerocopy = 1;
  	refcount_set(&uarg->refcnt, 1);
  	sock_hold(sk);
  
  	return uarg;
  }
  EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
  
  static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
  {
  	return container_of((void *)uarg, struct sk_buff, cb);
  }

  struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
  					struct ubuf_info *uarg)
  {
  	if (uarg) {
  		const u32 byte_limit = 1 << 19;		/* limit to a few TSO */
  		u32 bytelen, next;
  
  		/* realloc only when socket is locked (TCP, UDP cork),
  		 * so uarg->len and sk_zckey access is serialized
  		 */
  		if (!sock_owned_by_user(sk)) {
  			WARN_ON_ONCE(1);
  			return NULL;
  		}
  
  		bytelen = uarg->bytelen + size;
  		if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
  			/* TCP can create new skb to attach new uarg */
  			if (sk->sk_type == SOCK_STREAM)
  				goto new_alloc;
  			return NULL;
  		}
  
  		next = (u32)atomic_read(&sk->sk_zckey);
  		if ((u32)(uarg->id + uarg->len) == next) {
  			if (mm_account_pinned_pages(&uarg->mmp, size))
  				return NULL;
  			uarg->len++;
  			uarg->bytelen = bytelen;
  			atomic_set(&sk->sk_zckey, ++next);
  			sock_zerocopy_get(uarg);
  			return uarg;
  		}
  	}
  
  new_alloc:
  	return sock_zerocopy_alloc(sk, size);
  }
  EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
  
  static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
  {
  	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
  	u32 old_lo, old_hi;
  	u64 sum_len;
  
  	old_lo = serr->ee.ee_info;
  	old_hi = serr->ee.ee_data;
  	sum_len = old_hi - old_lo + 1ULL + len;
  
  	if (sum_len >= (1ULL << 32))
  		return false;
  
  	if (lo != old_hi + 1)
  		return false;
  
  	serr->ee.ee_data += len;
  	return true;
  }

  void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
  {
  	struct sk_buff *tail, *skb = skb_from_uarg(uarg);
  	struct sock_exterr_skb *serr;
  	struct sock *sk = skb->sk;
  	struct sk_buff_head *q;
  	unsigned long flags;
  	u32 lo, hi;
  	u16 len;

  	mm_unaccount_pinned_pages(&uarg->mmp);
  	/* if !len, there was only 1 call, and it was aborted
  	 * so do not queue a completion notification
  	 */
  	if (!uarg->len || sock_flag(sk, SOCK_DEAD))
  		goto release;
  	len = uarg->len;
  	lo = uarg->id;
  	hi = uarg->id + len - 1;
  	serr = SKB_EXT_ERR(skb);
  	memset(serr, 0, sizeof(*serr));
  	serr->ee.ee_errno = 0;
  	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
  	serr->ee.ee_data = hi;
  	serr->ee.ee_info = lo;
  	if (!success)
  		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
  	q = &sk->sk_error_queue;
  	spin_lock_irqsave(&q->lock, flags);
  	tail = skb_peek_tail(q);
  	if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
  	    !skb_zerocopy_notify_extend(tail, lo, len)) {
  		__skb_queue_tail(q, skb);
  		skb = NULL;
  	}
  	spin_unlock_irqrestore(&q->lock, flags);
  
  	sk->sk_error_report(sk);
  
  release:
  	consume_skb(skb);
  	sock_put(sk);
  }
  EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
  
  void sock_zerocopy_put(struct ubuf_info *uarg)
  {
  	if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
  		if (uarg->callback)
  			uarg->callback(uarg, uarg->zerocopy);
  		else
  			consume_skb(skb_from_uarg(uarg));
  	}
  }
  EXPORT_SYMBOL_GPL(sock_zerocopy_put);
  
  void sock_zerocopy_put_abort(struct ubuf_info *uarg)
  {
  	if (uarg) {
  		struct sock *sk = skb_from_uarg(uarg)->sk;
  
  		atomic_dec(&sk->sk_zckey);
  		uarg->len--;

  		sock_zerocopy_put(uarg);
  	}
  }
  EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
  
  extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
  				   struct iov_iter *from, size_t length);
  
  int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
  			     struct msghdr *msg, int len,
  			     struct ubuf_info *uarg)
  {
  	struct ubuf_info *orig_uarg = skb_zcopy(skb);
  	struct iov_iter orig_iter = msg->msg_iter;
  	int err, orig_len = skb->len;
  	/* An skb can only point to one uarg. This edge case happens when
  	 * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
  	 */
  	if (orig_uarg && uarg != orig_uarg)
  		return -EEXIST;
  	err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
  	if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
  		struct sock *save_sk = skb->sk;
  		/* Streams do not free skb on error. Reset to prev state. */
  		msg->msg_iter = orig_iter;
  		skb->sk = sk;
  		___pskb_trim(skb, orig_len);
  		skb->sk = save_sk;
  		return err;
  	}
  
  	skb_zcopy_set(skb, uarg);
  	return skb->len - orig_len;
  }
  EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
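
  /* Flow sketch (illustrative, loosely following the TCP MSG_ZEROCOPY
   * path): a stream sendmsg implementation pairs the helpers above
   * roughly as:
   *
   *	uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
   *	if (!uarg)
   *		... fall back to copying ...
   *	copied = skb_zerocopy_iter_stream(sk, skb, msg, size, uarg);
   *	if (copied < 0)
   *		... error path: release with sock_zerocopy_put(uarg) ...
   */
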
  static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
  			      gfp_t gfp_mask)
  {
  	if (skb_zcopy(orig)) {
  		if (skb_zcopy(nskb)) {
  			/* !gfp_mask callers are verified to !skb_zcopy(nskb) */
  			if (!gfp_mask) {
  				WARN_ON_ONCE(1);
  				return -ENOMEM;
  			}
  			if (skb_uarg(nskb) == skb_uarg(orig))
  				return 0;
  			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
  				return -EIO;
  		}
  		skb_zcopy_set(nskb, skb_uarg(orig));
  	}
  	return 0;
  }
  /**
   *	skb_copy_ubufs	-	copy userspace skb frags buffers to kernel
   *	@skb: the skb to modify
   *	@gfp_mask: allocation priority
   *
   *	This must be called on SKBTX_DEV_ZEROCOPY skb.
   *	It will copy all frags into kernel and drop the reference
   *	to userspace pages.
   *
   *	If this function is called from an interrupt gfp_mask() must be
   *	%GFP_ATOMIC.
   *
   *	Returns 0 on success or a negative error code on failure
   *	to allocate kernel memory to copy to.
   */
  int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
  {
  	int num_frags = skb_shinfo(skb)->nr_frags;
  	struct page *page, *head = NULL;
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1125
1126
  	int i, new_frags;
  	u32 d_off;
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1127

3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1128
1129
  	if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
  		return -EINVAL;
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1130

b9edd6bf0   Willem de Bruijn   skbuff: in skb_co...
1131
1132
  	if (!num_frags)
  		goto release;
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1133
1134
  	new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
  	for (i = 0; i < new_frags; i++) {
02756ed4a   Krishna Kumar   skbuff: Use corre...
1135
  		page = alloc_page(gfp_mask);
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1136
1137
  		if (!page) {
  			while (head) {
40dadff26   Sunghan Suh   net: access page-...
1138
  				struct page *next = (struct page *)page_private(head);
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1139
1140
1141
1142
1143
  				put_page(head);
  				head = next;
  			}
  			return -ENOMEM;
  		}
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
  		set_page_private(page, (unsigned long)head);
  		head = page;
  	}
  
  	page = head;
  	d_off = 0;
  	for (i = 0; i < num_frags; i++) {
  		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
  		u32 p_off, p_len, copied;
  		struct page *p;
  		u8 *vaddr;
c613c209c   Willem de Bruijn   net: add skb_frag...
1155
1156
1157
  
  		skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
  				      p, p_off, p_len, copied) {
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1158
  			u32 copy, done = 0;
c613c209c   Willem de Bruijn   net: add skb_frag...
1159
  			vaddr = kmap_atomic(p);
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
  
  			while (done < p_len) {
  				if (d_off == PAGE_SIZE) {
  					d_off = 0;
  					page = (struct page *)page_private(page);
  				}
  				copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
  				memcpy(page_address(page) + d_off,
  				       vaddr + p_off + done, copy);
  				done += copy;
  				d_off += copy;
  			}
c613c209c   Willem de Bruijn   net: add skb_frag...
1172
1173
  			kunmap_atomic(vaddr);
  		}
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1174
1175
1176
  	}
  
  	/* skb frags release userspace buffers */
02756ed4a   Krishna Kumar   skbuff: Use corre...
1177
  	for (i = 0; i < num_frags; i++)
a8605c606   Ian Campbell   net: add opaque s...
1178
  		skb_frag_unref(skb, i);
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1179

a6686f2f3   Shirley Ma   skbuff: skb suppo...
1180
  	/* skb frags point to kernel buffers */
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1181
1182
  	for (i = 0; i < new_frags - 1; i++) {
  		__skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
40dadff26   Sunghan Suh   net: access page-...
1183
  		head = (struct page *)page_private(head);
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1184
  	}
3ece78269   Willem de Bruijn   sock: skb_copy_ub...
1185
1186
  	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
  	skb_shinfo(skb)->nr_frags = new_frags;
48c830120   Michael S. Tsirkin   net: copy userspa...
1187

49cd180d4   Willem de Bruijn   skbuff: skb_copy_...
1188
  release:
1f8b977ab   Willem de Bruijn   sock: enable MSG_...
1189
  	skb_zcopy_clear(skb, false);
a6686f2f3   Shirley Ma   skbuff: skb suppo...
1190
1191
  	return 0;
  }
dcc0fb782   Michael S. Tsirkin   skbuff: export sk...
1192
  EXPORT_SYMBOL_GPL(skb_copy_ubufs);
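
/* Hypothetical sketch: a path that is about to write into paged data
 * must first drop any references to userspace (zerocopy) pages.  The
 * usual entry points are the skb_orphan_frags() helpers, which end up
 * calling skb_copy_ubufs() for zerocopy skbs.
 */
static inline int example_make_frags_private(struct sk_buff *skb)
{
	if (skb_zcopy(skb))
		return skb_copy_ubufs(skb, GFP_ATOMIC);
	return 0;
}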

/**
 *	skb_clone	-	duplicate an sk_buff
 *	@skb: buffer to clone
 *	@gfp_mask: allocation priority
 *
 *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *	copies share the same packet data but not the structure. The new
 *	buffer has a reference count of 1. If the allocation fails the
 *	function returns %NULL; otherwise the new buffer is returned.
 *
 *	If this function is called from an interrupt, @gfp_mask must be
 *	%GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
	struct sk_buff_fclones *fclones = container_of(skb,
						       struct sk_buff_fclones,
						       skb1);
	struct sk_buff *n;

	if (skb_orphan_frags(skb, gfp_mask))
		return NULL;

	if (skb->fclone == SKB_FCLONE_ORIG &&
	    refcount_read(&fclones->fclone_ref) == 1) {
		n = &fclones->skb2;
		refcount_set(&fclones->fclone_ref, 2);
	} else {
		if (skb_pfmemalloc(skb))
			gfp_mask |= __GFP_MEMALLOC;

		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;

		n->fclone = SKB_FCLONE_UNAVAILABLE;
	}

	return __skb_clone(n, skb);
}
EXPORT_SYMBOL(skb_clone);
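
/* Hypothetical sketch: clone when only the metadata must diverge, e.g.
 * to queue the same payload towards a second device.  The clone shares
 * the packet data, so neither copy may write to it without first
 * getting a private head (pskb_expand_head() or skb_copy()).
 */
static inline struct sk_buff *example_tee_skb(struct sk_buff *skb,
					      struct net_device *dev)
{
	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

	if (clone)
		clone->dev = dev;	/* metadata is private to each clone */
	return clone;
}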

static void skb_headers_offset_update(struct sk_buff *skb, int off)
{
	/* Only adjust this if it actually is csum_start rather than csum */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		skb->csum_start += off;
	/* {transport,network,mac}_header and tail are relative to skb->head */
	skb->transport_header += off;
	skb->network_header   += off;
	if (skb_mac_header_was_set(skb))
		skb->mac_header += off;
	skb->inner_transport_header += off;
	skb->inner_network_header += off;
	skb->inner_mac_header += off;
}

static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	__copy_skb_header(new, old);

	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}

static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
{
	if (skb_pfmemalloc(skb))
		return SKB_ALLOC_RX;
	return 0;
}

/**
 *	skb_copy	-	create private copy of an sk_buff
 *	@skb: buffer to copy
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data. This is used when the
 *	caller wishes to modify the data and needs a private copy of the
 *	data to alter. Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	As a by-product this function converts a non-linear &sk_buff into a
 *	linear one, so the &sk_buff becomes completely private and the caller
 *	may modify all the data of the returned buffer. This means that the
 *	function is not recommended for use in circumstances when only the
 *	header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
	int headerlen = skb_headroom(skb);
	unsigned int size = skb_end_offset(skb) + skb->data_len;
	struct sk_buff *n = __alloc_skb(size, gfp_mask,
					skb_alloc_rx_flag(skb), NUMA_NO_NODE);

	if (!n)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, headerlen);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
		BUG();

	copy_skb_header(n, skb);
	return n;
}
EXPORT_SYMBOL(skb_copy);
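
/* Hypothetical sketch: take a fully private, linearized copy before
 * rewriting payload bytes in place.  For header-only edits pskb_copy()
 * is the cheaper choice, as the doc above notes.
 */
static inline struct sk_buff *example_writable_copy(struct sk_buff *skb)
{
	return skb_copy(skb, GFP_ATOMIC);
}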

/**
 *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@headroom: headroom of new skb
 *	@gfp_mask: allocation priority
 *	@fclone: if true allocate the copy of the skb from the fclone
 *	cache instead of the head cache; it is recommended to set this
 *	to true for the cases where the copy will likely be cloned
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in the header. Fragmented data remain shared. This is used when
 *	the caller wishes to modify only the header of the &sk_buff and needs
 *	a private copy of the header to alter. Returns %NULL on failure
 *	or the pointer to the buffer on success.
 *	The returned buffer has a reference count of 1.
 */
struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
				   gfp_t gfp_mask, bool fclone)
{
	unsigned int size = skb_headlen(skb) + headroom;
	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);

	if (!n)
		goto out;

	/* Set the data pointer */
	skb_reserve(n, headroom);
	/* Set the tail pointer and length */
	skb_put(n, skb_headlen(skb));
	/* Copy the bytes */
	skb_copy_from_linear_data(skb, n->data, n->len);

	n->truesize += skb->data_len;
	n->data_len  = skb->data_len;
	n->len	     = skb->len;

	if (skb_shinfo(skb)->nr_frags) {
		int i;

		if (skb_orphan_frags(skb, gfp_mask) ||
		    skb_zerocopy_clone(n, skb, gfp_mask)) {
			kfree_skb(n);
			n = NULL;
			goto out;
		}
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
			skb_frag_ref(skb, i);
		}
		skb_shinfo(n)->nr_frags = i;
	}

	if (skb_has_frag_list(skb)) {
		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
		skb_clone_fraglist(n);
	}

	copy_skb_header(n, skb);
out:
	return n;
}
EXPORT_SYMBOL(__pskb_copy_fclone);
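
/* Hypothetical sketch: a header rewrite only needs a private linear
 * area, so pskb_copy() -- an inline wrapper around this function -- is
 * cheaper than skb_copy(): fragments stay shared and only their page
 * reference counts are bumped.
 */
static inline struct sk_buff *example_private_headers(struct sk_buff *skb)
{
	return pskb_copy(skb, GFP_ATOMIC);
}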

/**
 *	pskb_expand_head - reallocate header of &sk_buff
 *	@skb: buffer to reallocate
 *	@nhead: room to add at head
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
 *	Expands (or creates identical copy, if @nhead and @ntail are zero)
 *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1. Returns zero on success or a negative error
 *	code if expansion failed. In the latter case, &sk_buff is not changed.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
		     gfp_t gfp_mask)
{
	int i, osize = skb_end_offset(skb);
	int size = osize + nhead + ntail;
	long off;
	u8 *data;

	BUG_ON(nhead < 0);

	if (skb_shared(skb))
		BUG();

	size = SKB_DATA_ALIGN(size);

	if (skb_pfmemalloc(skb))
		gfp_mask |= __GFP_MEMALLOC;
	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
			       gfp_mask, NUMA_NO_NODE, NULL);
	if (!data)
		goto nodata;
	size = SKB_WITH_OVERHEAD(ksize(data));

	/* Copy only real data... and, alas, header. This should be
	 * optimized for the cases when header is void.
	 */
	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);

	memcpy((struct skb_shared_info *)(data + size),
	       skb_shinfo(skb),
	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));

	/*
	 * if shinfo is shared we must drop the old head gracefully, but if it
	 * is not we can just drop the old head and let the existing refcount
	 * be since all we did is relocate the values
	 */
	if (skb_cloned(skb)) {
		if (skb_orphan_frags(skb, gfp_mask))
			goto nofrags;
		if (skb_zcopy(skb))
			refcount_inc(&skb_uarg(skb)->refcnt);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
			skb_frag_ref(skb, i);

		if (skb_has_frag_list(skb))
			skb_clone_fraglist(skb);

		skb_release_data(skb);
	} else {
		skb_free_head(skb);
	}
	off = (data + nhead) - skb->head;

	skb->head     = data;
	skb->head_frag = 0;
	skb->data    += off;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
	skb->end      = size;
	off           = nhead;
#else
	skb->end      = skb->head + size;
#endif
	skb->tail	      += off;
	skb_headers_offset_update(skb, nhead);
	skb->cloned   = 0;
	skb->hdr_len  = 0;
	skb->nohdr    = 0;
	atomic_set(&skb_shinfo(skb)->dataref, 1);

	/* It is not generally safe to change skb->truesize.
	 * For the moment, we only really care about the rx path, or
	 * when the skb is orphaned (not attached to a socket).
	 */
	if (!skb->sk || skb->destructor == sock_edemux)
		skb->truesize += size - osize;

	return 0;

nofrags:
	kfree(data);
nodata:
	return -ENOMEM;
}
EXPORT_SYMBOL(pskb_expand_head);
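
/* Hypothetical sketch: grow headroom for an encapsulation header, in
 * the style of skb_cow_head().  Every cached pointer into the old head
 * (skb->data, skb_network_header(), ...) is stale after a successful
 * call and must be re-derived.
 */
static inline int example_grow_headroom(struct sk_buff *skb, int needed)
{
	int delta = needed - skb_headroom(skb);

	if (delta <= 0 && !skb_cloned(skb))
		return 0;
	return pskb_expand_head(skb, delta > 0 ? SKB_DATA_ALIGN(delta) : 0,
				0, GFP_ATOMIC);
}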

/* Make private copy of skb with writable head and some headroom */

struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
	struct sk_buff *skb2;
	int delta = headroom - skb_headroom(skb);

	if (delta <= 0)
		skb2 = pskb_copy(skb, GFP_ATOMIC);
	else {
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
					     GFP_ATOMIC)) {
			kfree_skb(skb2);
			skb2 = NULL;
		}
	}
	return skb2;
}
EXPORT_SYMBOL(skb_realloc_headroom);

/**
 *	skb_copy_expand	-	copy and expand sk_buff
 *	@skb: buffer to copy
 *	@newheadroom: new free bytes at head
 *	@newtailroom: new free bytes at tail
 *	@gfp_mask: allocation priority
 *
 *	Make a copy of both an &sk_buff and its data and while doing so
 *	allocate additional space.
 *
 *	This is used when the caller wishes to modify the data and needs a
 *	private copy of the data to alter as well as more space for new fields.
 *	Returns %NULL on failure or the pointer to the buffer
 *	on success. The returned buffer has a reference count of 1.
 *
 *	You must pass %GFP_ATOMIC as the allocation priority if this function
 *	is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom, int newtailroom,
				gfp_t gfp_mask)
{
	/*
	 *	Allocate the copy buffer
	 */
	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
					gfp_mask, skb_alloc_rx_flag(skb),
					NUMA_NO_NODE);
	int oldheadroom = skb_headroom(skb);
	int head_copy_len, head_copy_off;

	if (!n)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	head_copy_len = oldheadroom;
	head_copy_off = 0;
	if (newheadroom <= head_copy_len)
		head_copy_len = newheadroom;
	else
		head_copy_off = newheadroom - head_copy_len;

	/* Copy the linear header and data. */
	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
			  skb->len + head_copy_len))
		BUG();

	copy_skb_header(n, skb);

	skb_headers_offset_update(n, newheadroom - oldheadroom);

	return n;
}
EXPORT_SYMBOL(skb_copy_expand);
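
/* Hypothetical sketch: re-frame a packet for a tunnel that adds bytes
 * at both ends; @hdr_len/@trl_len are illustrative parameters, not
 * anything this file defines.
 */
static inline struct sk_buff *example_reframe(const struct sk_buff *skb,
					      int hdr_len, int trl_len)
{
	return skb_copy_expand(skb, hdr_len, trl_len, GFP_ATOMIC);
}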

/**
 *	__skb_pad		-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *	@free_on_error: free buffer on error
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return an error in out-of-memory cases. The skb is freed on
 *	error if @free_on_error is true.
 */

int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
{
	int err;
	int ntail;

	/* If the skbuff is non linear tailroom is always zero.. */
	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
		memset(skb->data + skb->len, 0, pad);
		return 0;
	}

	ntail = skb->data_len + pad - (skb->end - skb->tail);
	if (likely(skb_cloned(skb) || ntail > 0)) {
		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
		if (unlikely(err))
			goto free_skb;
	}

	/* FIXME: The use of this function with non-linear skb's really needs
	 * to be audited.
	 */
	err = skb_linearize(skb);
	if (unlikely(err))
		goto free_skb;

	memset(skb->data + skb->len, 0, pad);
	return 0;

free_skb:
	if (free_on_error)
		kfree_skb(skb);
	return err;
}
EXPORT_SYMBOL(__skb_pad);
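
/* Hypothetical sketch: pad a short Ethernet frame to the 60-byte
 * minimum before handing it to hardware.  skb_padto() is the inline
 * wrapper that reaches this function; note that the padding is not
 * added to skb->len, it only zeroes tailroom.
 */
static inline int example_pad_min_eth(struct sk_buff *skb)
{
	if (skb->len < ETH_ZLEN)
		return skb_padto(skb, ETH_ZLEN);	/* frees skb on error */
	return 0;
}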

/**
 *	pskb_put - add data to the tail of a potentially fragmented buffer
 *	@skb: start of the buffer to use
 *	@tail: tail fragment of the buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the potentially
 *	fragmented buffer. @tail must be the last fragment of @skb -- or
 *	@skb itself. If this would exceed the total buffer size the kernel
 *	will panic. A pointer to the first byte of the extra data is
 *	returned.
 */
void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
{
	if (tail != skb) {
		skb->data_len += len;
		skb->len += len;
	}
	return skb_put(tail, len);
}
EXPORT_SYMBOL_GPL(pskb_put);
  
/**
 *	skb_put - add data to a buffer
 *	@skb: buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the buffer. If this would
 *	exceed the total buffer size the kernel will panic. A pointer to the
 *	first byte of the extra data is returned.
 */
void *skb_put(struct sk_buff *skb, unsigned int len)
{
	void *tmp = skb_tail_pointer(skb);

	SKB_LINEAR_ASSERT(skb);
	skb->tail += len;
	skb->len  += len;
	if (unlikely(skb->tail > skb->end))
		skb_over_panic(skb, len, __builtin_return_address(0));
	return tmp;
}
EXPORT_SYMBOL(skb_put);

/**
 *	skb_push - add data to the start of a buffer
 *	@skb: buffer to use
 *	@len: amount of data to add
 *
 *	This function extends the used data area of the buffer at the buffer
 *	start. If this would exceed the total buffer headroom the kernel will
 *	panic. A pointer to the first byte of the extra data is returned.
 */
void *skb_push(struct sk_buff *skb, unsigned int len)
{
	skb->data -= len;
	skb->len  += len;
	if (unlikely(skb->data < skb->head))
		skb_under_panic(skb, len, __builtin_return_address(0));
	return skb->data;
}
EXPORT_SYMBOL(skb_push);
  
/**
 *	skb_pull - remove data from the start of a buffer
 *	@skb: buffer to use
 *	@len: amount of data to remove
 *
 *	This function removes data from the start of a buffer, returning
 *	the memory to the headroom. A pointer to the next data in the buffer
 *	is returned. Once the data has been pulled future pushes will overwrite
 *	the old data.
 */
void *skb_pull(struct sk_buff *skb, unsigned int len)
{
	return skb_pull_inline(skb, len);
}
EXPORT_SYMBOL(skb_pull);

/**
 *	skb_trim - remove end from a buffer
 *	@skb: buffer to alter
 *	@len: new length
 *
 *	Cut the length of a buffer down by removing data from the tail. If
 *	the buffer is already under the length specified it is not modified.
 *	The skb must be linear.
 */
void skb_trim(struct sk_buff *skb, unsigned int len)
{
	if (skb->len > len)
		__skb_trim(skb, len);
}
EXPORT_SYMBOL(skb_trim);
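
/* Hypothetical sketch tying the four primitives together: a builder
 * reserves headroom, puts payload and pushes a header; a receiver
 * pulls the header back off and trims tail padding.  The 64/60 sizes
 * are arbitrary.
 */
static inline void example_put_push_pull_trim(struct sk_buff *skb)
{
	struct ethhdr *eth;

	skb_reserve(skb, ETH_HLEN);		/* headroom for the header */
	memset(skb_put(skb, 64), 0, 64);	/* append 64 payload bytes */
	eth = skb_push(skb, ETH_HLEN);		/* prepend the header */
	eth->h_proto = htons(ETH_P_IP);

	skb_pull(skb, ETH_HLEN);		/* receive side: strip it */
	skb_trim(skb, 60);			/* drop tail padding */
}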

/* Trims skb to length len. It can change skb pointers.
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
	struct sk_buff **fragp;
	struct sk_buff *frag;
	int offset = skb_headlen(skb);
	int nfrags = skb_shinfo(skb)->nr_frags;
	int i;
	int err;

	if (skb_cloned(skb) &&
	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
		return err;

	i = 0;
	if (offset >= len)
		goto drop_pages;

	for (; i < nfrags; i++) {
		int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (end < len) {
			offset = end;
			continue;
		}

		skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);

drop_pages:
		skb_shinfo(skb)->nr_frags = i;

		for (; i < nfrags; i++)
			skb_frag_unref(skb, i);

		if (skb_has_frag_list(skb))
			skb_drop_fraglist(skb);
		goto done;
	}

	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
	     fragp = &frag->next) {
		int end = offset + frag->len;

		if (skb_shared(frag)) {
			struct sk_buff *nfrag;

			nfrag = skb_clone(frag, GFP_ATOMIC);
			if (unlikely(!nfrag))
				return -ENOMEM;

			nfrag->next = frag->next;
			consume_skb(frag);
			frag = nfrag;
			*fragp = frag;
		}

		if (end < len) {
			offset = end;
			continue;
		}

		if (end > len &&
		    unlikely((err = pskb_trim(frag, len - offset))))
			return err;

		if (frag->next)
			skb_drop_list(&frag->next);
		break;
	}

done:
	if (len > skb_headlen(skb)) {
		skb->data_len -= skb->len - len;
		skb->len       = len;
	} else {
		skb->len       = len;
		skb->data_len  = 0;
		skb_set_tail_pointer(skb, len);
	}

	if (!skb->sk || skb->destructor == sock_edemux)
		skb_condense(skb);
	return 0;
}
EXPORT_SYMBOL(___pskb_trim);

/* Note : use pskb_trim_rcsum() instead of calling this directly
 */
int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		int delta = skb->len - len;

		skb->csum = csum_sub(skb->csum,
				     skb_checksum(skb, len, delta, 0));
	}
	return __pskb_trim(skb, len);
}
EXPORT_SYMBOL(pskb_trim_rcsum_slow);
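
/* Hypothetical sketch: trimming a received frame whose device checksum
 * covers the whole packet.  pskb_trim_rcsum() (inline in skbuff.h)
 * keeps CHECKSUM_COMPLETE valid by taking the slow path above only
 * when bytes are actually cut.
 */
static inline int example_strip_l2_padding(struct sk_buff *skb,
					   unsigned int new_len)
{
	return pskb_trim_rcsum(skb, new_len);
}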

/**
 *	__pskb_pull_tail - advance tail of skb header
 *	@skb: buffer to reallocate
 *	@delta: number of bytes to advance tail
 *
 *	The function makes sense only on a fragmented &sk_buff;
 *	it expands header moving its tail forward and copying necessary
 *	data from fragmented part.
 *
 *	&sk_buff MUST have reference count of 1.
 *
 *	Returns %NULL (and &sk_buff does not change) if pull failed
 *	or value of new tail of skb in the case of success.
 *
 *	All the pointers pointing into skb header may change and must be
 *	reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
void *__pskb_pull_tail(struct sk_buff *skb, int delta)
{
	/* If the skb does not have enough free space at the tail, get a new
	 * one plus 128 bytes for future expansions. If we have enough room
	 * at the tail, reallocate without expansion only if skb is cloned.
	 */
	int i, k, eat = (skb->tail + delta) - skb->end;

	if (eat > 0 || skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
				     GFP_ATOMIC))
			return NULL;
	}

	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
		BUG();

	/* Optimization: no fragments, no reasons to preestimate
	 * size of pulled pages. Superb.
	 */
	if (!skb_has_frag_list(skb))
		goto pull_pages;

	/* Estimate size of pulled pages. */
	eat = delta;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size >= eat)
			goto pull_pages;
		eat -= size;
	}

	/* If we need to update the frag list, we are in trouble.
	 * Certainly, it is possible to add an offset to skb data,
	 * but taking into account that pulling is expected to
	 * be a very rare operation, it is worth fighting against
	 * further bloating of the skb head and crucifying ourselves
	 * here instead. Pure masochism, indeed. 8)8)
	 */
	if (eat) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;
		struct sk_buff *clone = NULL;
		struct sk_buff *insp = NULL;

		do {
			BUG_ON(!list);

			if (list->len <= eat) {
				/* Eaten as whole. */
				eat -= list->len;
				list = list->next;
				insp = list;
			} else {
				/* Eaten partially. */

				if (skb_shared(list)) {
					/* Sucks! We need to fork list. :-( */
					clone = skb_clone(list, GFP_ATOMIC);
					if (!clone)
						return NULL;
					insp = list->next;
					list = clone;
				} else {
					/* This may be pulled without
					 * problems. */
					insp = list;
				}
				if (!pskb_pull(list, eat)) {
					kfree_skb(clone);
					return NULL;
				}
				break;
			}
		} while (eat);

		/* Free pulled out fragments. */
		while ((list = skb_shinfo(skb)->frag_list) != insp) {
			skb_shinfo(skb)->frag_list = list->next;
			kfree_skb(list);
		}
		/* And insert new clone at head. */
		if (clone) {
			clone->next = list;
			skb_shinfo(skb)->frag_list = clone;
		}
	}
	/* Success! Now we may commit changes to skb data. */

pull_pages:
	eat = delta;
	k = 0;
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (size <= eat) {
			skb_frag_unref(skb, i);
			eat -= size;
		} else {
			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
			if (eat) {
				skb_shinfo(skb)->frags[k].page_offset += eat;
				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
				if (!i)
					goto end;
				eat = 0;
			}
			k++;
		}
	}
	skb_shinfo(skb)->nr_frags = k;

end:
	skb->tail     += delta;
	skb->data_len -= delta;

	if (!skb->data_len)
		skb_zcopy_clear(skb, false);
	return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);
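
/* Hypothetical sketch: the common consumer of __pskb_pull_tail() is
 * pskb_may_pull(), which linearizes just enough bytes for a header
 * peek.  Pointers into the old head must be refetched afterwards.
 */
static inline struct tcphdr *example_tcp_hdr_linear(struct sk_buff *skb)
{
	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		return NULL;
	return (struct tcphdr *)skb->data;	/* head may have moved */
}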

/**
 *	skb_copy_bits - copy bits from skb to kernel buffer
 *	@skb: source skb
 *	@offset: offset in source
 *	@to: destination buffer
 *	@len: number of bytes to copy
 *
 *	Copy the specified number of bytes from the source skb to the
 *	destination buffer.
 *
 *	CAUTION ! :
 *		If its prototype is ever changed,
 *		check arch/{*}/net/{*}.S files,
 *		since it is called from BPF assembly code.
 */
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	if (offset > (int)skb->len - len)
		goto fault;

	/* Copy header. */
	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_from_linear_data_offset(skb, offset, to, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to     += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(f);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(f,
					      f->page_offset + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				memcpy(to + copied, vaddr + p_off, p_len);
				kunmap_atomic(vaddr);
			}

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_bits(frag_iter, offset - start, to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_bits);
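
/* Hypothetical sketch: peek a fixed-size header into a stack buffer no
 * matter how the skb is fragmented.  skb_header_pointer() wraps this
 * pattern and skips the copy when the bytes are already linear.
 */
static inline int example_peek_bytes(const struct sk_buff *skb, int offset,
				     void *buf, int len)
{
	return skb_copy_bits(skb, offset, buf, len);
}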

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	put_page(spd->pages[i]);
}

static struct page *linear_to_page(struct page *page, unsigned int *len,
				   unsigned int *offset,
				   struct sock *sk)
{
	struct page_frag *pfrag = sk_page_frag(sk);

	if (!sk_page_frag_refill(sk, pfrag))
		return NULL;

	*len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);

	memcpy(page_address(pfrag->page) + pfrag->offset,
	       page_address(page) + *offset, *len);
	*offset = pfrag->offset;
	pfrag->offset += *len;

	return pfrag->page;
}

static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
			     struct page *page,
			     unsigned int offset)
{
	return	spd->nr_pages &&
		spd->pages[spd->nr_pages - 1] == page &&
		(spd->partial[spd->nr_pages - 1].offset +
		 spd->partial[spd->nr_pages - 1].len == offset);
}

/*
 * Fill page/offset/length into spd, if it can hold more pages.
 */
static bool spd_fill_page(struct splice_pipe_desc *spd,
			  struct pipe_inode_info *pipe, struct page *page,
			  unsigned int *len, unsigned int offset,
			  bool linear,
			  struct sock *sk)
{
	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
		return true;

	if (linear) {
		page = linear_to_page(page, len, &offset, sk);
		if (!page)
			return true;
	}
	if (spd_can_coalesce(spd, page, offset)) {
		spd->partial[spd->nr_pages - 1].len += *len;
		return false;
	}
	get_page(page);
	spd->pages[spd->nr_pages] = page;
	spd->partial[spd->nr_pages].len = *len;
	spd->partial[spd->nr_pages].offset = offset;
	spd->nr_pages++;

	return false;
}

static bool __splice_segment(struct page *page, unsigned int poff,
			     unsigned int plen, unsigned int *off,
			     unsigned int *len,
			     struct splice_pipe_desc *spd, bool linear,
			     struct sock *sk,
			     struct pipe_inode_info *pipe)
{
	if (!*len)
		return true;

	/* skip this segment if already processed */
	if (*off >= plen) {
		*off -= plen;
		return false;
	}

	/* ignore any bits we already processed */
	poff += *off;
	plen -= *off;
	*off = 0;

	do {
		unsigned int flen = min(*len, plen);

		if (spd_fill_page(spd, pipe, page, &flen, poff,
				  linear, sk))
			return true;
		poff += flen;
		plen -= flen;
		*len -= flen;
	} while (*len && plen);

	return false;
}
  
/*
 * Map linear and fragment data from the skb to spd. It reports true if the
 * pipe is full or if we already spliced the requested length.
 */
static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
			      unsigned int *offset, unsigned int *len,
			      struct splice_pipe_desc *spd, struct sock *sk)
{
	int seg;
	struct sk_buff *iter;

	/* map the linear part :
	 * If skb->head_frag is set, this 'linear' part is backed by a
	 * fragment, and if the head is not shared with any clones then
	 * we can avoid a copy since we own the head portion of this page.
	 */
	if (__splice_segment(virt_to_page(skb->data),
			     (unsigned long) skb->data & (PAGE_SIZE - 1),
			     skb_headlen(skb),
			     offset, len, spd,
			     skb_head_is_locked(skb),
			     sk, pipe))
		return true;

	/*
	 * then map the fragments
	 */
	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

		if (__splice_segment(skb_frag_page(f),
				     f->page_offset, skb_frag_size(f),
				     offset, len, spd, false, sk, pipe))
			return true;
	}

	skb_walk_frags(skb, iter) {
		if (*offset >= iter->len) {
			*offset -= iter->len;
			continue;
		}
		/* __skb_splice_bits() only fails if the output has no room
		 * left, so no point in going over the frag_list for the error
		 * case.
		 */
		if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
			return true;
	}

	return false;
}
  
/*
 * Map data from the skb to a pipe. Should handle both the linear part,
 * the fragments, and the frag list.
 */
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
		    struct pipe_inode_info *pipe, unsigned int tlen,
		    unsigned int flags)
{
	struct partial_page partial[MAX_SKB_FRAGS];
	struct page *pages[MAX_SKB_FRAGS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.nr_pages_max = MAX_SKB_FRAGS,
		.ops = &nosteal_pipe_buf_ops,
		.spd_release = sock_spd_release,
	};
	int ret = 0;

	__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);

	if (spd.nr_pages)
		ret = splice_to_pipe(pipe, &spd);

	return ret;
}
EXPORT_SYMBOL_GPL(skb_splice_bits);
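
/* Hypothetical sketch of a caller: a protocol's splice_read path hands
 * one receive-queue skb to the pipe, much as tcp_splice_read() drives
 * this helper through its read descriptor.
 */
static inline int example_splice_one(struct sock *sk, struct sk_buff *skb,
				     unsigned int offset,
				     struct pipe_inode_info *pipe,
				     unsigned int len, unsigned int flags)
{
	return skb_splice_bits(skb, sk, offset, pipe, len, flags);
}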

/* Send skb data on a socket. Socket must be locked. */
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
			 int len)
{
	unsigned int orig_len = len;
	struct sk_buff *head = skb;
	unsigned short fragidx;
	int slen, ret;

do_frag_list:

	/* Deal with head data */
	while (offset < skb_headlen(skb) && len) {
		struct kvec kv;
		struct msghdr msg;

		slen = min_t(int, len, skb_headlen(skb) - offset);
		kv.iov_base = skb->data + offset;
		kv.iov_len = slen;
20bf50de3   Tom Herbert   skbuff: Function ...
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
  		memset(&msg, 0, sizeof(msg));
  
  		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
  		if (ret <= 0)
  			goto error;
  
  		offset += ret;
  		len -= ret;
  	}
  
  	/* All the data was skb head? */
  	if (!len)
  		goto out;
  
  	/* Make offset relative to start of frags */
  	offset -= skb_headlen(skb);
  
  	/* Find where we are in frag list */
  	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
  		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
  
  		if (offset < frag->size)
  			break;
  
  		offset -= frag->size;
  	}
  
  	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
  		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
  
  		slen = min_t(size_t, len, frag->size - offset);
  
  		while (slen) {
  			ret = kernel_sendpage_locked(sk, frag->page.p,
  						     frag->page_offset + offset,
  						     slen, MSG_DONTWAIT);
  			if (ret <= 0)
  				goto error;
  
  			len -= ret;
  			offset += ret;
  			slen -= ret;
  		}
  
  		offset = 0;
  	}
  
  	if (len) {
  		/* Process any frag lists */
  
  		if (skb == head) {
  			if (skb_has_frag_list(skb)) {
  				skb = skb_shinfo(skb)->frag_list;
  				goto do_frag_list;
  			}
  		} else if (skb->next) {
  			skb = skb->next;
  			goto do_frag_list;
  		}
  	}
  
  out:
  	return orig_len - len;
  
  error:
  	return orig_len == len ? ret : orig_len - len;
  }
  EXPORT_SYMBOL_GPL(skb_send_sock_locked);
  
  /* Send skb data on a socket. */
  int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
  {
  	int ret = 0;
  
  	lock_sock(sk);
  	ret = skb_send_sock_locked(sk, skb, offset, len);
  	release_sock(sk);
  
  	return ret;
  }
  EXPORT_SYMBOL_GPL(skb_send_sock);
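
/* Example (editorial, not from the original file): a minimal sketch of
 * driving skb_send_sock() until a whole buffer has been pushed out,
 * retrying after short sends.  The error policy is an assumption of
 * this sketch.
 */
static int example_send_whole_skb(struct sock *sk, struct sk_buff *skb)
{
	int off = 0, left = skb->len;

	while (left > 0) {
		int sent = skb_send_sock(sk, skb, off, left);

		if (sent <= 0)
			return sent ? sent : -EIO; /* assumed: treat 0 as I/O error */
		off += sent;
		left -= sent;
	}
	return skb->len;
}
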
  /**
   *	skb_store_bits - store bits from kernel buffer to skb
   *	@skb: destination buffer
   *	@offset: offset in destination
   *	@from: source buffer
   *	@len: number of bytes to copy
   *
   *	Copy the specified number of bytes from the source buffer to the
   *	destination skb.  This function handles all the messy bits of
   *	traversing fragment lists and such.
   */
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
{
	int start = skb_headlen(skb);
	struct sk_buff *frag_iter;
	int i, copy;

	if (offset > (int)skb->len - len)
		goto fault;

	if ((copy = start - offset) > 0) {
		if (copy > len)
			copy = len;
		skb_copy_to_linear_data_offset(skb, offset, from, copy);
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(frag,
					      frag->page_offset + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				memcpy(vaddr + p_off, from + copied, p_len);
				kunmap_atomic(vaddr);
			}

			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_store_bits(frag_iter, offset - start,
					   from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_store_bits);
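
/* Example (editorial): overwrite a 16-bit field at a known offset in a
 * possibly non-linear skb.  The "marker" field and its offset are made up
 * for illustration; skb_store_bits() returns -EFAULT on bad geometry.
 */
static int example_poke_marker(struct sk_buff *skb, int offset, __be16 marker)
{
	return skb_store_bits(skb, offset, &marker, sizeof(marker));
}
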
/* Checksum skb data. */
__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
		      __wsum csum, const struct skb_checksum_ops *ops)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Checksum header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = ops->update(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos	= copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			u32 p_off, p_len, copied;
			struct page *p;
			__wsum csum2;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(frag,
					      frag->page_offset + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				csum2 = ops->update(vaddr + p_off, p_len, 0);
				kunmap_atomic(vaddr);
				csum = ops->combine(csum, csum2, pos, p_len);
				pos += p_len;
			}

			if (!(len -= copy))
				return csum;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			if (copy > len)
				copy = len;
			csum2 = __skb_checksum(frag_iter, offset - start,
					       copy, 0, ops);
			csum = ops->combine(csum, csum2, pos, copy);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			pos    += copy;
		}
		start = end;
	}
	BUG_ON(len);

	return csum;
}
EXPORT_SYMBOL(__skb_checksum);

__wsum skb_checksum(const struct sk_buff *skb, int offset,
		    int len, __wsum csum)
{
	const struct skb_checksum_ops ops = {
		.update  = csum_partial_ext,
		.combine = csum_block_add_ext,
	};

	return __skb_checksum(skb, offset, len, csum, &ops);
}
EXPORT_SYMBOL(skb_checksum);
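
/* Example (editorial): fold a whole-packet checksum into its final 16-bit
 * ones'-complement form, as a caller verifying a CHECKSUM_NONE packet by
 * hand might do.
 */
static __sum16 example_checksum_whole(const struct sk_buff *skb)
{
	return csum_fold(skb_checksum(skb, 0, skb->len, 0));
}
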
/* Both of above in one bottle. */

__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
				    u8 *to, int len, __wsum csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial_copy_nocheck(skb->data + offset, to,
						 copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		to     += copy;
		pos	= copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			u32 p_off, p_len, copied;
			struct page *p;
			__wsum csum2;
			u8 *vaddr;

			if (copy > len)
				copy = len;

			skb_frag_foreach_page(frag,
					      frag->page_offset + offset - start,
					      copy, p, p_off, p_len, copied) {
				vaddr = kmap_atomic(p);
				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
								  to + copied,
								  p_len, 0);
				kunmap_atomic(vaddr);
				csum = csum_block_add(csum, csum2, pos);
				pos += p_len;
			}

			if (!(len -= copy))
				return csum;
			offset += copy;
			to     += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		__wsum csum2;
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			csum2 = skb_copy_and_csum_bits(frag_iter,
						       offset - start,
						       to, copy, 0);
			csum = csum_block_add(csum, csum2, pos);
			if ((len -= copy) == 0)
				return csum;
			offset += copy;
			to     += copy;
			pos    += copy;
		}
		start = end;
	}
	BUG_ON(len);
	return csum;
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);

static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
{
	net_warn_ratelimited("%s: attempt to compute crc32c without libcrc32c.ko\n",
			     __func__);
	return 0;
}

static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
				       int offset, int len)
{
	net_warn_ratelimited("%s: attempt to compute crc32c without libcrc32c.ko\n",
			     __func__);
	return 0;
}

static const struct skb_checksum_ops default_crc32c_ops = {
	.update  = warn_crc32c_csum_update,
	.combine = warn_crc32c_csum_combine,
};

const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
	&default_crc32c_ops;
EXPORT_SYMBOL(crc32c_csum_stub);
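
/* Example (editorial): sketch of how a crc32c provider could install real
 * ops in place of the warning stub above.  Both helper bodies are
 * hypothetical placeholders; the in-tree provider lives outside this file.
 */
static __wsum example_crc32c_update(const void *buff, int len, __wsum sum)
{
	return sum;	/* placeholder: a real provider computes crc32c here */
}

static __wsum example_crc32c_combine(__wsum csum, __wsum csum2,
				     int offset, int len)
{
	return csum;	/* placeholder: a real provider combines blocks here */
}

static const struct skb_checksum_ops example_crc32c_ops = {
	.update  = example_crc32c_update,
	.combine = example_crc32c_combine,
};

static void example_install_crc32c(void)
{
	crc32c_csum_stub = &example_crc32c_ops;
}
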
/**
 *	skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
 *	@from: source buffer
 *
 *	Calculates the amount of linear headroom needed in the 'to' skb passed
 *	into skb_zerocopy().
 */
unsigned int
skb_zerocopy_headlen(const struct sk_buff *from)
{
	unsigned int hlen = 0;

	if (!from->head_frag ||
	    skb_headlen(from) < L1_CACHE_BYTES ||
	    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
		hlen = skb_headlen(from);

	if (skb_has_frag_list(from))
		hlen = from->len;

	return hlen;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);

/**
 *	skb_zerocopy - Zero copy skb to skb
 *	@to: destination buffer
 *	@from: source buffer
 *	@len: number of bytes to copy from source buffer
 *	@hlen: size of linear headroom in destination buffer
 *
 *	Copies up to `len` bytes from `from` to `to` by creating references
 *	to the frags in the source buffer.
 *
 *	The `hlen` as calculated by skb_zerocopy_headlen() specifies the
 *	headroom in the `to` buffer.
 *
 *	Return value:
 *	0: everything is OK
 *	-ENOMEM: couldn't orphan frags of @from due to lack of memory
 *	-EFAULT: skb_copy_bits() found some problem with skb geometry
 */
int
skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
{
	int i, j = 0;
	int plen = 0; /* length of skb->head fragment */
	int ret;
	struct page *page;
	unsigned int offset;

	BUG_ON(!from->head_frag && !hlen);

	/* don't bother with small payloads */
	if (len <= skb_tailroom(to))
		return skb_copy_bits(from, 0, skb_put(to, len), len);

	if (hlen) {
		ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
		if (unlikely(ret))
			return ret;
		len -= hlen;
	} else {
		plen = min_t(int, skb_headlen(from), len);
		if (plen) {
			page = virt_to_head_page(from->head);
			offset = from->data - (unsigned char *)page_address(page);
			__skb_fill_page_desc(to, 0, page, offset, plen);
			get_page(page);
			j = 1;
			len -= plen;
		}
	}

	to->truesize += len + plen;
	to->len += len + plen;
	to->data_len += len + plen;
	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
		skb_tx_error(from);
		return -ENOMEM;
	}
	skb_zerocopy_clone(to, from, GFP_ATOMIC);

	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
		if (!len)
			break;
		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
		len -= skb_shinfo(to)->frags[j].size;
		skb_frag_ref(to, j);
		j++;
	}
	skb_shinfo(to)->nr_frags = j;

	return 0;
}
EXPORT_SYMBOL_GPL(skb_zerocopy);
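
/* Example (editorial): build a fresh skb that references up to 'len' bytes
 * of 'from' without copying page data.  The 16 bytes of extra linear slack
 * are an arbitrary assumption of this sketch.
 */
static struct sk_buff *example_zerocopy_mirror(struct sk_buff *from, int len)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to = alloc_skb(hlen + 16, GFP_ATOMIC);

	if (!to)
		return NULL;
	if (skb_zerocopy(to, from, len, hlen)) {
		kfree_skb(to);
		return NULL;
	}
	return to;
}
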
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
	__wsum csum;
	long csstart;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		csstart = skb_checksum_start_offset(skb);
	else
		csstart = skb_headlen(skb);

	BUG_ON(csstart > skb_headlen(skb));

	skb_copy_from_linear_data(skb, to, csstart);

	csum = 0;
	if (csstart != skb->len)
		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
					      skb->len - csstart, 0);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		long csstuff = csstart + skb->csum_offset;

		*((__sum16 *)(to + csstuff)) = csum_fold(csum);
	}
}
EXPORT_SYMBOL(skb_copy_and_csum_dev);

/**
 *	skb_dequeue - remove from the head of the queue
 *	@list: list to dequeue from
 *
 *	Remove the head of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The head item is
 *	returned or %NULL if the list is empty.
 */

struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}
EXPORT_SYMBOL(skb_dequeue);

/**
 *	skb_dequeue_tail - remove from the tail of the queue
 *	@list: list to dequeue from
 *
 *	Remove the tail of the list. The list lock is taken so the function
 *	may be used safely with other locking list functions. The tail item is
 *	returned or %NULL if the list is empty.
 */
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
{
	unsigned long flags;
	struct sk_buff *result;

	spin_lock_irqsave(&list->lock, flags);
	result = __skb_dequeue_tail(list);
	spin_unlock_irqrestore(&list->lock, flags);
	return result;
}
EXPORT_SYMBOL(skb_dequeue_tail);

/**
 *	skb_queue_purge - empty a list
 *	@list: list to empty
 *
 *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 *	the list and one reference dropped. This function takes the list
 *	lock and is atomic with respect to other list locking functions.
 */
void skb_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(list)) != NULL)
		kfree_skb(skb);
}
EXPORT_SYMBOL(skb_queue_purge);
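
/* Example (editorial): the producer/consumer pattern these helpers are
 * built for.  The queue head and the drain policy are hypothetical.
 */
static void example_drain_queue(struct sk_buff_head *q)
{
	struct sk_buff *skb;

	/* skb_dequeue() takes q->lock, so this is safe against producers
	 * calling skb_queue_tail()/skb_queue_head() concurrently.
	 */
	while ((skb = skb_dequeue(q)) != NULL)
		kfree_skb(skb);	/* a real consumer would process skb first */
}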

/**
 *	skb_rbtree_purge - empty a skb rbtree
 *	@root: root of the rbtree to empty
 *	Return value: the sum of truesizes of all purged skbs.
 *
 *	Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
 *	the list and one reference dropped. This function does not take
 *	any lock. Synchronization should be handled by the caller (e.g., TCP
 *	out-of-order queue is protected by the socket lock).
 */
unsigned int skb_rbtree_purge(struct rb_root *root)
{
	struct rb_node *p = rb_first(root);
	unsigned int sum = 0;

	while (p) {
		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

		p = rb_next(p);
		rb_erase(&skb->rbnode, root);
		sum += skb->truesize;
		kfree_skb(skb);
	}
	return sum;
}
  
/**
 *	skb_queue_head - queue a buffer at the list head
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the start of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_head(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}
EXPORT_SYMBOL(skb_queue_head);

/**
 *	skb_queue_tail - queue a buffer at the list tail
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the tail of the list. This function takes the
 *	list lock and can be used safely with other locking &sk_buff functions.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_tail(list, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}
EXPORT_SYMBOL(skb_queue_tail);

/**
 *	skb_unlink	-	remove a buffer from a list
 *	@skb: buffer to remove
 *	@list: list to use
 *
 *	Remove a packet from a list. The list locks are taken and this
 *	function is atomic with respect to other list locked calls.
 *
 *	You must know what list the SKB is on.
 */
void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_unlink(skb, list);
	spin_unlock_irqrestore(&list->lock, flags);
}
EXPORT_SYMBOL(skb_unlink);

/**
 *	skb_append	-	append a buffer
 *	@old: buffer to insert after
 *	@newsk: buffer to insert
 *	@list: list to use
 *
 *	Place a packet after a given packet in a list. The list locks are taken
 *	and this function is atomic with respect to other list locked calls.
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_queue_after(list, old, newsk);
	spin_unlock_irqrestore(&list->lock, flags);
}
EXPORT_SYMBOL(skb_append);

/**
 *	skb_insert	-	insert a buffer
 *	@old: buffer to insert before
 *	@newsk: buffer to insert
 *	@list: list to use
 *
 *	Place a packet before a given packet in a list. The list locks are
 *	taken and this function is atomic with respect to other list locked
 *	calls.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
{
	unsigned long flags;

	spin_lock_irqsave(&list->lock, flags);
	__skb_insert(newsk, old->prev, old, list);
	spin_unlock_irqrestore(&list->lock, flags);
}
EXPORT_SYMBOL(skb_insert);

static inline void skb_split_inside_header(struct sk_buff *skb,
					   struct sk_buff* skb1,
					   const u32 len, const int pos)
{
	int i;

	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
					 pos - len);
	/* And move data appendix as is. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];

	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
	skb_shinfo(skb)->nr_frags  = 0;
	skb1->data_len		   = skb->data_len;
	skb1->len		   += skb1->data_len;
	skb->data_len		   = 0;
	skb->len		   = len;
	skb_set_tail_pointer(skb, len);
}

static inline void skb_split_no_header(struct sk_buff *skb,
				       struct sk_buff* skb1,
				       const u32 len, int pos)
{
	int i, k = 0;
	const int nfrags = skb_shinfo(skb)->nr_frags;

	skb_shinfo(skb)->nr_frags = 0;
	skb1->len		  = skb1->data_len = skb->len - len;
	skb->len		  = len;
	skb->data_len		  = len - pos;

	for (i = 0; i < nfrags; i++) {
		int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);

		if (pos + size > len) {
			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];

			if (pos < len) {
				/* Split frag.
				 * We have two variants in this case:
				 * 1. Move all the frag to the second
				 *    part, if it is possible. F.e.
				 *    this approach is mandatory for TUX,
				 *    where splitting is expensive.
				 * 2. Split it accurately. This is what
				 *    we do here.
				 */
				skb_frag_ref(skb, i);
				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
				skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
				skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
				skb_shinfo(skb)->nr_frags++;
			}
			k++;
		} else
			skb_shinfo(skb)->nr_frags++;
		pos += size;
	}
	skb_shinfo(skb1)->nr_frags = k;
}

/**
 * skb_split - Split fragmented skb to two parts at length len.
 * @skb: the buffer to split
 * @skb1: the buffer to receive the second part
 * @len: new length for skb
 */
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
	int pos = skb_headlen(skb);

	skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
				      SKBTX_SHARED_FRAG;
	skb_zerocopy_clone(skb1, skb, 0);
	if (len < pos)	/* Split line is inside header. */
		skb_split_inside_header(skb, skb1, len, pos);
	else		/* Second chunk has no header, nothing to copy. */
		skb_split_no_header(skb, skb1, len, pos);
}
EXPORT_SYMBOL(skb_split);
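
/* Example (editorial): split one packet into two at byte 'len'.  The
 * allocation size is a conservative assumption of this sketch: the second
 * buffer only needs linear room for whatever header bytes land in it.
 */
static struct sk_buff *example_split_at(struct sk_buff *skb, u32 len)
{
	struct sk_buff *skb1 = alloc_skb(skb_headlen(skb), GFP_ATOMIC);

	if (!skb1)
		return NULL;
	skb_split(skb, skb1, len);	/* skb keeps bytes [0, len) */
	return skb1;			/* skb1 holds the remainder */
}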

/* Shifting from/to a cloned skb is a no-go.
 *
 * Caller cannot keep skb_shinfo related pointers past calling here!
 */
static int skb_prepare_for_shift(struct sk_buff *skb)
{
	return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}

/**
 * skb_shift - Shifts paged data partially from skb to another
 * @tgt: buffer into which tail data gets added
 * @skb: buffer from which the paged data comes from
 * @shiftlen: shift up to this many bytes
 *
 * Attempts to shift up to shiftlen worth of bytes, which may be less than
 * the length of the skb, from skb to tgt. Returns the number of bytes shifted.
 * It's up to the caller to free skb if everything was shifted.
 *
 * If @tgt runs out of frags, the whole operation is aborted.
 *
 * Skb cannot include anything else but paged data while tgt is allowed
 * to have non-paged data as well.
 *
 * TODO: full sized shift could be optimized but that would need
 * specialized skb free'er to handle frags without up-to-date nr_frags.
 */
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
{
	int from, to, merge, todo;
	struct skb_frag_struct *fragfrom, *fragto;

	BUG_ON(shiftlen > skb->len);

	if (skb_headlen(skb))
		return 0;
	if (skb_zcopy(tgt) || skb_zcopy(skb))
		return 0;

	todo = shiftlen;
	from = 0;
	to = skb_shinfo(tgt)->nr_frags;
	fragfrom = &skb_shinfo(skb)->frags[from];

	/* Actual merge is delayed until the point when we know we can
	 * commit all, so that we don't have to undo partial changes
	 */
	if (!to ||
	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
			      fragfrom->page_offset)) {
		merge = -1;
	} else {
		merge = to - 1;

		todo -= skb_frag_size(fragfrom);
		if (todo < 0) {
			if (skb_prepare_for_shift(skb) ||
			    skb_prepare_for_shift(tgt))
				return 0;

			/* All previous frag pointers might be stale! */
			fragfrom = &skb_shinfo(skb)->frags[from];
			fragto = &skb_shinfo(tgt)->frags[merge];

			skb_frag_size_add(fragto, shiftlen);
			skb_frag_size_sub(fragfrom, shiftlen);
			fragfrom->page_offset += shiftlen;

			goto onlymerged;
		}

		from++;
	}

	/* Skip full, not-fitting skb to avoid expensive operations */
	if ((shiftlen == skb->len) &&
	    (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
		return 0;

	if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt))
		return 0;

	while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
		if (to == MAX_SKB_FRAGS)
			return 0;

		fragfrom = &skb_shinfo(skb)->frags[from];
		fragto = &skb_shinfo(tgt)->frags[to];

		if (todo >= skb_frag_size(fragfrom)) {
			*fragto = *fragfrom;
			todo -= skb_frag_size(fragfrom);
			from++;
			to++;

		} else {
			__skb_frag_ref(fragfrom);
			fragto->page = fragfrom->page;
			fragto->page_offset = fragfrom->page_offset;
			skb_frag_size_set(fragto, todo);

			fragfrom->page_offset += todo;
			skb_frag_size_sub(fragfrom, todo);
			todo = 0;

			to++;
			break;
		}
	}

	/* Ready to "commit" this state change to tgt */
	skb_shinfo(tgt)->nr_frags = to;

	if (merge >= 0) {
		fragfrom = &skb_shinfo(skb)->frags[0];
		fragto = &skb_shinfo(tgt)->frags[merge];

		skb_frag_size_add(fragto, skb_frag_size(fragfrom));
		__skb_frag_unref(fragfrom);
	}

	/* Reposition in the original skb */
	to = 0;
	while (from < skb_shinfo(skb)->nr_frags)
		skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
	skb_shinfo(skb)->nr_frags = to;

	BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);

onlymerged:
	/* Most likely the tgt won't ever need its checksum anymore, skb on
	 * the other hand might need it if it needs to be resent
	 */
	tgt->ip_summed = CHECKSUM_PARTIAL;
	skb->ip_summed = CHECKSUM_PARTIAL;

	/* Yak, is it really working this way? Some helper please? */
	skb->len -= shiftlen;
	skb->data_len -= shiftlen;
	skb->truesize -= shiftlen;
	tgt->len += shiftlen;
	tgt->data_len += shiftlen;
	tgt->truesize += shiftlen;

	return shiftlen;
}
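
/* Example (editorial): try to drain 'extra' into 'prev', as a collapse of
 * consecutive queue entries might.  Both buffers are assumed to satisfy
 * skb_shift()'s preconditions (the source holds only paged data).
 */
static bool example_collapse(struct sk_buff *prev, struct sk_buff *extra,
			     int want)
{
	int before = extra->len;

	/* skb_shift() may move fewer bytes than requested, or none at all;
	 * it returns the number of bytes actually moved.
	 */
	return skb_shift(prev, extra, want) == before;
}
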
/**
 * skb_prepare_seq_read - Prepare a sequential read of skb data
 * @skb: the buffer to read
 * @from: lower offset of data to be read
 * @to: upper offset of data to be read
 * @st: state variable
 *
 * Initializes the specified state variable. Must be called before
 * invoking skb_seq_read() for the first time.
 */
void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st)
{
	st->lower_offset = from;
	st->upper_offset = to;
	st->root_skb = st->cur_skb = skb;
	st->frag_idx = st->stepped_offset = 0;
	st->frag_data = NULL;
}
EXPORT_SYMBOL(skb_prepare_seq_read);

/**
 * skb_seq_read - Sequentially read skb data
 * @consumed: number of bytes consumed by the caller so far
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
 * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
 * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
 * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
 * Note 1: The size of each block of data returned can be arbitrary;
 *       this limitation is the cost for zerocopy sequential
 *       reads of potentially non-linear data.
 *
 * Note 2: Fragment lists within fragments are not implemented
 *       at the moment, state->root_skb could be replaced with
 *       a stack for this purpose.
 */
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st)
{
	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
	skb_frag_t *frag;

	if (unlikely(abs_offset >= st->upper_offset)) {
		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}
		return 0;
	}

next_skb:
	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;

	if (abs_offset < block_limit && !st->frag_data) {
		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
		return block_limit - abs_offset;
	}

	if (st->frag_idx == 0 && !st->frag_data)
		st->stepped_offset += skb_headlen(st->cur_skb);

	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
		frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
		block_limit = skb_frag_size(frag) + st->stepped_offset;

		if (abs_offset < block_limit) {
			if (!st->frag_data)
				st->frag_data = kmap_atomic(skb_frag_page(frag));

			*data = (u8 *) st->frag_data + frag->page_offset +
				(abs_offset - st->stepped_offset);

			return block_limit - abs_offset;
		}

		if (st->frag_data) {
			kunmap_atomic(st->frag_data);
			st->frag_data = NULL;
		}

		st->frag_idx++;
		st->stepped_offset += skb_frag_size(frag);
	}

	if (st->frag_data) {
		kunmap_atomic(st->frag_data);
		st->frag_data = NULL;
	}

	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
		st->frag_idx = 0;
		goto next_skb;
	} else if (st->cur_skb->next) {
		st->cur_skb = st->cur_skb->next;
		st->frag_idx = 0;
		goto next_skb;
	}

	return 0;
}
EXPORT_SYMBOL(skb_seq_read);

/**
 * skb_abort_seq_read - Abort a sequential read of skb data
 * @st: state variable
 *
 * Must be called if skb_seq_read() was not called until it
 * returned 0.
 */
void skb_abort_seq_read(struct skb_seq_state *st)
{
	if (st->frag_data)
		kunmap_atomic(st->frag_data);
}
EXPORT_SYMBOL(skb_abort_seq_read);
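
/* Example (editorial): the canonical prepare/read cycle, feeding every
 * byte of an skb to a hypothetical 'consume' callback.
 */
static void example_walk_skb(struct sk_buff *skb,
			     void (*consume)(const u8 *buf, unsigned int len))
{
	struct skb_seq_state st;
	const u8 *data;
	unsigned int off = 0, len;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	while ((len = skb_seq_read(off, &data, &st)) != 0) {
		consume(data, len);
		off += len;
	}
	/* skb_seq_read() returned 0, so the state is already clean; only an
	 * early exit from the loop would require skb_abort_seq_read(&st).
	 */
}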

#define TS_SKB_CB(state)	((struct skb_seq_state *) &((state)->cb))

static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
					  struct ts_config *conf,
					  struct ts_state *state)
{
	return skb_seq_read(offset, text, TS_SKB_CB(state));
}

static void skb_ts_finish(struct ts_config *conf, struct ts_state *state)
{
	skb_abort_seq_read(TS_SKB_CB(state));
}

/**
 * skb_find_text - Find a text pattern in skb data
 * @skb: the buffer to look in
 * @from: search offset
 * @to: search limit
 * @config: textsearch configuration
 *
 * Finds a pattern in the skb data according to the specified
 * textsearch configuration. Use textsearch_next() to retrieve
 * subsequent occurrences of the pattern. Returns the offset
 * to the first occurrence or UINT_MAX if no match was found.
 */
unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
			   unsigned int to, struct ts_config *config)
{
	struct ts_state state;
	unsigned int ret;

	config->get_next_block = skb_ts_get_next_block;
	config->finish = skb_ts_finish;

	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));

	ret = textsearch_find(config, &state);
	return (ret <= to - from ? ret : UINT_MAX);
}
EXPORT_SYMBOL(skb_find_text);
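
/* Example (editorial): scan a packet for a literal string with the
 * Boyer-Moore textsearch backend ("bm"); the pattern is made up.
 */
static unsigned int example_find_pattern(struct sk_buff *skb)
{
	static const char pattern[] = "HTTP/1.1";
	struct ts_config *conf;
	unsigned int pos;

	conf = textsearch_prepare("bm", pattern, sizeof(pattern) - 1,
				  GFP_KERNEL, TS_AUTOLOAD);
	if (IS_ERR(conf))
		return UINT_MAX;

	pos = skb_find_text(skb, 0, skb->len, conf);
	textsearch_destroy(conf);
	return pos;	/* offset of the first match, or UINT_MAX */
}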

/**
 * skb_append_datato_frags - append the user data to a skb
 * @sk: sock structure
 * @skb: skb structure to be appended with user data.
 * @getfrag: call back function to be used for getting the user data
 * @from: pointer to user message iov
 * @length: length of the iov message
 *
 * Description: This procedure appends the user data in the fragment part
 * of the skb. If any page allocation fails, this procedure returns -ENOMEM.
 */
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
			int (*getfrag)(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			void *from, int length)
{
	int frg_cnt = skb_shinfo(skb)->nr_frags;
	int copy;
	int offset = 0;
	int ret;
	struct page_frag *pfrag = &current->task_frag;

	do {
		/* Return error if we don't have space for new frag */
		if (frg_cnt >= MAX_SKB_FRAGS)
			return -EMSGSIZE;

		if (!sk_page_frag_refill(sk, pfrag))
			return -ENOMEM;

		/* copy the user data to page */
		copy = min_t(int, length, pfrag->size - pfrag->offset);

		ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
			      offset, copy, 0, skb);
		if (ret < 0)
			return -EFAULT;

		/* copy was successful so update the size parameters */
		skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
				   copy);
		frg_cnt++;
		pfrag->offset += copy;
		get_page(pfrag->page);

		skb->truesize += copy;
		refcount_add(copy, &sk->sk_wmem_alloc);
		skb->len += copy;
		skb->data_len += copy;
		offset += copy;
		length -= copy;

	} while (length > 0);

	return 0;
}
EXPORT_SYMBOL(skb_append_datato_frags);

  int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
  			 int offset, size_t size)
  {
  	int i = skb_shinfo(skb)->nr_frags;
  
  	if (skb_can_coalesce(skb, i, page, offset)) {
  		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
  	} else if (i < MAX_SKB_FRAGS) {
  		get_page(page);
  		skb_fill_page_desc(skb, i, page, offset, size);
  	} else {
  		return -EMSGSIZE;
  	}
  
  	return 0;
  }
  EXPORT_SYMBOL_GPL(skb_append_pagefrags);
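
/* Example (editorial): attach one page of data as a fragment and keep the
 * byte accounting consistent.  The truesize update mirrors what callers of
 * skb_append_pagefrags() typically do; charging full pages may be fairer.
 */
static int example_attach_page(struct sk_buff *skb, struct page *page,
			       int offset, size_t size)
{
	int err = skb_append_pagefrags(skb, page, offset, size);

	if (err)		/* -EMSGSIZE: no room for another fragment */
		return err;

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	return 0;
}
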
/**
 *	skb_pull_rcsum - pull skb and update receive checksum
 *	@skb: buffer to update
 *	@len: length of data pulled
 *
 *	This function performs an skb_pull on the packet and updates
 *	the CHECKSUM_COMPLETE checksum.  It should be used on
 *	receive path processing instead of skb_pull unless you know
 *	that the checksum difference is zero (e.g., a valid IP header)
 *	or you are setting ip_summed to CHECKSUM_NONE.
 */
void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
	unsigned char *data = skb->data;

	BUG_ON(len > skb->len);
	__skb_pull(skb, len);
	skb_postpull_rcsum(skb, data, len);
	return skb->data;
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
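
/* Example (editorial): strip an 'hlen'-byte encapsulation header on the
 * receive path while keeping a CHECKSUM_COMPLETE value consistent.
 */
static void *example_strip_header(struct sk_buff *skb, unsigned int hlen)
{
	if (!pskb_may_pull(skb, hlen))
		return NULL;	/* header is not fully present */
	return skb_pull_rcsum(skb, hlen);
}
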
/**
 *	skb_segment - Perform protocol segmentation on skb.
 *	@head_skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function performs segmentation on the given skb.  It returns
 *	a pointer to the first in a list of new skbs for the segments.
 *	In case of error it returns ERR_PTR(err).
 */
struct sk_buff *skb_segment(struct sk_buff *head_skb,
			    netdev_features_t features)
{
	struct sk_buff *segs = NULL;
	struct sk_buff *tail = NULL;
	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
	unsigned int mss = skb_shinfo(head_skb)->gso_size;
	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
	struct sk_buff *frag_skb = head_skb;
	unsigned int offset = doffset;
	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
	unsigned int partial_segs = 0;
	unsigned int headroom;
	unsigned int len = head_skb->len;
	__be16 proto;
	bool csum, sg;
	int nfrags = skb_shinfo(head_skb)->nr_frags;
	int err = -ENOMEM;
	int i = 0;
	int pos;
	int dummy;

	__skb_push(head_skb, doffset);
	proto = skb_network_protocol(head_skb, &dummy);
	if (unlikely(!proto))
		return ERR_PTR(-EINVAL);
	sg = !!(features & NETIF_F_SG);
	csum = !!can_checksum_protocol(features, proto);

	if (sg && csum && (mss != GSO_BY_FRAGS)) {
		if (!(features & NETIF_F_GSO_PARTIAL)) {
			struct sk_buff *iter;
			unsigned int frag_len;

			if (!list_skb ||
			    !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
				goto normal;

			/* If we get here then all the required
			 * GSO features except frag_list are supported.
			 * Try to split the SKB to multiple GSO SKBs
			 * with no frag_list.
			 * Currently we can do that only when the buffers don't
			 * have a linear part and all the buffers except
			 * the last are of the same length.
			 */
			frag_len = list_skb->len;
			skb_walk_frags(head_skb, iter) {
				if (frag_len != iter->len && iter->next)
					goto normal;
				if (skb_headlen(iter) && !iter->head_frag)
					goto normal;

				len -= iter->len;
			}

			if (len != frag_len)
				goto normal;
		}

		/* GSO partial only requires that we trim off any excess that
		 * doesn't fit into an MSS sized block, so take care of that
		 * now.
		 */
		partial_segs = len / mss;
		if (partial_segs > 1)
			mss *= partial_segs;
		else
			partial_segs = 0;
	}

normal:
	headroom = skb_headroom(head_skb);
	pos = skb_headlen(head_skb);

	do {
		struct sk_buff *nskb;
		skb_frag_t *nskb_frag;
		int hsize;
		int size;

		if (unlikely(mss == GSO_BY_FRAGS)) {
			len = list_skb->len;
		} else {
			len = head_skb->len - offset;
			if (len > mss)
				len = mss;
		}

		hsize = skb_headlen(head_skb) - offset;
		if (hsize < 0)
			hsize = 0;
		if (hsize > len || !sg)
			hsize = len;

		if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
		    (skb_headlen(list_skb) == len || sg)) {
			BUG_ON(skb_headlen(list_skb) > len);

			i = 0;
			nfrags = skb_shinfo(list_skb)->nr_frags;
			frag = skb_shinfo(list_skb)->frags;
			frag_skb = list_skb;
			pos += skb_headlen(list_skb);

			while (pos < offset + len) {
				BUG_ON(i >= nfrags);
				size = skb_frag_size(frag);
				if (pos + size > offset + len)
					break;

				i++;
				pos += size;
				frag++;
			}
3475

1a4cedaf6   Michael S. Tsirkin   skbuff: skb_segme...
3476
3477
  			nskb = skb_clone(list_skb, GFP_ATOMIC);
  			list_skb = list_skb->next;
89319d380   Herbert Xu   net: Add frag_lis...
3478
3479
3480
  
  			if (unlikely(!nskb))
  				goto err;
9d8506cc2   Herbert Xu   gso: handle new f...
3481
3482
3483
3484
  			if (unlikely(pskb_trim(nskb, len))) {
  				kfree_skb(nskb);
  				goto err;
  			}
ec47ea824   Alexander Duyck   skb: Add inline h...
3485
  			hsize = skb_end_offset(nskb);
89319d380   Herbert Xu   net: Add frag_lis...
3486
3487
3488
3489
  			if (skb_cow_head(nskb, doffset + headroom)) {
  				kfree_skb(nskb);
  				goto err;
  			}
ec47ea824   Alexander Duyck   skb: Add inline h...
3490
  			nskb->truesize += skb_end_offset(nskb) - hsize;
89319d380   Herbert Xu   net: Add frag_lis...
3491
3492
3493
  			skb_release_head_state(nskb);
  			__skb_push(nskb, doffset);
  		} else {
c93bdd0e0   Mel Gorman   netvm: allow skb ...
3494
  			nskb = __alloc_skb(hsize + doffset + headroom,
df5771ffe   Michael S. Tsirkin   skbuff: skb_segme...
3495
  					   GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
c93bdd0e0   Mel Gorman   netvm: allow skb ...
3496
  					   NUMA_NO_NODE);
89319d380   Herbert Xu   net: Add frag_lis...
3497
3498
3499
3500
3501
3502
3503
  
  			if (unlikely(!nskb))
  				goto err;
  
  			skb_reserve(nskb, headroom);
  			__skb_put(nskb, doffset);
  		}
f4c50d990   Herbert Xu   [NET]: Add softwa...
3504
3505
3506
3507
3508
3509
  
  		if (segs)
  			tail->next = nskb;
  		else
  			segs = nskb;
  		tail = nskb;
df5771ffe   Michael S. Tsirkin   skbuff: skb_segme...
3510
  		__copy_skb_header(nskb, head_skb);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3511

030737bcc   Eric Dumazet   net: generalize s...
3512
  		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
fcdfe3a7f   Vlad Yasevich   net: Correctly se...
3513
  		skb_reset_mac_len(nskb);
68c331631   Pravin B Shelar   v4 GRE: Add TCP s...
3514

df5771ffe   Michael S. Tsirkin   skbuff: skb_segme...
3515
  		skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
68c331631   Pravin B Shelar   v4 GRE: Add TCP s...
3516
3517
  						 nskb->data - tnl_hlen,
  						 doffset + tnl_hlen);
89319d380   Herbert Xu   net: Add frag_lis...
3518

9d8506cc2   Herbert Xu   gso: handle new f...
3519
  		if (nskb->len == len + doffset)
1cdbcb795   Simon Horman   net: Loosen const...
3520
  			goto perform_csum_check;
89319d380   Herbert Xu   net: Add frag_lis...
3521

7fbeffed7   Alexander Duyck   net: Update remot...
3522
3523
3524
  		if (!sg) {
  			if (!nskb->remcsum_offload)
  				nskb->ip_summed = CHECKSUM_NONE;
764434562   Alexander Duyck   net: Move GSO csu...
3525
3526
3527
3528
  			SKB_GSO_CB(nskb)->csum =
  				skb_copy_and_csum_bits(head_skb, offset,
  						       skb_put(nskb, len),
  						       len, 0);
7e2b10c1e   Tom Herbert   net: Support for ...
3529
  			SKB_GSO_CB(nskb)->csum_start =
764434562   Alexander Duyck   net: Move GSO csu...
3530
  				skb_headroom(nskb) + doffset;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3531
3532
  			continue;
  		}
8cb19905e   Michael S. Tsirkin   skbuff: skb_segme...
3533
  		nskb_frag = skb_shinfo(nskb)->frags;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3534

df5771ffe   Michael S. Tsirkin   skbuff: skb_segme...
3535
  		skb_copy_from_linear_data_offset(head_skb, offset,
d626f62b1   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
3536
  						 skb_put(nskb, hsize), hsize);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3537

fff88030b   Willem de Bruijn   skbuff: only inhe...
3538
3539
  		skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
  					      SKBTX_SHARED_FRAG;
cef401de7   Eric Dumazet   net: fix possible...
3540

9d8506cc2   Herbert Xu   gso: handle new f...
3541
3542
  		while (pos < offset + len) {
  			if (i >= nfrags) {
1a4cedaf6   Michael S. Tsirkin   skbuff: skb_segme...
3543
  				BUG_ON(skb_headlen(list_skb));
9d8506cc2   Herbert Xu   gso: handle new f...
3544
3545
  
  				i = 0;
1a4cedaf6   Michael S. Tsirkin   skbuff: skb_segme...
3546
3547
  				nfrags = skb_shinfo(list_skb)->nr_frags;
  				frag = skb_shinfo(list_skb)->frags;
1fd819ecb   Michael S. Tsirkin   skbuff: skb_segme...
3548
  				frag_skb = list_skb;
9d8506cc2   Herbert Xu   gso: handle new f...
3549
3550
  
  				BUG_ON(!nfrags);
1a4cedaf6   Michael S. Tsirkin   skbuff: skb_segme...
3551
  				list_skb = list_skb->next;
9d8506cc2   Herbert Xu   gso: handle new f...
3552
3553
3554
3555
3556
3557
3558
3559
  			}
  
  			if (unlikely(skb_shinfo(nskb)->nr_frags >=
  				     MAX_SKB_FRAGS)) {
  				net_warn_ratelimited(
  					"skb_segment: too many frags: %u %u
  ",
  					pos, mss);
f208fbad9   Eric Dumazet   net: skb_segment(...
3560
  				err = -EINVAL;
9d8506cc2   Herbert Xu   gso: handle new f...
3561
3562
  				goto err;
  			}
1fd819ecb   Michael S. Tsirkin   skbuff: skb_segme...
3563
3564
  			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
  				goto err;
17155ea82   Willem de Bruijn   skbuff: orphan fr...
3565
3566
  			if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
  				goto err;
1fd819ecb   Michael S. Tsirkin   skbuff: skb_segme...
3567

4e1beba12   Michael S. Tsirkin   skbuff: skb_segme...
3568
  			*nskb_frag = *frag;
8cb19905e   Michael S. Tsirkin   skbuff: skb_segme...
3569
3570
  			__skb_frag_ref(nskb_frag);
  			size = skb_frag_size(nskb_frag);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3571
3572
  
  			if (pos < offset) {
8cb19905e   Michael S. Tsirkin   skbuff: skb_segme...
3573
3574
  				nskb_frag->page_offset += offset - pos;
  				skb_frag_size_sub(nskb_frag, offset - pos);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3575
  			}
89319d380   Herbert Xu   net: Add frag_lis...
3576
  			skb_shinfo(nskb)->nr_frags++;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3577
3578
3579
  
  			if (pos + size <= offset + len) {
  				i++;
4e1beba12   Michael S. Tsirkin   skbuff: skb_segme...
3580
  				frag++;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3581
3582
  				pos += size;
  			} else {
8cb19905e   Michael S. Tsirkin   skbuff: skb_segme...
3583
  				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
89319d380   Herbert Xu   net: Add frag_lis...
3584
  				goto skip_fraglist;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3585
  			}
8cb19905e   Michael S. Tsirkin   skbuff: skb_segme...
3586
  			nskb_frag++;
f4c50d990   Herbert Xu   [NET]: Add softwa...
3587
  		}
89319d380   Herbert Xu   net: Add frag_lis...
3588
  skip_fraglist:
f4c50d990   Herbert Xu   [NET]: Add softwa...
3589
3590
3591
  		nskb->data_len = len - hsize;
  		nskb->len += nskb->data_len;
  		nskb->truesize += nskb->data_len;
ec5f06156   Pravin B Shelar   net: Kill link be...
3592

1cdbcb795   Simon Horman   net: Loosen const...
3593
  perform_csum_check:
7fbeffed7   Alexander Duyck   net: Update remot...
3594
  		if (!csum) {
f208fbad9   Eric Dumazet   net: skb_segment(...
3595
3596
3597
  			if (skb_has_shared_frag(nskb) &&
  			    __skb_linearize(nskb))
  				goto err;
7fbeffed7   Alexander Duyck   net: Update remot...
3598
3599
  			if (!nskb->remcsum_offload)
  				nskb->ip_summed = CHECKSUM_NONE;
764434562   Alexander Duyck   net: Move GSO csu...
3600
3601
3602
  			SKB_GSO_CB(nskb)->csum =
  				skb_checksum(nskb, doffset,
  					     nskb->len - doffset, 0);
7e2b10c1e   Tom Herbert   net: Support for ...
3603
  			SKB_GSO_CB(nskb)->csum_start =
764434562   Alexander Duyck   net: Move GSO csu...
3604
  				skb_headroom(nskb) + doffset;
ec5f06156   Pravin B Shelar   net: Kill link be...
3605
  		}
df5771ffe   Michael S. Tsirkin   skbuff: skb_segme...
3606
  	} while ((offset += len) < head_skb->len);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3607

bec3cfdca   Eric Dumazet   net: skb_segment(...
3608
3609
3610
3611
3612
  	/* Some callers want to get the end of the list.
  	 * Put it in segs->prev to avoid walking the list.
  	 * (see validate_xmit_skb_list() for example)
  	 */
  	segs->prev = tail;
432c856fc   Toshiaki Makita   net: skb_segment(...
3613

802ab55ad   Alexander Duyck   GSO: Support part...
3614
  	if (partial_segs) {
07b26c945   Steffen Klassert   gso: Support part...
3615
  		struct sk_buff *iter;
802ab55ad   Alexander Duyck   GSO: Support part...
3616
  		int type = skb_shinfo(head_skb)->gso_type;
07b26c945   Steffen Klassert   gso: Support part...
3617
  		unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
802ab55ad   Alexander Duyck   GSO: Support part...
3618
3619
  
  		/* Update type to add partial and then remove dodgy if set */
07b26c945   Steffen Klassert   gso: Support part...
3620
  		type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
802ab55ad   Alexander Duyck   GSO: Support part...
3621
3622
3623
3624
3625
  		type &= ~SKB_GSO_DODGY;
  
  		/* Update GSO info and prepare to start updating headers on
  		 * our way back down the stack of protocols.
  		 */
07b26c945   Steffen Klassert   gso: Support part...
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
  		for (iter = segs; iter; iter = iter->next) {
  			skb_shinfo(iter)->gso_size = gso_size;
  			skb_shinfo(iter)->gso_segs = partial_segs;
  			skb_shinfo(iter)->gso_type = type;
  			SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset;
  		}
  
  		if (tail->len - doffset <= gso_size)
  			skb_shinfo(tail)->gso_size = 0;
  		else if (tail != segs)
  			skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
802ab55ad   Alexander Duyck   GSO: Support part...
3637
  	}
432c856fc   Toshiaki Makita   net: skb_segment(...
3638
3639
3640
3641
3642
3643
3644
3645
3646
  	/* Following permits correct backpressure, for protocols
  	 * using skb_set_owner_w().
  	 * Idea is to tranfert ownership from head_skb to last segment.
  	 */
  	if (head_skb->destructor == sock_wfree) {
  		swap(tail->truesize, head_skb->truesize);
  		swap(tail->destructor, head_skb->destructor);
  		swap(tail->sk, head_skb->sk);
  	}
f4c50d990   Herbert Xu   [NET]: Add softwa...
3647
3648
3649
  	return segs;
  
  err:
289dccbe1   Eric Dumazet   net: use kfree_sk...
3650
  	kfree_skb_list(segs);
f4c50d990   Herbert Xu   [NET]: Add softwa...
3651
3652
  	return ERR_PTR(err);
  }
f4c50d990   Herbert Xu   [NET]: Add softwa...
3653
  EXPORT_SYMBOL_GPL(skb_segment);
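
/*
 * Example (illustrative sketch): skb_segment() is typically reached from a
 * protocol's ->gso_segment() callback after the protocol header has been
 * pulled; "foohdr" and the callback name below are hypothetical.
 *
 *	static struct sk_buff *foo_gso_segment(struct sk_buff *skb,
 *					       netdev_features_t features)
 *	{
 *		if (!pskb_may_pull(skb, sizeof(struct foohdr)))
 *			return ERR_PTR(-EINVAL);
 *		__skb_pull(skb, sizeof(struct foohdr));
 *		return skb_segment(skb, features);
 *	}
 *
 * Real callers (e.g. TCP) additionally fix up the per-segment headers on
 * the returned list.
 */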

int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
	unsigned int offset = skb_gro_offset(skb);
	unsigned int headlen = skb_headlen(skb);
	unsigned int len = skb_gro_len(skb);
	struct sk_buff *lp, *p = *head;
	unsigned int delta_truesize;

	if (unlikely(p->len + len >= 65536))
		return -E2BIG;

	lp = NAPI_GRO_CB(p)->last;
	pinfo = skb_shinfo(lp);

	if (headlen <= offset) {
		skb_frag_t *frag;
		skb_frag_t *frag2;
		int i = skbinfo->nr_frags;
		int nr_frags = pinfo->nr_frags + i;

		if (nr_frags > MAX_SKB_FRAGS)
			goto merge;

		offset -= headlen;
		pinfo->nr_frags = nr_frags;
		skbinfo->nr_frags = 0;

		frag = pinfo->frags + nr_frags;
		frag2 = skbinfo->frags + i;
		do {
			*--frag = *--frag2;
		} while (--i);

		frag->page_offset += offset;
		skb_frag_size_sub(frag, offset);

		/* all fragments truesize : remove (head size + sk_buff) */
		delta_truesize = skb->truesize -
				 SKB_TRUESIZE(skb_end_offset(skb));

		skb->truesize -= skb->data_len;
		skb->len -= skb->data_len;
		skb->data_len = 0;

		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE;
		goto done;
	} else if (skb->head_frag) {
		int nr_frags = pinfo->nr_frags;
		skb_frag_t *frag = pinfo->frags + nr_frags;
		struct page *page = virt_to_head_page(skb->head);
		unsigned int first_size = headlen - offset;
		unsigned int first_offset;

		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
			goto merge;

		first_offset = skb->data -
			       (unsigned char *)page_address(page) +
			       offset;

		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;

		frag->page.p	  = page;
		frag->page_offset = first_offset;
		skb_frag_size_set(frag, first_size);

		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
		/* We don't need to clear skbinfo->nr_frags here */

		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
		goto done;
	}

merge:
	delta_truesize = skb->truesize;
	if (offset > headlen) {
		unsigned int eat = offset - headlen;

		skbinfo->frags[0].page_offset += eat;
		skb_frag_size_sub(&skbinfo->frags[0], eat);
		skb->data_len -= eat;
		skb->len -= eat;
		offset = headlen;
	}

	__skb_pull(skb, offset);

	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;
	NAPI_GRO_CB(p)->last = skb;
	__skb_header_release(skb);
	lp = p;

done:
	NAPI_GRO_CB(p)->count++;
	p->data_len += len;
	p->truesize += delta_truesize;
	p->len += len;
	if (lp != p) {
		lp->data_len += len;
		lp->truesize += delta_truesize;
		lp->len += len;
	}
	NAPI_GRO_CB(skb)->same_flow = 1;
	return 0;
}
EXPORT_SYMBOL_GPL(skb_gro_receive);
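
/*
 * Example (illustrative sketch): skb_gro_receive() is meant to be called
 * from a protocol's ->gro_receive() callback once a held packet of the
 * same flow has been found; the flush handling shown is a simplification.
 *
 *	if (skb_gro_receive(head, skb))
 *		NAPI_GRO_CB(skb)->flush = 1;
 */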

void __init skb_init(void)
{
	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					      NULL);
	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						sizeof(struct sk_buff_fclones),
						0,
						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
						NULL);
}

static int
__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
	       unsigned int recursion_level)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int elt = 0;

	if (unlikely(recursion_level >= 24))
		return -EMSGSIZE;

	if (copy > 0) {
		if (copy > len)
			copy = len;
		sg_set_buf(sg, skb->data + offset, copy);
		elt++;
		if ((len -= copy) == 0)
			return elt;
		offset += copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		if ((copy = end - offset) > 0) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				return -EMSGSIZE;

			if (copy > len)
				copy = len;
			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
					frag->page_offset+offset-start);
			elt++;
			if (!(len -= copy))
				return elt;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end, ret;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (unlikely(elt && sg_is_last(&sg[elt - 1])))
				return -EMSGSIZE;

			if (copy > len)
				copy = len;
			ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start,
					      copy, recursion_level + 1);
			if (unlikely(ret < 0))
				return ret;
			elt += ret;
			if ((len -= copy) == 0)
				return elt;
			offset += copy;
		}
		start = end;
	}
	BUG_ON(len);
	return elt;
}

  /**
   *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
   *	@skb: Socket buffer containing the buffers to be mapped
   *	@sg: The scatter-gather list to map into
   *	@offset: The offset into the buffer's contents to start mapping
   *	@len: Length of buffer space to be mapped
   *
   *	Fill the specified scatter-gather list with mappings/pointers into a
   *	region of the buffer space attached to a socket buffer. Returns either
   *	the number of scatterlist items used, or -EMSGSIZE if the contents
   *	could not fit.
   */
  int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
  {
  	int nsg = __skb_to_sgvec(skb, sg, offset, len, 0);
  
  	if (nsg <= 0)
  		return nsg;
  
  	sg_mark_end(&sg[nsg - 1]);
  
  	return nsg;
  }
  EXPORT_SYMBOL_GPL(skb_to_sgvec);
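
/*
 * Example (illustrative sketch): mapping an entire skb for a crypto or DMA
 * operation; the array size is an assumption (an skb with a frag_list can
 * need more entries than MAX_SKB_FRAGS + 1).
 *
 *	struct scatterlist sg[MAX_SKB_FRAGS + 1];
 *	int nsg;
 *
 *	sg_init_table(sg, ARRAY_SIZE(sg));
 *	nsg = skb_to_sgvec(skb, sg, 0, skb->len);
 *	if (nsg < 0)
 *		return nsg;
 */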

/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps the skb to the
 * given sglist without marking the sg entry that contains the last skb data
 * as the end. So the caller can manipulate the sg list at will when padding
 * new data after the first call, without calling sg_unmark_end to extend the
 * sg list.
 *
 * Scenario to use skb_to_sgvec_nomark:
 * 1. sg_init_table
 * 2. skb_to_sgvec_nomark(payload1)
 * 3. skb_to_sgvec_nomark(payload2)
 *
 * This is equivalent to:
 * 1. sg_init_table
 * 2. skb_to_sgvec(payload1)
 * 3. sg_unmark_end
 * 4. skb_to_sgvec(payload2)
 *
 * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
 * is preferable.
 */
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
			int offset, int len)
{
	return __skb_to_sgvec(skb, sg, offset, len, 0);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
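
/*
 * Example (illustrative sketch of the scenario above; the element counts
 * and the final end marking are the caller's responsibility):
 *
 *	sg_init_table(sg, nelems);
 *	n1 = skb_to_sgvec_nomark(skb1, sg, 0, skb1->len);
 *	if (n1 < 0)
 *		return n1;
 *	n2 = skb_to_sgvec_nomark(skb2, sg + n1, 0, skb2->len);
 *	if (n2 < 0)
 *		return n2;
 *	sg_mark_end(&sg[n1 + n2 - 1]);
 */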
  /**
   *	skb_cow_data - Check that a socket buffer's data buffers are writable
   *	@skb: The socket buffer to check.
   *	@tailbits: Amount of trailing space to be added
   *	@trailer: Returned pointer to the skb where the @tailbits space begins
   *
   *	Make sure that the data buffers attached to a socket buffer are
   *	writable. If they are not, private copies are made of the data buffers
   *	and the socket buffer is set to use these instead.
   *
   *	If @tailbits is given, make sure that there is space to write @tailbits
   *	bytes of data beyond current end of socket buffer.  @trailer will be
   *	set to point to the skb in which this space begins.
   *
   *	The number of scatterlist elements required to completely map the
   *	COW'd and extended socket buffer will be returned.
   */
  int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
  {
  	int copyflag;
  	int elt;
  	struct sk_buff *skb1, **skb_p;
  
  	/* If skb is cloned or its head is paged, reallocate
  	 * head pulling out all the pages (pages are considered not writable
  	 * at the moment even if they are anonymous).
  	 */
  	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
  	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
  		return -ENOMEM;
  
	/* Easy case. Most packets will go this way. */
	if (!skb_has_frag_list(skb)) {
		/* A little trouble: not enough space for the trailer.
		 * This should not happen when the stack is tuned to generate
		 * good frames. OK, on a miss we reallocate and reserve even
		 * more space, 128 bytes is fair. */

		if (skb_tailroom(skb) < tailbits &&
		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
			return -ENOMEM;

		/* Voila! */
		*trailer = skb;
		return 1;
	}

	/* Misery. We are in trouble, going to mince the fragments... */

	elt = 1;
	skb_p = &skb_shinfo(skb)->frag_list;
	copyflag = 0;

	while ((skb1 = *skb_p) != NULL) {
		int ntail = 0;

		/* The fragment is partially pulled by someone,
		 * this can happen on input. Copy it and everything
		 * after it. */

		if (skb_shared(skb1))
			copyflag = 1;

		/* If the skb is the last, worry about trailer. */

		if (skb1->next == NULL && tailbits) {
			if (skb_shinfo(skb1)->nr_frags ||
			    skb_has_frag_list(skb1) ||
			    skb_tailroom(skb1) < tailbits)
				ntail = tailbits + 128;
		}

		if (copyflag ||
		    skb_cloned(skb1) ||
		    ntail ||
		    skb_shinfo(skb1)->nr_frags ||
		    skb_has_frag_list(skb1)) {
			struct sk_buff *skb2;

			/* Fuck, we are miserable poor guys... */
			if (ntail == 0)
				skb2 = skb_copy(skb1, GFP_ATOMIC);
			else
				skb2 = skb_copy_expand(skb1,
						       skb_headroom(skb1),
						       ntail,
						       GFP_ATOMIC);
			if (unlikely(skb2 == NULL))
				return -ENOMEM;

			if (skb1->sk)
				skb_set_owner_w(skb2, skb1->sk);

			/* Looking around. Are we still alive?
			 * OK, link the new skb, drop the old one */

			skb2->next = skb1->next;
			*skb_p = skb2;
			kfree_skb(skb1);
			skb1 = skb2;
		}
		elt++;
		*trailer = skb1;
		skb_p = &skb1->next;
	}

	return elt;
}
EXPORT_SYMBOL_GPL(skb_cow_data);
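
/*
 * Example (illustrative sketch, modelled on IPsec-style transforms; the
 * sg allocation step is elided):
 *
 *	struct sk_buff *trailer;
 *	int nfrags;
 *
 *	nfrags = skb_cow_data(skb, tailbits, &trailer);
 *	if (nfrags < 0)
 *		return nfrags;
 *	... allocate an sg table with nfrags entries, then fill it
 *	with skb_to_sgvec() ...
 */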

static void sock_rmem_free(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}

static void skb_set_err_queue(struct sk_buff *skb)
{
	/* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
	 * So, it is safe to (mis)use it to mark skbs on the error queue.
	 */
	skb->pkt_type = PACKET_OUTGOING;
	BUILD_BUG_ON(PACKET_OUTGOING == 0);
}

/*
 * Note: We don't mem charge error packets (no sk_forward_alloc changes)
 */
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned int)sk->sk_rcvbuf)
		return -ENOMEM;

	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = sock_rmem_free;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	skb_set_err_queue(skb);

	/* before exiting rcu section, make sure dst is refcounted */
	skb_dst_force(skb);

	skb_queue_tail(&sk->sk_error_queue, skb);
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);
	return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);

static bool is_icmp_err_skb(const struct sk_buff *skb)
{
	return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
		       SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
}

struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
{
	struct sk_buff_head *q = &sk->sk_error_queue;
	struct sk_buff *skb, *skb_next = NULL;
	bool icmp_next = false;
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	skb = __skb_dequeue(q);
	if (skb && (skb_next = skb_peek(q))) {
		icmp_next = is_icmp_err_skb(skb_next);
		if (icmp_next)
			sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
	}
	spin_unlock_irqrestore(&q->lock, flags);

	if (is_icmp_err_skb(skb) && !icmp_next)
		sk->sk_err = 0;

	if (skb_next)
		sk->sk_error_report(sk);

	return skb;
}
EXPORT_SYMBOL(sock_dequeue_err_skb);

/**
 * skb_clone_sk - create clone of skb, and take reference to socket
 * @skb: the skb to clone
 *
 * This function creates a clone of a buffer that holds a reference on
 * sk_refcnt.  Buffers created via this function are meant to be
 * returned using sock_queue_err_skb, or freed via kfree_skb.
 *
 * When passing buffers allocated with this function to sock_queue_err_skb
 * it is necessary to wrap the call with sock_hold/sock_put in order to
 * prevent the socket from being released prior to being enqueued on
 * the sk_error_queue.
 */
struct sk_buff *skb_clone_sk(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct sk_buff *clone;

	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
		return NULL;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone) {
		sock_put(sk);
		return NULL;
	}

	clone->sk = sk;
	clone->destructor = sock_efree;

	return clone;
}
EXPORT_SYMBOL(skb_clone_sk);
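
/*
 * Example (illustrative sketch of the sock_hold/sock_put wrapping described
 * above; the error policy is an assumption):
 *
 *	struct sk_buff *clone = skb_clone_sk(skb);
 *	struct sock *sk;
 *
 *	if (!clone)
 *		return;
 *	sk = clone->sk;
 *	sock_hold(sk);
 *	if (sock_queue_err_skb(sk, clone))
 *		kfree_skb(clone);
 *	sock_put(sk);
 */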

static void __skb_complete_tx_timestamp(struct sk_buff *skb,
					struct sock *sk,
					int tstype,
					bool opt_stats)
{
	struct sock_exterr_skb *serr;
	int err;

	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
	serr->ee.ee_info = tstype;
	serr->opt_stats = opt_stats;
	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
		serr->ee.ee_data = skb_shinfo(skb)->tskey;
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			serr->ee.ee_data -= sk->sk_tskey;
	}

	err = sock_queue_err_skb(sk, skb);

	if (err)
		kfree_skb(skb);
}

static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
	bool ret;

	if (likely(sysctl_tstamp_allow_data || tsonly))
		return true;

	read_lock_bh(&sk->sk_callback_lock);
	ret = sk->sk_socket && sk->sk_socket->file &&
	      file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW);
	read_unlock_bh(&sk->sk_callback_lock);
	return ret;
}
4143
4144
4145
4146
  void skb_complete_tx_timestamp(struct sk_buff *skb,
  			       struct skb_shared_hwtstamps *hwtstamps)
  {
  	struct sock *sk = skb->sk;
b245be1f4   Willem de Bruijn   net-timestamp: no...
4147
  	if (!skb_may_tx_timestamp(sk, false))
265ba7a04   Willem de Bruijn   sock: free skb in...
4148
  		goto err;
b245be1f4   Willem de Bruijn   net-timestamp: no...
4149

9ac25fc06   Eric Dumazet   net: fix socket r...
4150
4151
4152
  	/* Take a reference to prevent skb_orphan() from freeing the socket,
  	 * but only if the socket refcount is not zero.
  	 */
41c6d650f   Reshetova, Elena   net: convert sock...
4153
  	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
9ac25fc06   Eric Dumazet   net: fix socket r...
4154
  		*skb_hwtstamps(skb) = *hwtstamps;
4ef1b2869   Soheil Hassas Yeganeh   tcp: mark skbs wi...
4155
  		__skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
9ac25fc06   Eric Dumazet   net: fix socket r...
4156
  		sock_put(sk);
265ba7a04   Willem de Bruijn   sock: free skb in...
4157
  		return;
9ac25fc06   Eric Dumazet   net: fix socket r...
4158
  	}
265ba7a04   Willem de Bruijn   sock: free skb in...
4159
4160
4161
  
  err:
  	kfree_skb(skb);
37846ef01   Alexander Duyck   net-timestamp: Me...
4162
4163
4164
4165
4166
4167
4168
4169
  }
  EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
  
  void __skb_tstamp_tx(struct sk_buff *orig_skb,
  		     struct skb_shared_hwtstamps *hwtstamps,
  		     struct sock *sk, int tstype)
  {
  	struct sk_buff *skb;
4ef1b2869   Soheil Hassas Yeganeh   tcp: mark skbs wi...
4170
  	bool tsonly, opt_stats = false;
37846ef01   Alexander Duyck   net-timestamp: Me...
4171

3a8dd9711   Willem de Bruijn   sock: fix possibl...
4172
4173
  	if (!sk)
  		return;
b50a5c70f   Miroslav Lichvar   net: allow simult...
4174
4175
4176
  	if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
  	    skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
  		return;
3a8dd9711   Willem de Bruijn   sock: fix possibl...
4177
4178
  	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
  	if (!skb_may_tx_timestamp(sk, tsonly))
37846ef01   Alexander Duyck   net-timestamp: Me...
4179
  		return;
1c885808e   Francis Yan   tcp: SOF_TIMESTAM...
4180
4181
4182
4183
  	if (tsonly) {
  #ifdef CONFIG_INET
  		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
  		    sk->sk_protocol == IPPROTO_TCP &&
4ef1b2869   Soheil Hassas Yeganeh   tcp: mark skbs wi...
4184
  		    sk->sk_type == SOCK_STREAM) {
1c885808e   Francis Yan   tcp: SOF_TIMESTAM...
4185
  			skb = tcp_get_timestamping_opt_stats(sk);
4ef1b2869   Soheil Hassas Yeganeh   tcp: mark skbs wi...
4186
4187
  			opt_stats = true;
  		} else
1c885808e   Francis Yan   tcp: SOF_TIMESTAM...
4188
4189
4190
  #endif
  			skb = alloc_skb(0, GFP_ATOMIC);
  	} else {
49ca0d8bf   Willem de Bruijn   net-timestamp: no...
4191
  		skb = skb_clone(orig_skb, GFP_ATOMIC);
1c885808e   Francis Yan   tcp: SOF_TIMESTAM...
4192
  	}
37846ef01   Alexander Duyck   net-timestamp: Me...
4193
4194
  	if (!skb)
  		return;
49ca0d8bf   Willem de Bruijn   net-timestamp: no...
4195
  	if (tsonly) {
fff88030b   Willem de Bruijn   skbuff: only inhe...
4196
4197
  		skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags &
  					     SKBTX_ANY_TSTAMP;
49ca0d8bf   Willem de Bruijn   net-timestamp: no...
4198
4199
4200
4201
4202
4203
4204
  		skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
  	}
  
  	if (hwtstamps)
  		*skb_hwtstamps(skb) = *hwtstamps;
  	else
  		skb->tstamp = ktime_get_real();
4ef1b2869   Soheil Hassas Yeganeh   tcp: mark skbs wi...
4205
  	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
37846ef01   Alexander Duyck   net-timestamp: Me...
4206
  }
e7fd28853   Willem de Bruijn   net-timestamp: SC...
4207
4208
4209
4210
4211
4212
4213
4214
  EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
  
  void skb_tstamp_tx(struct sk_buff *orig_skb,
  		   struct skb_shared_hwtstamps *hwtstamps)
  {
  	return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
  			       SCM_TSTAMP_SND);
  }
ac45f602e   Patrick Ohly   net: infrastructu...
4215
  EXPORT_SYMBOL_GPL(skb_tstamp_tx);
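
/*
 * Example (illustrative sketch): a driver reporting a hardware transmit
 * timestamp from its TX-completion path; "hw_ns" stands for a
 * device-provided nanosecond value.
 *
 *	struct skb_shared_hwtstamps hwts = {};
 *
 *	hwts.hwtstamp = ns_to_ktime(hw_ns);
 *	skb_tstamp_tx(skb, &hwts);
 */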

void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
	struct sock *sk = skb->sk;
	struct sock_exterr_skb *serr;
	int err = 1;

	skb->wifi_acked_valid = 1;
	skb->wifi_acked = acked;

	serr = SKB_EXT_ERR(skb);
	memset(serr, 0, sizeof(*serr));
	serr->ee.ee_errno = ENOMSG;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;

	/* Take a reference to prevent skb_orphan() from freeing the socket,
	 * but only if the socket refcount is not zero.
	 */
	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
		err = sock_queue_err_skb(sk, skb);
		sock_put(sk);
	}
	if (err)
		kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);

/**
 * skb_partial_csum_set - set up and verify partial csum values for packet
 * @skb: the skb to set
 * @start: the number of bytes after skb->data to start checksumming.
 * @off: the offset from start to place the checksum.
 *
 * For untrusted partially-checksummed packets, we need to make sure the values
 * for skb->csum_start and skb->csum_offset are valid so we don't oops.
 *
 * This function checks and sets those values and skb->ip_summed: if this
 * returns false you should drop the packet.
 */
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
{
	if (unlikely(start > skb_headlen(skb)) ||
	    unlikely((int)start + off > skb_headlen(skb) - 2)) {
		net_warn_ratelimited("bad partial csum: csum=%u/%u len=%u\n",
				     start, off, skb_headlen(skb));
		return false;
	}
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = skb_headroom(skb) + start;
	skb->csum_offset = off;
	skb_set_transport_header(skb, start);
	return true;
}
EXPORT_SYMBOL_GPL(skb_partial_csum_set);
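
/*
 * Example (illustrative sketch): validating checksum metadata supplied by
 * an untrusted source such as a virtio-net style header; "vstart" and
 * "voff" stand for the guest-supplied values.
 *
 *	if (!skb_partial_csum_set(skb, vstart, voff))
 *		return -EINVAL;
 */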
  static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
  			       unsigned int max)
  {
  	if (skb_headlen(skb) >= len)
  		return 0;
  
  	/* If we need to pullup then pullup to the max, so we
  	 * won't need to do it again.
  	 */
  	if (max > skb->len)
  		max = skb->len;
  
  	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
  		return -ENOMEM;
  
  	if (skb_headlen(skb) < len)
  		return -EPROTO;
  
  	return 0;
  }
  #define MAX_TCP_HDR_LEN (15 * 4)
  
  static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
  				      typeof(IPPROTO_IP) proto,
  				      unsigned int off)
  {
  	switch (proto) {
  		int err;
  
  	case IPPROTO_TCP:
  		err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
  					  off + MAX_TCP_HDR_LEN);
  		if (!err && !skb_partial_csum_set(skb, off,
  						  offsetof(struct tcphdr,
  							   check)))
  			err = -EPROTO;
  		return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
  
  	case IPPROTO_UDP:
  		err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
  					  off + sizeof(struct udphdr));
  		if (!err && !skb_partial_csum_set(skb, off,
  						  offsetof(struct udphdr,
  							   check)))
  			err = -EPROTO;
  		return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
  	}
  
  	return ERR_PTR(-EPROTO);
  }
  /* This value should be large enough to cover a tagged ethernet header plus
   * maximally sized IP and TCP or UDP headers.
   */
  #define MAX_IP_HDR_LEN 128
  static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
  {
  	unsigned int off;
  	bool fragment;
  	__sum16 *csum;
  	int err;
  
  	fragment = false;
  
  	err = skb_maybe_pull_tail(skb,
  				  sizeof(struct iphdr),
  				  MAX_IP_HDR_LEN);
  	if (err < 0)
  		goto out;
  
  	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
  		fragment = true;
  
  	off = ip_hdrlen(skb);
  
  	err = -EPROTO;
  
  	if (fragment)
  		goto out;
  	csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
  	if (IS_ERR(csum))
  		return PTR_ERR(csum);
  	if (recalculate)
  		*csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
  					   ip_hdr(skb)->daddr,
  					   skb->len - off,
  					   ip_hdr(skb)->protocol, 0);
  	err = 0;
  
  out:
  	return err;
  }
  
  /* This value should be large enough to cover a tagged ethernet header plus
   * an IPv6 header, all options, and a maximal TCP or UDP header.
   */
  #define MAX_IPV6_HDR_LEN 256
  
  #define OPT_HDR(type, skb, off) \
  	(type *)(skb_network_header(skb) + (off))
  
  static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
  {
  	int err;
  	u8 nexthdr;
  	unsigned int off;
  	unsigned int len;
  	bool fragment;
  	bool done;
  	__sum16 *csum;
  
  	fragment = false;
  	done = false;
  
  	off = sizeof(struct ipv6hdr);
  
  	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
  	if (err < 0)
  		goto out;
  
  	nexthdr = ipv6_hdr(skb)->nexthdr;
  
  	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
  	while (off <= len && !done) {
  		switch (nexthdr) {
  		case IPPROTO_DSTOPTS:
  		case IPPROTO_HOPOPTS:
  		case IPPROTO_ROUTING: {
  			struct ipv6_opt_hdr *hp;
  
  			err = skb_maybe_pull_tail(skb,
  						  off +
  						  sizeof(struct ipv6_opt_hdr),
  						  MAX_IPV6_HDR_LEN);
  			if (err < 0)
  				goto out;
  
  			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
  			nexthdr = hp->nexthdr;
  			off += ipv6_optlen(hp);
  			break;
  		}
  		case IPPROTO_AH: {
  			struct ip_auth_hdr *hp;
  
  			err = skb_maybe_pull_tail(skb,
  						  off +
  						  sizeof(struct ip_auth_hdr),
  						  MAX_IPV6_HDR_LEN);
  			if (err < 0)
  				goto out;
  
  			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
  			nexthdr = hp->nexthdr;
  			off += ipv6_authlen(hp);
  			break;
  		}
  		case IPPROTO_FRAGMENT: {
  			struct frag_hdr *hp;
  
  			err = skb_maybe_pull_tail(skb,
  						  off +
  						  sizeof(struct frag_hdr),
  						  MAX_IPV6_HDR_LEN);
  			if (err < 0)
  				goto out;
  
  			hp = OPT_HDR(struct frag_hdr, skb, off);
  
  			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
  				fragment = true;
  
  			nexthdr = hp->nexthdr;
  			off += sizeof(struct frag_hdr);
  			break;
  		}
  		default:
  			done = true;
  			break;
  		}
  	}
  
  	err = -EPROTO;
  
  	if (!done || fragment)
  		goto out;
  	csum = skb_checksum_setup_ip(skb, nexthdr, off);
  	if (IS_ERR(csum))
  		return PTR_ERR(csum);
  	if (recalculate)
  		*csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
  					 &ipv6_hdr(skb)->daddr,
  					 skb->len - off, nexthdr, 0);
  	err = 0;
  
  out:
  	return err;
  }
  
  /**
   * skb_checksum_setup - set up partial checksum offset
   * @skb: the skb to set up
   * @recalculate: if true the pseudo-header checksum will be recalculated
   */
  int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
  {
  	int err;
  
  	switch (skb->protocol) {
  	case htons(ETH_P_IP):
  		err = skb_checksum_setup_ipv4(skb, recalculate);
  		break;
  
  	case htons(ETH_P_IPV6):
  		err = skb_checksum_setup_ipv6(skb, recalculate);
  		break;
  
  	default:
  		err = -EPROTO;
  		break;
  	}
  
  	return err;
  }
  EXPORT_SYMBOL(skb_checksum_setup);
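
/*
 * Example (illustrative sketch): a backend driver receiving partially
 * checksummed packets from an untrusted frontend might repair them before
 * passing them up the stack:
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_setup(skb, true))
 *		goto drop;
 */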
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
  /**
   * skb_checksum_maybe_trim - maybe trims the given skb
   * @skb: the skb to check
   * @transport_len: the data length beyond the network header
   *
   * Checks whether the given skb has data beyond the given transport length.
   * If so, returns a cloned skb trimmed to this transport length.
   * Otherwise returns the provided skb. Returns NULL in error cases
   * (e.g. transport_len exceeds skb length or out-of-memory).
   *
a516993f0   Linus Lüssing   net: fix wrong sk...
4505
4506
   * Caller needs to set the skb transport header and free any returned skb if it
   * differs from the provided skb.
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4507
4508
4509
4510
4511
4512
4513
   */
  static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
  					       unsigned int transport_len)
  {
  	struct sk_buff *skb_chk;
  	unsigned int len = skb_transport_offset(skb) + transport_len;
  	int ret;
a516993f0   Linus Lüssing   net: fix wrong sk...
4514
  	if (skb->len < len)
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4515
  		return NULL;
a516993f0   Linus Lüssing   net: fix wrong sk...
4516
  	else if (skb->len == len)
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4517
  		return skb;
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4518
4519
  
  	skb_chk = skb_clone(skb, GFP_ATOMIC);
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
  	if (!skb_chk)
  		return NULL;
  
  	ret = pskb_trim_rcsum(skb_chk, len);
  	if (ret) {
  		kfree_skb(skb_chk);
  		return NULL;
  	}
  
  	return skb_chk;
  }
  
  /**
   * skb_checksum_trimmed - validate checksum of an skb
   * @skb: the skb to check
   * @transport_len: the data length beyond the network header
   * @skb_chkf: checksum function to use
   *
   * Applies the given checksum function skb_chkf to the provided skb.
   * Returns a checked and maybe trimmed skb. Returns NULL on error.
   *
   * If the skb has data beyond the given transport length, then a
   * trimmed & cloned skb is checked and returned.
   *
a516993f0   Linus Lüssing   net: fix wrong sk...
4544
4545
   * Caller needs to set the skb transport header and free any returned skb if it
   * differs from the provided skb.
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4546
4547
4548
4549
4550
4551
4552
   */
  struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
  				     unsigned int transport_len,
  				     __sum16(*skb_chkf)(struct sk_buff *skb))
  {
  	struct sk_buff *skb_chk;
  	unsigned int offset = skb_transport_offset(skb);
fcba67c94   Linus Lüssing   net: fix two spar...
4553
  	__sum16 ret;
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4554
4555
4556
  
  	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
  	if (!skb_chk)
a516993f0   Linus Lüssing   net: fix wrong sk...
4557
  		goto err;
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4558

a516993f0   Linus Lüssing   net: fix wrong sk...
4559
4560
  	if (!pskb_may_pull(skb_chk, offset))
  		goto err;
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4561

9b368814b   Linus Lüssing   net: fix bridge m...
4562
  	skb_pull_rcsum(skb_chk, offset);
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4563
  	ret = skb_chkf(skb_chk);
9b368814b   Linus Lüssing   net: fix bridge m...
4564
  	skb_push_rcsum(skb_chk, offset);
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4565

a516993f0   Linus Lüssing   net: fix wrong sk...
4566
4567
  	if (ret)
  		goto err;
9afd85c9e   Linus Lüssing   net: Export IGMP/...
4568
4569
  
  	return skb_chk;
a516993f0   Linus Lüssing   net: fix wrong sk...
4570
4571
4572
4573
4574
4575
  
  err:
  	if (skb_chk && skb_chk != skb)
  		kfree_skb(skb_chk);
  
  	return NULL;
  }
  EXPORT_SYMBOL(skb_checksum_trimmed);
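
  /* Usage sketch (illustrative, not from this file): a protocol checksum
   * validator sets the transport header, passes a protocol-specific checksum
   * function ("my_proto_chkf" and "thoff" are hypothetical stand-ins) and
   * frees the returned skb when it is a trimmed clone:
   *
   *	skb_set_transport_header(skb, thoff);
   *	skb_chk = skb_checksum_trimmed(skb, transport_len, my_proto_chkf);
   *	if (!skb_chk)
   *		return -EINVAL;
   *	... inspect skb_chk here ...
   *	if (skb_chk != skb)
   *		kfree_skb(skb_chk);
   */
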
  void __skb_warn_lro_forwarding(const struct sk_buff *skb)
  {
  	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
  			     skb->dev->name);
  }
  EXPORT_SYMBOL(__skb_warn_lro_forwarding);
  
  void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
  {
  	if (head_stolen) {
  		skb_release_head_state(skb);
  		kmem_cache_free(skbuff_head_cache, skb);
  	} else {
  		__kfree_skb(skb);
  	}
  }
  EXPORT_SYMBOL(kfree_skb_partial);
  
  /**
   * skb_try_coalesce - try to merge skb to prior one
   * @to: prior buffer
   * @from: buffer to add
   * @fragstolen: set to true if @from's head was stolen as a page fragment
   * @delta_truesize: how much more was allocated than was requested
   */
  bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
  		      bool *fragstolen, int *delta_truesize)
  {
  	int i, delta, len = from->len;
  
  	*fragstolen = false;
  
  	if (skb_cloned(to))
  		return false;
  
  	if (len <= skb_tailroom(to)) {
  		if (len)
  			BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
  		*delta_truesize = 0;
  		return true;
  	}
  
  	if (skb_has_frag_list(to) || skb_has_frag_list(from))
  		return false;
  	if (skb_zcopy(to) || skb_zcopy(from))
  		return false;
  
  	if (skb_headlen(from) != 0) {
  		struct page *page;
  		unsigned int offset;
  
  		if (skb_shinfo(to)->nr_frags +
  		    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
  			return false;
  
  		if (skb_head_is_locked(from))
  			return false;
  
  		delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
  
  		page = virt_to_head_page(from->head);
  		offset = from->data - (unsigned char *)page_address(page);
  
  		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
  				   page, offset, skb_headlen(from));
  		*fragstolen = true;
  	} else {
  		if (skb_shinfo(to)->nr_frags +
  		    skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
  			return false;
  		delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
  	}
  
  	WARN_ON_ONCE(delta < len);
  
  	memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
  	       skb_shinfo(from)->frags,
  	       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
  	skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
  
  	if (!skb_cloned(from))
  		skb_shinfo(from)->nr_frags = 0;
  	/* if the skb is not cloned this does nothing
  	 * since we set nr_frags to 0.
  	 */
  	for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
  		skb_frag_ref(from, i);
  
  	to->truesize += delta;
  	to->len += len;
  	to->data_len += len;
  
  	*delta_truesize = delta;
  	return true;
  }
  EXPORT_SYMBOL(skb_try_coalesce);
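
  /* Usage sketch (illustrative, not from this file): receive paths typically
   * pair skb_try_coalesce() with kfree_skb_partial() and fold the truesize
   * delta into their memory accounting, roughly:
   *
   *	bool fragstolen;
   *	int delta;
   *
   *	if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
   *		kfree_skb_partial(skb, fragstolen);
   *		atomic_add(delta, &sk->sk_rmem_alloc);
   *	}
   */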
  
  /**
   * skb_scrub_packet - scrub an skb
   *
   * @skb: buffer to clean
   * @xnet: packet is crossing netns
   *
   * skb_scrub_packet can be used after encapsulating or decapsulating a packet
   * into/from a tunnel. Some information has to be cleared during these
   * operations.
   * skb_scrub_packet can also be used to clean an skb before injecting it into
   * another namespace (@xnet == true). We have to clear all information in the
   * skb that could impact namespace isolation.
   */
  void skb_scrub_packet(struct sk_buff *skb, bool xnet)
  {
  	skb->tstamp = 0;
  	skb->pkt_type = PACKET_HOST;
  	skb->skb_iif = 0;
  	skb->ignore_df = 0;
  	skb_dst_drop(skb);
  	secpath_reset(skb);
  	nf_reset(skb);
  	nf_reset_trace(skb);
  
  	if (!xnet)
  		return;
  	ipvs_reset(skb);
  	skb_orphan(skb);
  	skb->mark = 0;
  }
  EXPORT_SYMBOL_GPL(skb_scrub_packet);
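
  /* Usage sketch (illustrative, not from this file): a tunnel receive path
   * scrubs before handing the packet to a possibly different netns, with
   * "in_dev"/"out_dev" standing in for the ingress and egress devices:
   *
   *	skb_scrub_packet(skb, !net_eq(dev_net(in_dev), dev_net(out_dev)));
   */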
  
  /**
   * skb_gso_transport_seglen - Return length of individual segments of a gso packet
   *
   * @skb: GSO skb
   *
   * skb_gso_transport_seglen is used to determine the real size of the
   * individual segments, including Layer4 headers (TCP/UDP).
   *
   * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
   */
  unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
  {
  	const struct skb_shared_info *shinfo = skb_shinfo(skb);
  	unsigned int thlen = 0;

  	if (skb->encapsulation) {
  		thlen = skb_inner_transport_header(skb) -
  			skb_transport_header(skb);

  		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
  			thlen += inner_tcp_hdrlen(skb);
  	} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
  		thlen = tcp_hdrlen(skb);
  	} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
  		thlen = sizeof(struct sctphdr);
  	}
  	/* UFO sets gso_size to the size of the fragmentation
  	 * payload, i.e. the size of the L4 (UDP) header is already
  	 * accounted for.
  	 */
  	return thlen + shinfo->gso_size;
  }
  EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
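
  /* Illustrative relationship (not code from this file): the per-segment size
   * at L3 adds the network header length on top of the value computed here,
   * which is exactly what skb_gso_network_seglen() does:
   *
   *	seglen = (skb_transport_header(skb) - skb_network_header(skb)) +
   *		 skb_gso_transport_seglen(skb);
   */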

  /**
   * skb_gso_validate_mtu - Return whether a given skb fits a given MTU
   *
   * @skb: GSO skb
   * @mtu: MTU to validate against
   *
   * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU
   * once split.
   */
  bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu)
  {
  	const struct skb_shared_info *shinfo = skb_shinfo(skb);
  	const struct sk_buff *iter;
  	unsigned int hlen;
  
  	hlen = skb_gso_network_seglen(skb);
  
  	if (shinfo->gso_size != GSO_BY_FRAGS)
  		return hlen <= mtu;
  
  	/* Undo this so we can re-use header sizes */
  	hlen -= GSO_BY_FRAGS;
  
  	skb_walk_frags(skb, iter) {
  		if (hlen + skb_headlen(iter) > mtu)
  			return false;
  	}
  
  	return true;
  }
  EXPORT_SYMBOL_GPL(skb_gso_validate_mtu);
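
  /* Usage sketch (illustrative, not from this file): a forwarding path can use
   * this to decide whether a GSO skb needs to be segmented or fragmented for a
   * given egress MTU ("drop_or_fragment" is a hypothetical label):
   *
   *	if (skb_is_gso(skb) && !skb_gso_validate_mtu(skb, mtu))
   *		goto drop_or_fragment;
   */
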
  static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
  {
  	int mac_len;
  	if (skb_cow(skb, skb_headroom(skb)) < 0) {
  		kfree_skb(skb);
  		return NULL;
  	}
  	mac_len = skb->data - skb_mac_header(skb);
  	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
  		memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
  			mac_len - VLAN_HLEN - ETH_TLEN);
  	}
  	skb->mac_header += VLAN_HLEN;
  	return skb;
  }
  
  struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
  {
  	struct vlan_hdr *vhdr;
  	u16 vlan_tci;
  	if (unlikely(skb_vlan_tag_present(skb))) {
  		/* vlan_tci is already set-up so leave this for another time */
  		return skb;
  	}
  
  	skb = skb_share_check(skb, GFP_ATOMIC);
  	if (unlikely(!skb))
  		goto err_free;
  
  	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
  		goto err_free;
  
  	vhdr = (struct vlan_hdr *)skb->data;
  	vlan_tci = ntohs(vhdr->h_vlan_TCI);
  	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
  
  	skb_pull_rcsum(skb, VLAN_HLEN);
  	vlan_set_encap_proto(skb, vhdr);
  
  	skb = skb_reorder_vlan_header(skb);
  	if (unlikely(!skb))
  		goto err_free;
  
  	skb_reset_network_header(skb);
  	skb_reset_transport_header(skb);
  	skb_reset_mac_len(skb);
  
  	return skb;
  
  err_free:
  	kfree_skb(skb);
  	return NULL;
  }
  EXPORT_SYMBOL(skb_vlan_untag);
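
  /* Usage sketch (illustrative, not from this file): the receive path calls
   * this when a frame still carries its 802.1Q header in the payload:
   *
   *	if (eth_type_vlan(skb->protocol)) {
   *		skb = skb_vlan_untag(skb);
   *		if (unlikely(!skb))
   *			goto out;
   *	}
   */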

  int skb_ensure_writable(struct sk_buff *skb, int write_len)
  {
  	if (!pskb_may_pull(skb, write_len))
  		return -ENOMEM;
  
  	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
  		return 0;
  
  	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
  }
  EXPORT_SYMBOL(skb_ensure_writable);
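
  /* Usage sketch (illustrative, not from this file): before rewriting the
   * first bytes of a packet - here an Ethernet destination, with "new_mac"
   * a hypothetical address - make them private and writable:
   *
   *	err = skb_ensure_writable(skb, ETH_HLEN);
   *	if (unlikely(err))
   *		return err;
   *	ether_addr_copy(eth_hdr(skb)->h_dest, new_mac);
   */
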
  /* remove VLAN header from packet and update csum accordingly.
   * expects a non skb_vlan_tag_present skb with a vlan tag payload
   */
  int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
  {
  	struct vlan_hdr *vhdr;
  	int offset = skb->data - skb_mac_header(skb);
  	int err;
  	if (WARN_ONCE(offset,
  		      "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
  		      offset)) {
  		return -EINVAL;
  	}
  	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
  	if (unlikely(err))
  		return err;
  
  	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
  
  	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
  	*vlan_tci = ntohs(vhdr->h_vlan_TCI);
  
  	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
  	__skb_pull(skb, VLAN_HLEN);
  
  	vlan_set_encap_proto(skb, vhdr);
  	skb->mac_header += VLAN_HLEN;
  
  	if (skb_network_offset(skb) < ETH_HLEN)
  		skb_set_network_header(skb, ETH_HLEN);
  
  	skb_reset_mac_len(skb);
  
  	return err;
  }
  EXPORT_SYMBOL(__skb_vlan_pop);

  /* Pop a vlan tag either from hwaccel or from payload.
   * Expects skb->data at mac header.
   */
  int skb_vlan_pop(struct sk_buff *skb)
  {
  	u16 vlan_tci;
  	__be16 vlan_proto;
  	int err;
  	if (likely(skb_vlan_tag_present(skb))) {
  		skb->vlan_tci = 0;
  	} else {
  		if (unlikely(!eth_type_vlan(skb->protocol)))
  			return 0;
  
  		err = __skb_vlan_pop(skb, &vlan_tci);
  		if (err)
  			return err;
  	}
  	/* move next vlan tag to hw accel tag */
  	if (likely(!eth_type_vlan(skb->protocol)))
  		return 0;
  
  	vlan_proto = skb->protocol;
  	err = __skb_vlan_pop(skb, &vlan_tci);
  	if (unlikely(err))
  		return err;
  
  	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
  	return 0;
  }
  EXPORT_SYMBOL(skb_vlan_pop);
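
  /* Usage sketch (illustrative, not from this file): with skb->data at the mac
   * header, untag a frame no matter whether the tag sits in hwaccel metadata
   * or in the payload ("drop" is a hypothetical label):
   *
   *	err = skb_vlan_pop(skb);
   *	if (unlikely(err))
   *		goto drop;
   */
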
  /* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
   * Expects skb->data at mac header.
   */
  int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
  {
  	if (skb_vlan_tag_present(skb)) {
  		int offset = skb->data - skb_mac_header(skb);
  		int err;
  		if (WARN_ONCE(offset,
  			      "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
  			      offset)) {
  			return -EINVAL;
  		}
  		err = __vlan_insert_tag(skb, skb->vlan_proto,
  					skb_vlan_tag_get(skb));
  		if (err)
  			return err;

  		skb->protocol = skb->vlan_proto;
  		skb->mac_len += VLAN_HLEN;

  		skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
  	}
  	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
  	return 0;
  }
  EXPORT_SYMBOL(skb_vlan_push);
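
  /* Usage sketch (illustrative, not from this file): re-tagging a frame as
   * done by users like act_vlan, with "new_tci" a hypothetical tag value:
   *
   *	err = skb_vlan_push(skb, htons(ETH_P_8021Q), new_tci);
   *	if (unlikely(err))
   *		goto drop;
   */
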
  /**
   * alloc_skb_with_frags - allocate skb with page frags
   *
   * @header_len: size of linear part
   * @data_len: needed length in frags
   * @max_page_order: max page order desired.
   * @errcode: pointer to error code if any
   * @gfp_mask: allocation mask
   *
   * This can be used to allocate a paged skb, given a maximal order for frags.
   */
  struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
  				     unsigned long data_len,
  				     int max_page_order,
  				     int *errcode,
  				     gfp_t gfp_mask)
  {
  	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
  	unsigned long chunk;
  	struct sk_buff *skb;
  	struct page *page;
  	gfp_t gfp_head;
  	int i;
  
  	*errcode = -EMSGSIZE;
  	/* Note this test could be relaxed, if we manage to allocate
  	 * high order pages...
  	 */
  	if (npages > MAX_SKB_FRAGS)
  		return NULL;
  
  	gfp_head = gfp_mask;
  	if (gfp_head & __GFP_DIRECT_RECLAIM)
  		gfp_head |= __GFP_RETRY_MAYFAIL;
  
  	*errcode = -ENOBUFS;
  	skb = alloc_skb(header_len, gfp_head);
  	if (!skb)
  		return NULL;
  
  	skb->truesize += npages << PAGE_SHIFT;
  
  	for (i = 0; npages > 0; i++) {
  		int order = max_page_order;
  
  		while (order) {
  			if (npages >= 1 << order) {
  				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
  						   __GFP_COMP |
  						   __GFP_NOWARN |
  						   __GFP_NORETRY,
  						   order);
  				if (page)
  					goto fill_page;
  				/* Do not retry other high order allocations */
  				order = 1;
  				max_page_order = 0;
  			}
  			order--;
  		}
  		page = alloc_page(gfp_mask);
  		if (!page)
  			goto failure;
  fill_page:
  		chunk = min_t(unsigned long, data_len,
  			      PAGE_SIZE << order);
  		skb_fill_page_desc(skb, i, page, 0, chunk);
  		data_len -= chunk;
  		npages -= 1 << order;
  	}
  	return skb;
  
  failure:
  	kfree_skb(skb);
  	return NULL;
  }
  EXPORT_SYMBOL(alloc_skb_with_frags);
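
  /* Usage sketch (illustrative, not from this file): allocate a mostly-paged
   * skb with a small linear header, the way sock_alloc_send_pskb() does:
   *
   *	skb = alloc_skb_with_frags(header_len, data_len,
   *				   PAGE_ALLOC_COSTLY_ORDER, &errcode,
   *				   GFP_KERNEL);
   *	if (!skb)
   *		return ERR_PTR(errcode);
   */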
  
  /* carve out the first off bytes from skb when off < headlen */
  static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
  				    const int headlen, gfp_t gfp_mask)
  {
  	int i;
  	int size = skb_end_offset(skb);
  	int new_hlen = headlen - off;
  	u8 *data;
  
  	size = SKB_DATA_ALIGN(size);
  
  	if (skb_pfmemalloc(skb))
  		gfp_mask |= __GFP_MEMALLOC;
  	data = kmalloc_reserve(size +
  			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
  			       gfp_mask, NUMA_NO_NODE, NULL);
  	if (!data)
  		return -ENOMEM;
  
  	size = SKB_WITH_OVERHEAD(ksize(data));
  
  	/* Copy real data, and all frags */
  	skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
  	skb->len -= off;
  
  	memcpy((struct skb_shared_info *)(data + size),
  	       skb_shinfo(skb),
  	       offsetof(struct skb_shared_info,
  			frags[skb_shinfo(skb)->nr_frags]));
  	if (skb_cloned(skb)) {
  		/* drop the old head gracefully */
  		if (skb_orphan_frags(skb, gfp_mask)) {
  			kfree(data);
  			return -ENOMEM;
  		}
  		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  			skb_frag_ref(skb, i);
  		if (skb_has_frag_list(skb))
  			skb_clone_fraglist(skb);
  		skb_release_data(skb);
  	} else {
  		/* we can reuse the existing refcount - all we did was
  		 * relocate values
  		 */
  		skb_free_head(skb);
  	}
  	skb->head = data;
  	skb->data = data;
  	skb->head_frag = 0;
  #ifdef NET_SKBUFF_DATA_USES_OFFSET
  	skb->end = size;
  #else
  	skb->end = skb->head + size;
  #endif
  	skb_set_tail_pointer(skb, skb_headlen(skb));
  	skb_headers_offset_update(skb, 0);
  	skb->cloned = 0;
  	skb->hdr_len = 0;
  	skb->nohdr = 0;
  	atomic_set(&skb_shinfo(skb)->dataref, 1);
  
  	return 0;
  }
  
  static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
  
  /* carve out the first eat bytes from skb's frag_list. May recurse into
   * pskb_carve()
   */
  static int pskb_carve_frag_list(struct sk_buff *skb,
  				struct skb_shared_info *shinfo, int eat,
  				gfp_t gfp_mask)
  {
  	struct sk_buff *list = shinfo->frag_list;
  	struct sk_buff *clone = NULL;
  	struct sk_buff *insp = NULL;
  
  	do {
  		if (!list) {
  			pr_err("Not enough bytes to eat. Want %d
  ", eat);
  			return -EFAULT;
  		}
  		if (list->len <= eat) {
  			/* Eaten as whole. */
  			eat -= list->len;
  			list = list->next;
  			insp = list;
  		} else {
  			/* Eaten partially. */
  			if (skb_shared(list)) {
  				clone = skb_clone(list, gfp_mask);
  				if (!clone)
  					return -ENOMEM;
  				insp = list->next;
  				list = clone;
  			} else {
  				/* This may be pulled without problems. */
  				insp = list;
  			}
  			if (pskb_carve(list, eat, gfp_mask) < 0) {
  				kfree_skb(clone);
  				return -ENOMEM;
  			}
  			break;
  		}
  	} while (eat);
  
  	/* Free pulled out fragments. */
  	while ((list = shinfo->frag_list) != insp) {
  		shinfo->frag_list = list->next;
  		kfree_skb(list);
  	}
  	/* And insert new clone at head. */
  	if (clone) {
  		clone->next = list;
  		shinfo->frag_list = clone;
  	}
  	return 0;
  }
  
  /* carve off first len bytes from skb. Split line (off) is in the
   * non-linear part of skb
   */
  static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
  				       int pos, gfp_t gfp_mask)
  {
  	int i, k = 0;
  	int size = skb_end_offset(skb);
  	u8 *data;
  	const int nfrags = skb_shinfo(skb)->nr_frags;
  	struct skb_shared_info *shinfo;
  
  	size = SKB_DATA_ALIGN(size);
  
  	if (skb_pfmemalloc(skb))
  		gfp_mask |= __GFP_MEMALLOC;
  	data = kmalloc_reserve(size +
  			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
  			       gfp_mask, NUMA_NO_NODE, NULL);
  	if (!data)
  		return -ENOMEM;
  
  	size = SKB_WITH_OVERHEAD(ksize(data));
  
  	memcpy((struct skb_shared_info *)(data + size),
  	       skb_shinfo(skb), offsetof(struct skb_shared_info,
  					 frags[skb_shinfo(skb)->nr_frags]));
  	if (skb_orphan_frags(skb, gfp_mask)) {
  		kfree(data);
  		return -ENOMEM;
  	}
  	shinfo = (struct skb_shared_info *)(data + size);
  	for (i = 0; i < nfrags; i++) {
  		int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);
  
  		if (pos + fsize > off) {
  			shinfo->frags[k] = skb_shinfo(skb)->frags[i];
  
  			if (pos < off) {
  				/* Split frag.
  				 * We have two variants in this case:
  				 * 1. Move all the frag to the second
  				 *    part, if it is possible. F.e.
  				 *    this approach is mandatory for TUX,
  				 *    where splitting is expensive.
  				 * 2. Split it accurately. This is what we do.
  				 */
  				shinfo->frags[0].page_offset += off - pos;
  				skb_frag_size_sub(&shinfo->frags[0], off - pos);
  			}
  			skb_frag_ref(skb, i);
  			k++;
  		}
  		pos += fsize;
  	}
  	shinfo->nr_frags = k;
  	if (skb_has_frag_list(skb))
  		skb_clone_fraglist(skb);
  
  	if (k == 0) {
  		/* split line is in frag list */
  		pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
  	}
  	skb_release_data(skb);
  	skb->head = data;
  	skb->head_frag = 0;
  	skb->data = data;
  #ifdef NET_SKBUFF_DATA_USES_OFFSET
  	skb->end = size;
  #else
  	skb->end = skb->head + size;
  #endif
  	skb_reset_tail_pointer(skb);
  	skb_headers_offset_update(skb, 0);
  	skb->cloned   = 0;
  	skb->hdr_len  = 0;
  	skb->nohdr    = 0;
  	skb->len -= off;
  	skb->data_len = skb->len;
  	atomic_set(&skb_shinfo(skb)->dataref, 1);
  	return 0;
  }
  
  /* remove len bytes from the beginning of the skb */
  static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
  {
  	int headlen = skb_headlen(skb);
  
  	if (len < headlen)
  		return pskb_carve_inside_header(skb, len, headlen, gfp);
  	else
  		return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
  }
  
  /* Extract to_copy bytes starting at off from skb, and return this in
   * a new skb
   */
  struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
  			     int to_copy, gfp_t gfp)
  {
  	struct sk_buff  *clone = skb_clone(skb, gfp);
  
  	if (!clone)
  		return NULL;
  
  	if (pskb_carve(clone, off, gfp) < 0 ||
  	    pskb_trim(clone, to_copy)) {
  		kfree_skb(clone);
  		return NULL;
  	}
  	return clone;
  }
  EXPORT_SYMBOL(pskb_extract);
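
  /* Usage sketch (illustrative, not from this file): take a private copy of
   * part of a queued skb, as RDS-TCP does, without disturbing the original:
   *
   *	clone = pskb_extract(skb, offset, to_copy, GFP_ATOMIC);
   *	if (!clone)
   *		return -ENOMEM;
   */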
  
  /**
   * skb_condense - try to get rid of fragments/frag_list if possible
   * @skb: buffer
   *
   * Can be used to save memory before skb is added to a busy queue.
   * If packet has bytes in frags and enough tail room in skb->head,
   * pull all of them, so that we can free the frags right now and adjust
   * truesize.
   * Notes:
   *	We do not reallocate skb->head thus can not fail.
   *	Caller must re-evaluate skb->truesize if needed.
   */
  void skb_condense(struct sk_buff *skb)
  {
  	if (skb->data_len) {
  		if (skb->data_len > skb->end - skb->tail ||
  		    skb_cloned(skb))
  			return;

  		/* Nice, we can free page frag(s) right now */
  		__pskb_pull_tail(skb, skb->data_len);
  	}
  	/* At this point, skb->truesize might be overestimated,
  	 * because skb had a fragment, and fragments do not tell
  	 * their truesize.
  	 * When we pulled its content into skb->head, fragment
  	 * was freed, but __pskb_pull_tail() could not possibly
  	 * adjust skb->truesize, not knowing the frag truesize.
  	 */
  	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
  }
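
  /* Usage sketch (illustrative, not from this file): a receive path under
   * queue pressure can condense before enqueueing, then charge the refreshed
   * truesize to the socket:
   *
   *	skb_condense(skb);
   *	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
   */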