Blame view

net/ipv4/ip_fragment.c 24.5 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
6
7
  /*
   * INET		An implementation of the TCP/IP protocol suite for the LINUX
   *		operating system.  INET is implemented using the  BSD Socket
   *		interface as the means of communication with the user level.
   *
   *		The IP fragmentation functionality.
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
8
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
   * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
113aa838e   Alan Cox   net: Rationalise ...
10
   *		Alan Cox <alan@lxorguk.ukuu.org.uk>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
12
13
14
15
16
17
18
19
20
21
22
   *
   * Fixes:
   *		Alan Cox	:	Split from ip.c , see ip_input.c for history.
   *		David S. Miller :	Begin massive cleanup...
   *		Andi Kleen	:	Add sysctls.
   *		xxxx		:	Overlapfrag bug.
   *		Ultima          :       ip_expire() kernel panic.
   *		Bill Hawes	:	Frag accounting and evictor fixes.
   *		John McDonald	:	0 length frag bug.
   *		Alexey Kuznetsov:	SMP races, threading, cleanup.
   *		Patrick McHardy :	LRU queue of frag heads for evictor.
   */
afd465030   Joe Perches   net: ipv4: Standa...
23
  #define pr_fmt(fmt) "IPv4: " fmt
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
24
  #include <linux/compiler.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
25
26
27
28
29
30
31
32
33
34
35
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/mm.h>
  #include <linux/jiffies.h>
  #include <linux/skbuff.h>
  #include <linux/list.h>
  #include <linux/ip.h>
  #include <linux/icmp.h>
  #include <linux/netdevice.h>
  #include <linux/jhash.h>
  #include <linux/random.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
36
  #include <linux/slab.h>
e9017b551   Shan Wei   IP: Send an ICMP ...
37
38
  #include <net/route.h>
  #include <net/dst.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
39
40
41
42
  #include <net/sock.h>
  #include <net/ip.h>
  #include <net/icmp.h>
  #include <net/checksum.h>
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
43
  #include <net/inetpeer.h>
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
44
  #include <net/inet_frag.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
48
  #include <linux/tcp.h>
  #include <linux/udp.h>
  #include <linux/inet.h>
  #include <linux/netfilter_ipv4.h>
6623e3b24   Eric Dumazet   ipv4: IP defragme...
49
  #include <net/inet_ecn.h>
385add906   David Ahern   net: Replace vrf_...
50
  #include <net/l3mdev.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51
52
53
54
55
  
  /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
   * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
   * as well. Or notify me, at least. --ANK
   */
d4ad4d22e   Nikolay Aleksandrov   inet: frags: use ...
56
  static const char ip_frag_cache_name[] = "ip4-frags";
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
57

c91f27fb5   Peter Oskolkov   ip: add helpers t...
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  /* Use skb->cb to track consecutive/adjacent fragments coming at
   * the end of the queue. Nodes in the rb-tree queue will
   * contain "runs" of one or more adjacent fragments.
   *
   * Invariants:
   * - next_frag is NULL at the tail of a "run";
   * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
   *
   * Only the head of a run is linked into the rb-tree; fragments appended
   * to a run have their rb-node cleared and are reached through next_frag.
   */
  struct ipfrag_skb_cb {
  	/* NOTE(review): presumably kept first so that IPCB(skb) keeps
  	 * aliasing it - confirm against the IPCB() definition.
  	 */
  	struct inet_skb_parm	h;
  	/* Next fragment of the same run, NULL for the last one. */
  	struct sk_buff		*next_frag;
  	/* Sum of the fragment lengths of the run; kept on the run head. */
  	int			frag_run_len;
  };
  
  /* Reinterpret the control buffer of @skb as a struct ipfrag_skb_cb. */
  #define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
  
  static void ip4_frag_init_run(struct sk_buff *skb)
  {
  	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
  
  	FRAG_CB(skb)->next_frag = NULL;
  	FRAG_CB(skb)->frag_run_len = skb->len;
  }
  
  /* Append skb to the last "run". */
  static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
  					struct sk_buff *skb)
  {
  	RB_CLEAR_NODE(&skb->rbnode);
  	FRAG_CB(skb)->next_frag = NULL;
  
  	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
  	FRAG_CB(q->fragments_tail)->next_frag = skb;
  	q->fragments_tail = skb;
  }
  
  /* Create a new "run" with the skb. */
  static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
  {
  	if (q->last_run_head)
  		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
  			     &q->last_run_head->rbnode.rb_right);
  	else
  		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
  	rb_insert_color(&skb->rbnode, &q->rb_fragments);
  
  	ip4_frag_init_run(skb);
  	q->fragments_tail = skb;
  	q->last_run_head = skb;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
108
109
  /* Describe an entry in the "incomplete datagrams" queue. */
  struct ipq {
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
110
  	/* Generic fragment queue; code in this file gets back to the
  	 * enclosing ipq with container_of(..., struct ipq, q).
  	 */
  	struct inet_frag_queue q;
6623e3b24   Eric Dumazet   ipv4: IP defragme...
111
  	/* OR of the ip4_frag_ecn() bits of every queued fragment. */
  	u8		ecn; /* RFC3168 support */
d6b915e29   Florian Westphal   ip_fragment: don'...
112
  	u16		max_df_size; /* largest frag with DF set seen */
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
113
114
115
  	/* ifindex of the device of the last queued fragment that had one. */
  	int             iif;
  	/* Value of peer->rid sampled when the last fragment was checked. */
  	unsigned int    rid;
  	/* Peer entry for the source address, NULL when max_dist is 0. */
  	struct inet_peer *peer;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
116
  };
aa1f731e5   Fabian Frederick   inet: frags: remo...
117
  /* Map the ECN field of @tos to a single bit, so that the values of all
   * fragments of a datagram can be OR-ed together in ipq::ecn and later
   * used as an index into ip_frag_ecn_table.
   * NOTE(review): assumes INET_ECN_MASK selects the two ECN bits, giving
   * a result of 1, 2, 4 or 8 - confirm against inet_ecn.h.
   */
  static u8 ip4_frag_ecn(u8 tos)
6623e3b24   Eric Dumazet   ipv4: IP defragme...
118
  {
5173cc057   Eric Dumazet   ipv4: more compli...
119
  	return 1 << (tos & INET_ECN_MASK);
6623e3b24   Eric Dumazet   ipv4: IP defragme...
120
  }
7eb95156d   Pavel Emelyanov   [INET]: Collect f...
121
  /* File-scope inet_frags instance for IPv4; it is set up outside the
   * part of the file visible here.
   */
  static struct inet_frags ip4_frags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122

b3a0c61b7   Peter Oskolkov   ip: process in-or...
123
124
  /* Forward declaration: ip_frag_queue() calls ip_frag_reasm() before its
   * definition further down in the file.
   */
  static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
  			 struct sk_buff *prev_tail, struct net_device *dev);
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
125

abd6523d1   Pavel Emelyanov   [INET]: Consolida...
126

36c777821   Florian Westphal   inet: frag: const...
127
  /* Initialise the IPv4 specific part of fragment queue @q from the lookup
   * key @a (a struct frag_v4_compare_key): copy the key, reset the ECN
   * accumulator and, when a max_dist limit is configured, look up the
   * inet_peer of the source address.
   * NOTE(review): presumably registered as the inet_frags constructor
   * callback - the registration is outside this view.
   */
  static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
c6fda2822   Pavel Emelyanov   [INET]: Consolida...
128
129
  {
  	struct ipq *qp = container_of(q, struct ipq, q);
54db0cc2b   Gao feng   inetpeer: add par...
130
131
132
  	/* Walk from the per-netns frag state up to the owning struct net. */
  	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
  					       frags);
  	struct net *net = container_of(ipv4, struct net, ipv4);
9aee41eff   Eric Dumazet   inet: frags: use ...
133
  	const struct frag_v4_compare_key *key = a;
c6fda2822   Pavel Emelyanov   [INET]: Consolida...
134

9aee41eff   Eric Dumazet   inet: frags: use ...
135
136
  	q->key.v4 = *key;
  	qp->ecn = 0;
0fbf4cb27   Nikolay Borisov   ipv4: namespacify...
137
  	/* A peer is only needed for the ip_frag_too_far() distance check,
  	 * which is skipped entirely when max_dist is 0.
  	 */
  	qp->peer = q->net->max_dist ?
9aee41eff   Eric Dumazet   inet: frags: use ...
138
  		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
192132b9a   David Ahern   net: Add support ...
139
  		NULL;
c6fda2822   Pavel Emelyanov   [INET]: Consolida...
140
  }
aa1f731e5   Fabian Frederick   inet: frags: remo...
141
  /* Release the IPv4 specific resources of fragment queue @q: drop the
   * inet_peer reference taken in ip4_frag_init(), if there is one.
   * NOTE(review): presumably registered as the inet_frags destructor
   * callback - the registration is outside this view.
   */
  static void ip4_frag_free(struct inet_frag_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
142
  {
1e4b82873   Pavel Emelyanov   [INET]: Consolida...
143
144
145
146
147
  	struct ipq *qp;
  
  	qp = container_of(q, struct ipq, q);
  	/* peer is NULL when the queue was created with max_dist == 0. */
  	if (qp->peer)
  		inet_putpeer(qp->peer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149
150
  
  /* Destruction primitives. */
aa1f731e5   Fabian Frederick   inet: frags: remo...
151
  /* Drop one reference on @ipq by handing its embedded generic queue to
   * inet_frag_put().
   */
  static void ipq_put(struct ipq *ipq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
152
  {
673220d64   Eric Dumazet   inet: frags: add ...
153
  	inet_frag_put(&ipq->q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
155
156
157
158
159
160
  }
  
  /* Kill ipq entry. It is not destroyed immediately,
   * because caller (and someone more) holds reference count.
   *
   * Thin wrapper that passes the embedded generic queue to
   * inet_frag_kill().
   */
  static void ipq_kill(struct ipq *ipq)
  {
673220d64   Eric Dumazet   inet: frags: add ...
161
  	inet_frag_kill(&ipq->q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
  }
5cf422808   Andy Zhou   ipv4: introduce f...
163
164
165
166
  /* Return true when defrag user @user is AF_PACKET or falls into one of
   * the conntrack / bridge-conntrack user ranges. ip_expire() uses this to
   * suppress the ICMP "reassembly time exceeded" message for such users
   * unless the route of the expired datagram is local.
   */
  static bool frag_expire_skip_icmp(u32 user)
  {
  	return user == IP_DEFRAG_AF_PACKET ||
  	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
8bc04864a   Andy Zhou   IPv4: skip ICMP f...
167
168
169
  					 __IP_DEFRAG_CONNTRACK_IN_END) ||
  	       ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
  					 __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
5cf422808   Andy Zhou   ipv4: introduce f...
170
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
171
172
173
  /*
   * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
   *
   * Timer callback: @t is the timer embedded in the inet_frag_queue. The
   * queue is killed and the failure counters are bumped; an ICMP
   * "fragment reassembly time exceeded" message is only sent when the
   * first fragment was received, its input device still exists and a
   * route lookup for it succeeds. The reference on the queue is dropped
   * on every path.
   */
0512f7e93   Kees Cook   inet: frags: Conv...
174
  static void ip_expire(struct timer_list *t)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
  {
0512f7e93   Kees Cook   inet: frags: Conv...
176
  	struct inet_frag_queue *frag = from_timer(frag, t, timer);
5b1b3ad46   Eric Dumazet   inet: frags: get ...
177
  	const struct iphdr *iph;
6b921536f   Eric Dumazet   net: sk_buff rbno...
178
  	struct sk_buff *head = NULL;
84a3aa000   Pavel Emelyanov   ipv4: prepare net...
179
  	struct net *net;
5b1b3ad46   Eric Dumazet   inet: frags: get ...
180
181
  	struct ipq *qp;
  	int err;
e521db9d7   Pavel Emelyanov   [INET]: Consolida...
182

0512f7e93   Kees Cook   inet: frags: Conv...
183
  	qp = container_of(frag, struct ipq, q);
84a3aa000   Pavel Emelyanov   ipv4: prepare net...
184
  	net = container_of(qp->q.net, struct net, ipv4.frags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185

ec4fbd647   Eric Dumazet   inet: frag: relea...
186
  	/* RCU covers the dev_get_by_index_rcu() lookup further down. */
  	rcu_read_lock();
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
187
  	spin_lock(&qp->q.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
188

06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
189
  	/* Queue already completed/killed elsewhere: nothing to report. */
  	if (qp->q.flags & INET_FRAG_COMPLETE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
190
191
192
  		goto out;
  
  	ipq_kill(qp);
b45386efa   Eric Dumazet   net: rename IP_IN...
193
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
5b1b3ad46   Eric Dumazet   inet: frags: get ...
194
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
2e404f632   Nikolay Aleksandrov   inet: frags: use ...
195

04b28f406   Dan Carpenter   ipv4: frags: prec...
196
  	/* No ICMP unless the fragment with offset 0 has been received. */
  	if (!(qp->q.flags & INET_FRAG_FIRST_IN))
5b1b3ad46   Eric Dumazet   inet: frags: get ...
197
  		goto out;
2e404f632   Nikolay Aleksandrov   inet: frags: use ...
198

6b921536f   Eric Dumazet   net: sk_buff rbno...
199
200
201
202
203
204
205
206
207
208
209
  	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
  	 * pull the head out of the tree in order to be able to
  	 * deal with head->dev.
  	 */
  	if (qp->q.fragments) {
  		head = qp->q.fragments;
  		qp->q.fragments = head->next;
  	} else {
  		head = skb_rb_first(&qp->q.rb_fragments);
  		if (!head)
  			goto out;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
210
211
212
213
214
215
  		/* If the head starts a run with more fragments, the next
  		 * fragment of the run takes over its place in the tree;
  		 * otherwise the node is simply erased.
  		 */
  		if (FRAG_CB(head)->next_frag)
  			rb_replace_node(&head->rbnode,
  					&FRAG_CB(head)->next_frag->rbnode,
  					&qp->q.rb_fragments);
  		else
  			rb_erase(&head->rbnode, &qp->q.rb_fragments);
6b921536f   Eric Dumazet   net: sk_buff rbno...
216
217
218
219
220
221
222
  		/* Wipe the rb-node storage before it is reused as ->dev. */
  		memset(&head->rbnode, 0, sizeof(head->rbnode));
  		barrier();
  	}
  	if (head == qp->q.fragments_tail)
  		qp->q.fragments_tail = NULL;
  
  	/* head no longer belongs to the queue: give back its memory charge. */
  	sub_frag_mem_limit(qp->q.net, head->truesize);
5b1b3ad46   Eric Dumazet   inet: frags: get ...
223
224
225
  	head->dev = dev_get_by_index_rcu(net, qp->iif);
  	if (!head->dev)
  		goto out;
ec4fbd647   Eric Dumazet   inet: frag: relea...
226

e9017b551   Shan Wei   IP: Send an ICMP ...
227

5b1b3ad46   Eric Dumazet   inet: frags: get ...
228
229
230
  	/* skb has no dst, perform route lookup again */
  	iph = ip_hdr(head);
  	err = ip_route_input_noref(head, iph->daddr, iph->saddr,
c6cffba4f   David S. Miller   ipv4: Fix input r...
231
  					   iph->tos, head->dev);
5b1b3ad46   Eric Dumazet   inet: frags: get ...
232
233
234
235
236
237
238
239
240
  	if (err)
  		goto out;
  
  	/* Only an end host needs to send an ICMP
  	 * "Fragment Reassembly Timeout" message, per RFC792.
  	 */
  	if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
  	    (skb_rtable(head)->rt_type != RTN_LOCAL))
  		goto out;
085a01474   Eric Dumazet   inet: frags: do n...
241
242
  	/* The queue lock is released before icmp_send() is called. */
  	spin_unlock(&qp->q.lock);
  	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
085a01474   Eric Dumazet   inet: frags: do n...
243
  	goto out_rcu_unlock;
5b1b3ad46   Eric Dumazet   inet: frags: get ...
244

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
245
  out:
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
246
  	spin_unlock(&qp->q.lock);
ec4fbd647   Eric Dumazet   inet: frag: relea...
247
248
  out_rcu_unlock:
  	rcu_read_unlock();
6b921536f   Eric Dumazet   net: sk_buff rbno...
249
250
  	/* head is only non-NULL once it has been detached from the queue. */
  	if (head)
  		kfree_skb(head);
4b6cb5d8e   Pavel Emelyanov   [INET]: Small cle...
251
  	ipq_put(qp);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
252
  }
abd6523d1   Pavel Emelyanov   [INET]: Consolida...
253
254
255
  /* Find the correct entry in the "incomplete datagrams" queue for
   * this IP datagram, and create new one, if nothing is found.
   *
   * The lookup key is built from the source/destination address, id and
   * protocol of @iph plus the defrag @user and @vif. Returns the ipq that
   * embeds the queue handed back by inet_frag_find(), or NULL when that
   * call returns NULL.
   * NOTE(review): the creation of missing entries happens inside
   * inet_frag_find(), which is not visible here - confirm.
   */
9972f134a   David Ahern   net: frags: Add V...
256
257
  static struct ipq *ip_find(struct net *net, struct iphdr *iph,
  			   u32 user, int vif)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
258
  {
9aee41eff   Eric Dumazet   inet: frags: use ...
259
260
261
262
263
264
265
266
  	struct frag_v4_compare_key key = {
  		.saddr = iph->saddr,
  		.daddr = iph->daddr,
  		.user = user,
  		.vif = vif,
  		.id = iph->id,
  		.protocol = iph->protocol,
  	};
c6fda2822   Pavel Emelyanov   [INET]: Consolida...
267
  	struct inet_frag_queue *q;
9a375803f   Pavel Emelyanov   inet fragments: f...
268

9aee41eff   Eric Dumazet   inet: frags: use ...
269
  	q = inet_frag_find(&net->ipv4.frags, &key);
caa4249ec   Eric Dumazet   inet: frags: remo...
270
  	if (!q)
5a3da1fe9   Hannes Frederic Sowa   inet: limit lengt...
271
  		return NULL;
caa4249ec   Eric Dumazet   inet: frags: remo...
272

c6fda2822   Pavel Emelyanov   [INET]: Consolida...
273
  	return container_of(q, struct ipq, q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274
  }
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
275
  /* Is the fragment too far ahead to be part of ipq?
   *
   * Every call increments the per-peer counter peer->rid and stores the
   * new value in qp->rid. The fragment counts as "too far" when the queue
   * already holds fragments and the counter advanced by more than
   * max_dist since the previous call for this queue. In that case
   * IPSTATS_MIB_REASMFAILS is incremented.
   *
   * Returns non-zero when the fragment is too far, 0 otherwise (always 0
   * without a peer or with max_dist == 0).
   */
aa1f731e5   Fabian Frederick   inet: frags: remo...
276
  static int ip_frag_too_far(struct ipq *qp)
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
277
278
  {
  	struct inet_peer *peer = qp->peer;
0fbf4cb27   Nikolay Borisov   ipv4: namespacify...
279
  	unsigned int max = qp->q.net->max_dist;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
280
281
282
283
284
285
286
287
288
289
  	unsigned int start, end;
  
  	int rc;
  
  	if (!peer || !max)
  		return 0;
  
  	start = qp->rid;
  	end = atomic_inc_return(&peer->rid);
  	qp->rid = end;
6b921536f   Eric Dumazet   net: sk_buff rbno...
290
  	/* start/end are unsigned, so the difference is computed modulo
  	 * 2^32 even after the counter wraps. An empty queue is never
  	 * "too far".
  	 */
  	rc = qp->q.fragments_tail && (end - start) > max;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
291
292
  
  	if (rc) {
7c73a6faf   Pavel Emelyanov   mib: add net to I...
293
294
295
  		struct net *net;
  
  		net = container_of(qp->q.net, struct net, ipv4.frags);
b45386efa   Eric Dumazet   net: rename IP_IN...
296
  		__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
297
298
299
300
301
302
303
  	}
  
  	return rc;
  }
  
  /* Reset @qp to an empty queue so that it can be reused: re-arm the
   * expiry timer, free every fragment held in the rb-tree, return their
   * memory charge and clear the reassembly state.
   *
   * Returns 0 on success, or -ETIMEDOUT when mod_timer() returns 0.
   */
  static int ip_frag_reinit(struct ipq *qp)
  {
d433673e5   Jesper Dangaard Brouer   net: frag helper ...
304
  	unsigned int sum_truesize = 0;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
305

b2fd5321d   Pavel Emelyanov   [NETNS][FRAGS]: M...
306
  	/* NOTE(review): mod_timer() returning 0 presumably means the timer
  	 * was no longer pending, i.e. the queue is already expiring; the
  	 * extra reference then balances the one owned by the timer that was
  	 * just re-armed - confirm against the timer API.
  	 */
  	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
edcb69187   Reshetova, Elena   net: convert inet...
307
  		refcount_inc(&qp->q.refcnt);
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
308
309
  		return -ETIMEDOUT;
  	}
b3a0c61b7   Peter Oskolkov   ip: process in-or...
310
  	/* The purge result is the amount handed back to the memory limit. */
  	sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
0e60d245a   Florian Westphal   inet: frag: chang...
311
  	sub_frag_mem_limit(qp->q.net, sum_truesize);
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
312

06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
313
  	qp->q.flags = 0;
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
314
315
316
  	qp->q.len = 0;
  	qp->q.meat = 0;
  	qp->q.fragments = NULL;
6b921536f   Eric Dumazet   net: sk_buff rbno...
317
  	qp->q.rb_fragments = RB_ROOT;
d6bebca92   Changli Gao   fragment: add fas...
318
  	qp->q.fragments_tail = NULL;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
319
  	qp->q.last_run_head = NULL;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
320
  	qp->iif = 0;
6623e3b24   Eric Dumazet   ipv4: IP defragme...
321
  	qp->ecn = 0;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
322
323
324
  
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
  /* Add new segment to existing queue.
   *
   * Validates the position of fragment @skb inside the datagram, strips
   * its headers, inserts it into the queue of @qp and updates the queue
   * bookkeeping. When the queue holds both the first and the last
   * fragment and all bytes in between, ip_frag_reasm() is called.
   *
   * Returns -EINPROGRESS when the fragment was queued and the datagram is
   * still incomplete, the result of ip_frag_reasm() when it is complete,
   * or a negative error code after freeing @skb. An overlap with queued
   * data additionally kills the whole queue.
   */
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
326
  static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
327
  {
1c4496911   Peter Oskolkov   ip: discard IPv4 ...
328
  	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
6b921536f   Eric Dumazet   net: sk_buff rbno...
329
  	struct rb_node **rbn, *parent;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
330
  	struct sk_buff *skb1, *prev_tail;
95b4b7114   Michal Kubecek   net: ipv4: do not...
331
  	int ihl, end, skb1_run_end;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
332
  	struct net_device *dev;
d6b915e29   Florian Westphal   ip_fragment: don'...
333
  	unsigned int fragsize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
334
  	int flags, offset;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
335
  	int err = -ENOENT;
6623e3b24   Eric Dumazet   ipv4: IP defragme...
336
  	u8 ecn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
337

06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
338
  	/* A completed queue takes no more fragments (err is -ENOENT). */
  	if (qp->q.flags & INET_FRAG_COMPLETE)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
339
  		goto err;
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
340
  	/* When the fragment is too far ahead the queue is reset and reused;
  	 * only if that reset fails is the queue killed and the skb dropped.
  	 */
  	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
341
342
  	    unlikely(ip_frag_too_far(qp)) &&
  	    unlikely(err = ip_frag_reinit(qp))) {
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
343
344
345
  		ipq_kill(qp);
  		goto err;
  	}
6623e3b24   Eric Dumazet   ipv4: IP defragme...
346
  	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
347
  	offset = ntohs(ip_hdr(skb)->frag_off);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
348
349
350
  	/* Split frag_off into the flag bits and the byte offset. */
  	flags = offset & ~IP_OFFSET;
  	offset &= IP_OFFSET;
  	offset <<= 3;		/* offset is in 8-byte chunks */
c9bdd4b52   Arnaldo Carvalho de Melo   [IP]: Introduce i...
351
  	ihl = ip_hdrlen(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
353
  
  	/* Determine the position of this fragment. */
0848f6428   Edward Hyunkoo Jee   inet: frags: fix ...
354
  	end = offset + skb->len - skb_network_offset(skb) - ihl;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
355
  	err = -EINVAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
356
357
358
359
  
  	/* Is this the final fragment? */
  	if ((flags & IP_MF) == 0) {
  		/* If we already have some bits beyond end
42b2aa86c   Justin P. Mattock   treewide: Fix typ...
360
  		 * or have different end, the segment is corrupted.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
361
  		 */
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
362
  		if (end < qp->q.len ||
06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
363
  		    ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
364
  			goto err;
06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
365
  		qp->q.flags |= INET_FRAG_LAST_IN;
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
366
  		qp->q.len = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
367
368
369
370
371
372
  	} else {
  		/* A non-final fragment is cut down to a multiple of 8 bytes;
  		 * the checksum state is then reset unless it is
  		 * CHECKSUM_UNNECESSARY.
  		 */
  		if (end&7) {
  			end &= ~7;
  			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
  				skb->ip_summed = CHECKSUM_NONE;
  		}
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
373
  		if (end > qp->q.len) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
374
  			/* Some bits beyond end -> corruption. */
06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
375
  			if (qp->q.flags & INET_FRAG_LAST_IN)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
376
  				goto err;
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
377
  			qp->q.len = end;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
378
379
380
381
  		}
  	}
  	/* A fragment that carries no payload is rejected with -EINVAL. */
  	if (end == offset)
  		goto err;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
382
  	err = -ENOMEM;
0848f6428   Edward Hyunkoo Jee   inet: frags: fix ...
383
  	/* Strip everything up to and including the IP header ... */
  	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
384
  		goto err;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
385
386
387
  
  	/* ... and trim the payload to the length computed above. */
  	err = pskb_trim_rcsum(skb, end - offset);
  	if (err)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
388
  		goto err;
6b921536f   Eric Dumazet   net: sk_buff rbno...
389
390
391
392
  	/* Note : skb->rbnode and skb->dev share the same location. */
  	dev = skb->dev;
  	/* Makes sure compiler won't do silly aliasing games */
  	barrier();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
393

1c4496911   Peter Oskolkov   ip: discard IPv4 ...
394
395
396
397
398
399
  	/* RFC5722, Section 4, amended by Errata ID : 3089
  	 *                          When reassembling an IPv6 datagram, if
  	 *   one or more its constituent fragments is determined to be an
  	 *   overlapping fragment, the entire datagram (and any constituent
  	 *   fragments) MUST be silently discarded.
  	 *
95b4b7114   Michal Kubecek   net: ipv4: do not...
400
401
402
  	 * We do the same here for IPv4 (and increment an snmp counter) but
  	 * we do not want to drop the whole queue in response to a duplicate
  	 * fragment.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
403
  	 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
404

dc489ad6a   Greg Kroah-Hartman   Fix "net: ipv4: d...
405
  	err = -EINVAL;
6b921536f   Eric Dumazet   net: sk_buff rbno...
406
  	/* Find out where to put this fragment.  */
b3a0c61b7   Peter Oskolkov   ip: process in-or...
407
408
409
410
411
  	prev_tail = qp->q.fragments_tail;
  	if (!prev_tail)
  		ip4_frag_create_run(&qp->q, skb);  /* First fragment. */
  	else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
  		/* This is the common case: skb goes to the end. */
6b921536f   Eric Dumazet   net: sk_buff rbno...
412
  		/* Detect and discard overlaps. */
b3a0c61b7   Peter Oskolkov   ip: process in-or...
413
  		if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
6b921536f   Eric Dumazet   net: sk_buff rbno...
414
  			goto discard_qp;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
415
416
417
418
  		/* Directly adjacent to the tail: extend the last run.
  		 * Otherwise there is a hole and a new run is started.
  		 */
  		if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
  			ip4_frag_append_to_last_run(&qp->q, skb);
  		else
  			ip4_frag_create_run(&qp->q, skb);
6b921536f   Eric Dumazet   net: sk_buff rbno...
419
  	} else {
b3a0c61b7   Peter Oskolkov   ip: process in-or...
420
421
422
  		/* Binary search. Note that skb can become the first fragment,
  		 * but not the last (covered above).
  		 */
6b921536f   Eric Dumazet   net: sk_buff rbno...
423
424
425
426
  		rbn = &qp->q.rb_fragments.rb_node;
  		do {
  			parent = *rbn;
  			skb1 = rb_to_skb(parent);
95b4b7114   Michal Kubecek   net: ipv4: do not...
427
428
  			/* Each tree node stands for a whole run, which covers
  			 * [ip_defrag_offset, skb1_run_end).
  			 */
  			skb1_run_end = skb1->ip_defrag_offset +
  				       FRAG_CB(skb1)->frag_run_len;
6b921536f   Eric Dumazet   net: sk_buff rbno...
429
430
  			if (end <= skb1->ip_defrag_offset)
  				rbn = &parent->rb_left;
95b4b7114   Michal Kubecek   net: ipv4: do not...
431
  			else if (offset >= skb1_run_end)
6b921536f   Eric Dumazet   net: sk_buff rbno...
432
  				rbn = &parent->rb_right;
95b4b7114   Michal Kubecek   net: ipv4: do not...
433
434
435
436
437
  			else if (offset >= skb1->ip_defrag_offset &&
  				 end <= skb1_run_end)
  				goto err; /* No new data, potential duplicate */
  			else
  				goto discard_qp; /* Found an overlap */
6b921536f   Eric Dumazet   net: sk_buff rbno...
438
439
  		} while (*rbn);
  		/* Here we have parent properly set, and rbn pointing to
b3a0c61b7   Peter Oskolkov   ip: process in-or...
440
441
442
  		 * one of its NULL left/right children. Insert skb.
  		 */
  		ip4_frag_init_run(skb);
6b921536f   Eric Dumazet   net: sk_buff rbno...
443
  		rb_link_node(&skb->rbnode, parent, rbn);
b3a0c61b7   Peter Oskolkov   ip: process in-or...
444
  		rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
6b921536f   Eric Dumazet   net: sk_buff rbno...
445
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
446

48c2afc16   Eric Dumazet   inet: frags: get ...
447
448
  	/* Remember the input interface; ip_expire() looks the device up
  	 * again by this index.
  	 */
  	if (dev)
  		qp->iif = dev->ifindex;
48c2afc16   Eric Dumazet   inet: frags: get ...
449
  	skb->ip_defrag_offset = offset;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
450

5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
451
452
  	qp->q.stamp = skb->tstamp;
  	/* meat counts the payload bytes received so far. */
  	qp->q.meat += skb->len;
6623e3b24   Eric Dumazet   ipv4: IP defragme...
453
  	qp->ecn |= ecn;
0e60d245a   Florian Westphal   inet: frag: chang...
454
  	add_frag_mem_limit(qp->q.net, skb->truesize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455
  	if (offset == 0)
06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
456
  		qp->q.flags |= INET_FRAG_FIRST_IN;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457

d6b915e29   Florian Westphal   ip_fragment: don'...
458
459
460
461
  	/* Track the largest fragment (header included) seen overall and
  	 * among the fragments that had DF set.
  	 */
  	fragsize = skb->len + ihl;
  
  	if (fragsize > qp->q.max_size)
  		qp->q.max_size = fragsize;
5f2d04f1f   Patrick McHardy   ipv4: fix path MT...
462
  	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
d6b915e29   Florian Westphal   ip_fragment: don'...
463
464
  	    fragsize > qp->max_df_size)
  		qp->max_df_size = fragsize;
5f2d04f1f   Patrick McHardy   ipv4: fix path MT...
465

06aa8b8a0   Nikolay Aleksandrov   inet: frags: rena...
466
  	/* Complete once the first and last fragments are in and the bytes
  	 * received match the datagram length.
  	 */
  	if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
97599dc79   Eric Dumazet   net: drop dst bef...
467
468
  	    qp->q.meat == qp->q.len) {
  		unsigned long orefdst = skb->_skb_refdst;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
469

97599dc79   Eric Dumazet   net: drop dst bef...
470
  		/* The dst reference is hidden while reassembling and put
  		 * back afterwards.
  		 */
  		skb->_skb_refdst = 0UL;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
471
  		err = ip_frag_reasm(qp, skb, prev_tail, dev);
97599dc79   Eric Dumazet   net: drop dst bef...
472
473
474
475
476
  		skb->_skb_refdst = orefdst;
  		return err;
  	}
  
  	/* A fragment parked in the queue does not keep its dst. */
  	skb_dst_drop(skb);
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
477
  	return -EINPROGRESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
478

1c4496911   Peter Oskolkov   ip: discard IPv4 ...
479
480
  	/* Overlap: kill the whole queue, count it, then drop the skb. */
  discard_qp:
  	inet_frag_kill(&qp->q);
1c4496911   Peter Oskolkov   ip: discard IPv4 ...
481
  	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
482
483
  err:
  	kfree_skb(skb);
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
484
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
485
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
486
  /* Build a new IP datagram from all its fragments. */
6b921536f   Eric Dumazet   net: sk_buff rbno...
487
  static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
b3a0c61b7   Peter Oskolkov   ip: process in-or...
488
  			 struct sk_buff *prev_tail, struct net_device *dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
489
  {
2bad35b7c   Jorge Boncompte [DTI2]   netns: oops in ip...
490
  	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
491
  	struct iphdr *iph;
6b921536f   Eric Dumazet   net: sk_buff rbno...
492
493
494
  	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
  	struct sk_buff **nextp; /* To build frag_list. */
  	struct rb_node *rbn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
495
496
  	int len;
  	int ihlen;
da4b69299   Jiri Wiesner   ipv4: ipv6: netfi...
497
  	int delta;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
498
  	int err;
5173cc057   Eric Dumazet   ipv4: more compli...
499
  	u8 ecn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
500
501
  
  	ipq_kill(qp);
be991971d   Hannes Frederic Sowa   inet: generalize ...
502
  	ecn = ip_frag_ecn_table[qp->ecn];
5173cc057   Eric Dumazet   ipv4: more compli...
503
504
505
506
  	if (unlikely(ecn == 0xff)) {
  		err = -EINVAL;
  		goto out_fail;
  	}
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
507
  	/* Make the one we just received the head. */
6b921536f   Eric Dumazet   net: sk_buff rbno...
508
509
  	if (head != skb) {
  		fp = skb_clone(skb, GFP_ATOMIC);
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
510
511
  		if (!fp)
  			goto out_nomem;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
512
513
514
515
516
517
  		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
  		if (RB_EMPTY_NODE(&skb->rbnode))
  			FRAG_CB(prev_tail)->next_frag = fp;
  		else
  			rb_replace_node(&skb->rbnode, &fp->rbnode,
  					&qp->q.rb_fragments);
6b921536f   Eric Dumazet   net: sk_buff rbno...
518
  		if (qp->q.fragments_tail == skb)
d6bebca92   Changli Gao   fragment: add fas...
519
  			qp->q.fragments_tail = fp;
6b921536f   Eric Dumazet   net: sk_buff rbno...
520
  		skb_morph(skb, head);
b3a0c61b7   Peter Oskolkov   ip: process in-or...
521
  		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
6b921536f   Eric Dumazet   net: sk_buff rbno...
522
523
524
525
  		rb_replace_node(&head->rbnode, &skb->rbnode,
  				&qp->q.rb_fragments);
  		consume_skb(head);
  		head = skb;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
526
  	}
48c2afc16   Eric Dumazet   inet: frags: get ...
527
  	WARN_ON(head->ip_defrag_offset != 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
528
529
  
  	/* Allocate a new buffer for the datagram. */
c9bdd4b52   Arnaldo Carvalho de Melo   [IP]: Introduce i...
530
  	ihlen = ip_hdrlen(head);
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
531
  	len = ihlen + qp->q.len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
532

1706d5876   Herbert Xu   [IPV4]: Make ip_d...
533
  	err = -E2BIG;
132adf546   Stephen Hemminger   [IPV4]: cleanup
534
  	if (len > 65535)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
535
  		goto out_oversize;
da4b69299   Jiri Wiesner   ipv4: ipv6: netfi...
536
  	delta = - head->truesize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
  	/* Head of list must not be cloned. */
14bbd6a56   Pravin B Shelar   net: Add skb_uncl...
538
  	if (skb_unclone(head, GFP_ATOMIC))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
539
  		goto out_nomem;
da4b69299   Jiri Wiesner   ipv4: ipv6: netfi...
540
541
542
  	delta += head->truesize;
  	if (delta)
  		add_frag_mem_limit(qp->q.net, delta);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
543
544
545
  	/* If the first fragment is fragmented itself, we split
  	 * it to two chunks: the first with data and paged part
  	 * and the second, holding only fragments. */
21dc33015   David S. Miller   net: Rename skb_h...
546
  	if (skb_has_frag_list(head)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
548
  		struct sk_buff *clone;
  		int i, plen = 0;
51456b291   Ian Morris   ipv4: coding styl...
549
550
  		clone = alloc_skb(0, GFP_ATOMIC);
  		if (!clone)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
551
  			goto out_nomem;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
552
  		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
d7fcf1a5c   David S. Miller   ipv4: Use frag li...
553
  		skb_frag_list_init(head);
9e903e085   Eric Dumazet   net: add skb frag...
554
555
  		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
  			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
556
  		clone->len = clone->data_len = head->data_len - plen;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
557
  		head->truesize += clone->truesize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
558
559
  		clone->csum = 0;
  		clone->ip_summed = head->ip_summed;
0e60d245a   Florian Westphal   inet: frag: chang...
560
  		add_frag_mem_limit(qp->q.net, clone->truesize);
6b921536f   Eric Dumazet   net: sk_buff rbno...
561
562
563
564
  		skb_shinfo(head)->frag_list = clone;
  		nextp = &clone->next;
  	} else {
  		nextp = &skb_shinfo(head)->frag_list;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
565
  	}
d56f90a7c   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
566
  	skb_push(head, head->data - skb_network_header(head));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
567

6b921536f   Eric Dumazet   net: sk_buff rbno...
568
  	/* Traverse the tree in order, to build frag_list. */
b3a0c61b7   Peter Oskolkov   ip: process in-or...
569
  	fp = FRAG_CB(head)->next_frag;
6b921536f   Eric Dumazet   net: sk_buff rbno...
570
571
  	rbn = rb_next(&head->rbnode);
  	rb_erase(&head->rbnode, &qp->q.rb_fragments);
b3a0c61b7   Peter Oskolkov   ip: process in-or...
572
573
574
575
576
577
578
579
580
581
  	while (rbn || fp) {
  		/* fp points to the next sk_buff in the current run;
  		 * rbn points to the next run.
  		 */
  		/* Go through the current run. */
  		while (fp) {
  			*nextp = fp;
  			nextp = &fp->next;
  			fp->prev = NULL;
  			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
08fb833b4   Taehee Yoo   ip: frags: fix cr...
582
  			fp->sk = NULL;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
  			head->data_len += fp->len;
  			head->len += fp->len;
  			if (head->ip_summed != fp->ip_summed)
  				head->ip_summed = CHECKSUM_NONE;
  			else if (head->ip_summed == CHECKSUM_COMPLETE)
  				head->csum = csum_add(head->csum, fp->csum);
  			head->truesize += fp->truesize;
  			fp = FRAG_CB(fp)->next_frag;
  		}
  		/* Move to the next run. */
  		if (rbn) {
  			struct rb_node *rbnext = rb_next(rbn);
  
  			fp = rb_to_skb(rbn);
  			rb_erase(rbn, &qp->q.rb_fragments);
  			rbn = rbnext;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
600
  	}
5510b3c2a   David S. Miller   Merge git://git.k...
601
  	sub_frag_mem_limit(qp->q.net, head->truesize);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602

6b921536f   Eric Dumazet   net: sk_buff rbno...
603
  	*nextp = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
604
  	head->next = NULL;
6b921536f   Eric Dumazet   net: sk_buff rbno...
605
  	head->prev = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
606
  	head->dev = dev;
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
607
  	head->tstamp = qp->q.stamp;
d6b915e29   Florian Westphal   ip_fragment: don'...
608
  	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
609

eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
610
  	iph = ip_hdr(head);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
611
  	iph->tot_len = htons(len);
5173cc057   Eric Dumazet   ipv4: more compli...
612
  	iph->tos |= ecn;
d6b915e29   Florian Westphal   ip_fragment: don'...
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
  
  	/* When we set IP_DF on a refragmented skb we must also force a
  	 * call to ip_fragment to avoid forwarding a DF-skb of size s while
  	 * original sender only sent fragments of size f (where f < s).
  	 *
  	 * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
  	 * frag seen to avoid sending tiny DF-fragments in case skb was built
  	 * from one very small df-fragment and one large non-df frag.
  	 */
  	if (qp->max_df_size == qp->q.max_size) {
  		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
  		iph->frag_off = htons(IP_DF);
  	} else {
  		iph->frag_off = 0;
  	}
0848f6428   Edward Hyunkoo Jee   inet: frags: fix ...
628
  	ip_send_check(iph);
b45386efa   Eric Dumazet   net: rename IP_IN...
629
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
630
  	qp->q.fragments = NULL;
6b921536f   Eric Dumazet   net: sk_buff rbno...
631
  	qp->q.rb_fragments = RB_ROOT;
d6bebca92   Changli Gao   fragment: add fas...
632
  	qp->q.fragments_tail = NULL;
b3a0c61b7   Peter Oskolkov   ip: process in-or...
633
  	qp->q.last_run_head = NULL;
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
634
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
635
636
  
  out_nomem:
ba7a46f16   Joe Perches   net: Convert LIMI...
637
638
  	net_dbg_ratelimited("queue_glue: no memory for gluing queue %p
  ", qp);
45542479f   David Howells   [NET]: Fix uninit...
639
  	err = -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
640
641
  	goto out_fail;
  out_oversize:
9aee41eff   Eric Dumazet   inet: frags: use ...
642
643
  	net_info_ratelimited("Oversized IP packet from %pI4
  ", &qp->q.key.v4.saddr);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
644
  out_fail:
b45386efa   Eric Dumazet   net: rename IP_IN...
645
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
646
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
647
648
649
  }
  
  /* Process an incoming IP datagram fragment. */
19bcf9f20   Eric W. Biederman   ipv4: Pass struct...
650
  int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
  {
9972f134a   David Ahern   net: frags: Add V...
652
  	struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
385add906   David Ahern   net: Replace vrf_...
653
  	int vif = l3mdev_master_ifindex_rcu(dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
654
  	struct ipq *qp;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
655

b45386efa   Eric Dumazet   net: rename IP_IN...
656
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
8282f2744   Joe Stringer   inet: frag: Alway...
657
  	skb_orphan(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
658

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
659
  	/* Lookup (or create) queue header */
9972f134a   David Ahern   net: frags: Add V...
660
  	qp = ip_find(net, ip_hdr(skb), user, vif);
00db41243   Ian Morris   ipv4: coding styl...
661
  	if (qp) {
1706d5876   Herbert Xu   [IPV4]: Make ip_d...
662
  		int ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
663

5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
664
  		spin_lock(&qp->q.lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665

1706d5876   Herbert Xu   [IPV4]: Make ip_d...
666
  		ret = ip_frag_queue(qp, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
667

5ab11c98d   Pavel Emelyanov   [INET]: Move comm...
668
  		spin_unlock(&qp->q.lock);
4b6cb5d8e   Pavel Emelyanov   [INET]: Small cle...
669
  		ipq_put(qp);
776c729e8   Herbert Xu   [IPV4]: Change ip...
670
  		return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
671
  	}
b45386efa   Eric Dumazet   net: rename IP_IN...
672
  	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
673
  	kfree_skb(skb);
776c729e8   Herbert Xu   [IPV4]: Change ip...
674
  	return -ENOMEM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
675
  }
4bc2f18ba   Eric Dumazet   net/ipv4: EXPORT_...
676
  EXPORT_SYMBOL(ip_defrag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
677

19bcf9f20   Eric W. Biederman   ipv4: Pass struct...
678
  /*
   * Reassemble @skb if, and only if, it carries an IPv4 fragment.
   *
   * The IP header is inspected through a private copy, so @skb is left
   * untouched (and returned as-is) whenever it is not IPv4, is malformed,
   * or is not a fragment.  For a fragment the skb is unshared, trimmed to
   * the IP total length and passed to ip_defrag().
   *
   * Returns the skb when ip_defrag() reports 0 or when no defragmentation
   * was attempted, and NULL when the skb was consumed, dropped, or could
   * not be unshared.
   */
  struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
  {
  	struct iphdr hdr;
  	int nhoff;
  	u32 tot_len;
  
  	if (skb->protocol != htons(ETH_P_IP))
  		return skb;
  
  	nhoff = skb_network_offset(skb);
  	if (skb_copy_bits(skb, nhoff, &hdr, sizeof(hdr)) < 0)
  		return skb;
  
  	if (hdr.version != 4 || hdr.ihl < 5)
  		return skb;
  
  	/* Total length must cover the header and fit inside the skb. */
  	tot_len = ntohs(hdr.tot_len);
  	if (tot_len < (hdr.ihl * 4) || skb->len < nhoff + tot_len)
  		return skb;
  
  	if (!ip_is_fragment(&hdr))
  		return skb;
  
  	skb = skb_share_check(skb, GFP_ATOMIC);
  	if (!skb)
  		return NULL;
  
  	/* Linearize the IP header, then drop anything past tot_len. */
  	if (!pskb_may_pull(skb, nhoff + hdr.ihl * 4) ||
  	    pskb_trim_rcsum(skb, nhoff + tot_len)) {
  		kfree_skb(skb);
  		return NULL;
  	}
  
  	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
  	if (ip_defrag(net, skb, user))
  		return NULL;
  
  	skb_clear_hash(skb);
  	return skb;
  }
  EXPORT_SYMBOL(ip_check_defrag);
c91f27fb5   Peter Oskolkov   ip: add helpers t...
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
  /*
   * Empty the fragment rbtree @root and free every skb it holds.
   *
   * Each tree node is the head of a run of skbs chained through
   * FRAG_CB()->next_frag; the node is erased from the tree and the whole
   * run is freed.
   *
   * Returns the summed truesize of all freed skbs.
   */
  unsigned int inet_frag_rbtree_purge(struct rb_root *root)
  {
  	struct rb_node *node, *following;
  	unsigned int freed = 0;
  
  	for (node = rb_first(root); node; node = following) {
  		struct sk_buff *frag = rb_entry(node, struct sk_buff, rbnode);
  		struct sk_buff *chained;
  
  		/* Pick the successor before the node leaves the tree. */
  		following = rb_next(node);
  		rb_erase(&frag->rbnode, root);
  
  		for (; frag; frag = chained) {
  			chained = FRAG_CB(frag)->next_frag;
  			freed += frag->truesize;
  			kfree_skb(frag);
  		}
  	}
  	return freed;
  }
  EXPORT_SYMBOL(inet_frag_rbtree_purge);
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
737
  #ifdef CONFIG_SYSCTL
5fff99e88   Eric Dumazet   inet: frags: fix ...
738
  static int dist_min;
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
739

0a64b4b81   Pavel Emelyanov   inet: Rename frag...
740
  static struct ctl_table ip4_frags_ns_ctl_table[] = {
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
741
  	{
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
742
  		.procname	= "ipfrag_high_thresh",
e31e0bdc7   Pavel Emelyanov   [NETNS][FRAGS]: M...
743
  		.data		= &init_net.ipv4.frags.high_thresh,
990204ddc   Eric Dumazet   inet: frags: brea...
744
  		.maxlen		= sizeof(unsigned long),
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
745
  		.mode		= 0644,
990204ddc   Eric Dumazet   inet: frags: brea...
746
  		.proc_handler	= proc_doulongvec_minmax,
1bab4c750   Nikolay Aleksandrov   inet: frag: set l...
747
  		.extra1		= &init_net.ipv4.frags.low_thresh
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
748
749
  	},
  	{
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
750
  		.procname	= "ipfrag_low_thresh",
e31e0bdc7   Pavel Emelyanov   [NETNS][FRAGS]: M...
751
  		.data		= &init_net.ipv4.frags.low_thresh,
990204ddc   Eric Dumazet   inet: frags: brea...
752
  		.maxlen		= sizeof(unsigned long),
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
753
  		.mode		= 0644,
990204ddc   Eric Dumazet   inet: frags: brea...
754
  		.proc_handler	= proc_doulongvec_minmax,
1bab4c750   Nikolay Aleksandrov   inet: frag: set l...
755
  		.extra2		= &init_net.ipv4.frags.high_thresh
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
756
757
  	},
  	{
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
758
  		.procname	= "ipfrag_time",
b2fd5321d   Pavel Emelyanov   [NETNS][FRAGS]: M...
759
  		.data		= &init_net.ipv4.frags.timeout,
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
760
761
  		.maxlen		= sizeof(int),
  		.mode		= 0644,
6d9f239a1   Alexey Dobriyan   net: '&' redux
762
  		.proc_handler	= proc_dointvec_jiffies,
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
763
  	},
0fbf4cb27   Nikolay Borisov   ipv4: namespacify...
764
765
766
767
768
769
  	{
  		.procname	= "ipfrag_max_dist",
  		.data		= &init_net.ipv4.frags.max_dist,
  		.maxlen		= sizeof(int),
  		.mode		= 0644,
  		.proc_handler	= proc_dointvec_minmax,
5fff99e88   Eric Dumazet   inet: frags: fix ...
770
  		.extra1		= &dist_min,
0fbf4cb27   Nikolay Borisov   ipv4: namespacify...
771
  	},
7d291ebb8   Pavel Emelyanov   inet: Register fr...
772
773
  	{ }
  };
e3a57d18b   Florian Westphal   inet: frag: remov...
774
775
  /* secret interval has been deprecated */
  static int ip4_frags_secret_interval_unused;
7d291ebb8   Pavel Emelyanov   inet: Register fr...
776
  static struct ctl_table ip4_frags_ctl_table[] = {
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
777
  	{
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
778
  		.procname	= "ipfrag_secret_interval",
e3a57d18b   Florian Westphal   inet: frag: remov...
779
  		.data		= &ip4_frags_secret_interval_unused,
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
780
781
  		.maxlen		= sizeof(int),
  		.mode		= 0644,
6d9f239a1   Alexey Dobriyan   net: '&' redux
782
  		.proc_handler	= proc_dointvec_jiffies,
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
783
  	},
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
784
785
  	{ }
  };
2c8c1e729   Alexey Dobriyan   net: spread __net...
786
  /*
   * Register the per-namespace fragment sysctls under "net/ipv4".
   *
   * init_net uses ip4_frags_ns_ctl_table directly.  Every other namespace
   * gets a kmemdup()'d copy whose data/limit pointers are redirected to
   * that namespace's own values.
   *
   * Returns 0 on success, -ENOMEM if the copy or the registration fails.
   */
  static int __net_init ip4_frags_ns_ctl_register(struct net *net)
  {
  	struct ctl_table *tbl = ip4_frags_ns_ctl_table;
  	struct ctl_table_header *header;
  	bool private_copy = !net_eq(net, &init_net);
  
  	if (private_copy) {
  		tbl = kmemdup(tbl, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
  		if (!tbl)
  			return -ENOMEM;
  
  		/* [0] ipfrag_high_thresh: extra2 is init_net's high_thresh */
  		tbl[0].data = &net->ipv4.frags.high_thresh;
  		tbl[0].extra1 = &net->ipv4.frags.low_thresh;
  		tbl[0].extra2 = &init_net.ipv4.frags.high_thresh;
  		/* [1] ipfrag_low_thresh */
  		tbl[1].data = &net->ipv4.frags.low_thresh;
  		tbl[1].extra2 = &net->ipv4.frags.high_thresh;
  		/* [2] ipfrag_time */
  		tbl[2].data = &net->ipv4.frags.timeout;
  		/* [3] ipfrag_max_dist */
  		tbl[3].data = &net->ipv4.frags.max_dist;
  	}
  
  	header = register_net_sysctl(net, "net/ipv4", tbl);
  	if (!header) {
  		/* Only the duplicated table is ours to free. */
  		if (private_copy)
  			kfree(tbl);
  		return -ENOMEM;
  	}
  
  	net->ipv4.frags_hdr = header;
  	return 0;
  }
2c8c1e729   Alexey Dobriyan   net: spread __net...
816
  /*
   * Undo ip4_frags_ns_ctl_register(): unregister the namespace's sysctl
   * header and free the table that was attached to it.
   */
  static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
  {
  	struct ctl_table_header *header = net->ipv4.frags_hdr;
  	/* Read the table pointer out of the header before unregistering. */
  	struct ctl_table *tbl = header->ctl_table_arg;
  
  	unregister_net_sysctl_table(header);
  	kfree(tbl);
  }
7d291ebb8   Pavel Emelyanov   inet: Register fr...
824

57a02c39c   Fabian Frederick   inet: frags: add ...
825
  /*
   * Register the global (init_net only) fragment sysctl table under
   * "net/ipv4".  The returned header is not kept.
   */
  static void __init ip4_frags_ctl_register(void)
  {
  	register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
  }
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
829
  #else
aa1f731e5   Fabian Frederick   inet: frags: remo...
830
  static int ip4_frags_ns_ctl_register(struct net *net)
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
831
832
833
  {
  	return 0;
  }
e4a2d5c2b   Pavel Emelyanov   [NETNS][FRAGS]: D...
834

aa1f731e5   Fabian Frederick   inet: frags: remo...
835
  static void ip4_frags_ns_ctl_unregister(struct net *net)
e4a2d5c2b   Pavel Emelyanov   [NETNS][FRAGS]: D...
836
837
  {
  }
7d291ebb8   Pavel Emelyanov   inet: Register fr...
838

aa1f731e5   Fabian Frederick   inet: frags: remo...
839
  static void __init ip4_frags_ctl_register(void)
7d291ebb8   Pavel Emelyanov   inet: Register fr...
840
841
  {
  }
8d8354d2f   Pavel Emelyanov   [NETNS][FRAGS]: M...
842
  #endif
2c8c1e729   Alexey Dobriyan   net: spread __net...
843
  /*
   * Per-namespace init: set the default fragment limits, initialise the
   * namespace's frag state and register its sysctls.
   *
   * Returns 0 on success or the negative error from whichever step failed;
   * if sysctl registration fails the frag state is torn down again.
   */
  static int __net_init ipv4_frags_init_net(struct net *net)
  {
  	int err;
  
  	/*
  	 * Fragment cache limits.
  	 *
  	 * Memory accounting (tries to) track real usage: the size of the
  	 * frag queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
  	 * plus the truesize of every SKB.
  	 *
  	 * A 64K fragment consumes 129736 bytes (44*2944)+200
  	 * (1500 truesize == 2944, sizeof(struct ipq) == 200)
  	 *
  	 * We commit 4MB at one time.  Once that limit is crossed we prune
  	 * down to 3MB, making room for approx 8 big 64K fragments (8x128k).
  	 */
  	net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
  	net->ipv4.frags.low_thresh  = 3 * 1024 * 1024;
  
  	/*
  	 * Important NOTE! The fragment queue must be destroyed before MSL
  	 * expires.  RFC791 is wrong in proposing to prolong the timer by
  	 * TTL on each fragment arrival.
  	 */
  	net->ipv4.frags.timeout = IP_FRAG_TIME;
  
  	net->ipv4.frags.max_dist = 64;
  	net->ipv4.frags.f = &ip4_frags;
  
  	err = inet_frags_init_net(&net->ipv4.frags);
  	if (err < 0)
  		return err;
  
  	err = ip4_frags_ns_ctl_register(net);
  	if (err < 0)
  		inet_frags_exit_net(&net->ipv4.frags);
  
  	return err;
  }
2c8c1e729   Alexey Dobriyan   net: spread __net...
879
  /*
   * Per-namespace teardown: the sysctls are unregistered first, then the
   * namespace's frag state is released.
   */
  static void __net_exit ipv4_frags_exit_net(struct net *net)
  {
  	ip4_frags_ns_ctl_unregister(net);
  
  	inet_frags_exit_net(&net->ipv4.frags);
  }
  
  /*
   * Per-network-namespace init/exit hooks for IPv4 fragment state;
   * registered from ipfrag_init() via register_pernet_subsys().
   */
  static struct pernet_operations ip4_frags_ops = {
  	.init = ipv4_frags_init_net,
  	.exit = ipv4_frags_exit_net,
  };
9aee41eff   Eric Dumazet   inet: frags: use ...
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
  
  /*
   * rhashtable hash callback for a lookup key.  @data is hashed as an array
   * of u32 words spanning one struct frag_v4_compare_key; @len is not used.
   */
  static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
  {
  	const u32 nwords = sizeof(struct frag_v4_compare_key) / sizeof(u32);
  
  	return jhash2(data, nwords, seed);
  }
  
  /*
   * rhashtable hash callback for a stored object.  @data is an
   * inet_frag_queue; its key.v4 member is hashed over the same number of
   * u32 words as ip4_key_hashfn() uses.  @len is not used.
   */
  static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
  {
  	const u32 nwords = sizeof(struct frag_v4_compare_key) / sizeof(u32);
  	const struct inet_frag_queue *queue = data;
  	const u32 *key_words = (const u32 *)&queue->key.v4;
  
  	return jhash2(key_words, nwords, seed);
  }
  
  /*
   * rhashtable compare callback: byte-compares the lookup key in @arg with
   * the key stored in the inet_frag_queue at @ptr.
   *
   * Returns 0 when the keys are identical and 1 otherwise.
   */
  static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
  {
  	const struct inet_frag_queue *queue = ptr;
  	const struct frag_v4_compare_key *wanted = arg->key;
  
  	return memcmp(&queue->key, wanted, sizeof(*wanted)) != 0;
  }
  
  /*
   * rhashtable parameters for IPv4 fragment queues; copied into
   * ip4_frags.rhash_params by ipfrag_init().  Entries are inet_frag_queue
   * objects linked through ->node and keyed on ->key, with a key length of
   * one struct frag_v4_compare_key.
   */
  static const struct rhashtable_params ip4_rhash_params = {
  	.head_offset		= offsetof(struct inet_frag_queue, node),
  	.key_offset		= offsetof(struct inet_frag_queue, key),
  	.key_len		= sizeof(struct frag_v4_compare_key),
  	.hashfn			= ip4_key_hashfn,
  	.obj_hashfn		= ip4_obj_hashfn,
  	.obj_cmpfn		= ip4_obj_cmpfn,
  	.automatic_shrinking	= true,
  };
b7aa0bf70   Eric Dumazet   [NET]: convert ne...
921
  /*
   * Boot-time setup of IPv4 fragment reassembly.
   *
   * Fills in the ip4_frags descriptor (constructor/destructor, queue size,
   * expiry handler, cache name, rhashtable parameters) and initialises it
   * with inet_frags_init(), panicking if that fails.  It then registers the
   * global sysctl table and the per-namespace operations.
   */
  void __init ipfrag_init(void)
  {
  	ip4_frags.constructor = ip4_frag_init;
  	ip4_frags.destructor = ip4_frag_free;
  	ip4_frags.qsize = sizeof(struct ipq);
  	ip4_frags.frag_expire = ip_expire;
  	ip4_frags.frags_cache_name = ip_frag_cache_name;
  	ip4_frags.rhash_params = ip4_rhash_params;
  
  	/* The message must end in an escaped "\n": a raw line break inside
  	 * the string literal does not compile.
  	 */
  	if (inet_frags_init(&ip4_frags))
  		panic("IP: failed to allocate ip4_frags cache\n");
  
  	ip4_frags_ctl_register();
  	register_pernet_subsys(&ip4_frags_ops);
  }