Blame view

net/ipv4/ip_output.c 38.7 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
  /*
   * INET		An implementation of the TCP/IP protocol suite for the LINUX
   *		operating system.  INET is implemented using the  BSD Socket
   *		interface as the means of communication with the user level.
   *
   *		The Internet Protocol (IP) output module.
   *
02c30a84e   Jesper Juhl   [PATCH] update Ro...
8
   * Authors:	Ross Biro
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
9
10
11
12
13
14
15
16
17
18
19
20
21
22
   *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   *		Donald Becker, <becker@super.org>
   *		Alan Cox, <Alan.Cox@linux.org>
   *		Richard Underwood
   *		Stefan Becker, <stefanb@yello.ping.de>
   *		Jorge Cwik, <jorge@laser.satlink.net>
   *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
   *		Hirokazu Takahashi, <taka@valinux.co.jp>
   *
   *	See ip_input.c for original log
   *
   *	Fixes:
   *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
   *		Mike Kilburn	:	htons() missing in ip_build_xmit.
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
23
   *		Bradford Johnson:	Fix faulty handling of some frames when
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
28
29
30
31
32
33
   *					no route is found.
   *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
   *					(in case if packet not accepted by
   *					output firewall rules)
   *		Mike McLagan	:	Routing by source
   *		Alexey Kuznetsov:	use new route cache
   *		Andi Kleen:		Fix broken PMTU recovery and remove
   *					some redundant tests.
   *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
   *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
34
35
36
   *		Andi Kleen	:	Split fast and slow ip_build_xmit path
   *					for decreased register pressure on x86
   *					and more readability.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
37
38
39
40
41
42
43
   *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
   *					silently drop skb instead of failing with -EPERM.
   *		Detlev Wengorz	:	Copy protocol for fragments.
   *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
   *					datagrams.
   *		Hirokazu Takahashi:	sendfile() on UDP works now.
   */
7c0f6ba68   Linus Torvalds   Replace <asm/uacc...
44
  #include <linux/uaccess.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
45
46
47
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
49
50
  #include <linux/mm.h>
  #include <linux/string.h>
  #include <linux/errno.h>
a1f8e7f7f   Al Viro   [PATCH] severing ...
51
  #include <linux/highmem.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
52
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
  
  #include <linux/socket.h>
  #include <linux/sockios.h>
  #include <linux/in.h>
  #include <linux/inet.h>
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
  #include <linux/proc_fs.h>
  #include <linux/stat.h>
  #include <linux/init.h>
  
  #include <net/snmp.h>
  #include <net/ip.h>
  #include <net/protocol.h>
  #include <net/route.h>
cfacb0577   Patrick McHardy   [IPV4]: ip_output...
68
  #include <net/xfrm.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
69
70
71
72
  #include <linux/skbuff.h>
  #include <net/sock.h>
  #include <net/arp.h>
  #include <net/icmp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
73
74
  #include <net/checksum.h>
  #include <net/inetpeer.h>
14972cbd3   Roopa Prabhu   net: lwtunnel: Ha...
75
  #include <net/lwtunnel.h>
33b486793   Daniel Mack   net: ipv4, ipv6: ...
76
  #include <linux/bpf-cgroup.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
77
78
79
  #include <linux/igmp.h>
  #include <linux/netfilter_ipv4.h>
  #include <linux/netfilter_bridge.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
80
  #include <linux/netlink.h>
6cbb0df78   Arnaldo Carvalho de Melo   [SOCK]: Introduce...
81
  #include <linux/tcp.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82

694869b3c   Eric W. Biederman   ipv4: Pass struct...
83
84
85
86
  /* Forward declaration: ip_fragment() is defined further down in this file
   * but is called by the finish-output helpers that appear before it.
   */
  static int
  ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
  	    unsigned int mtu,
  	    int (*output)(struct net *, struct sock *, struct sk_buff *));
49d16b23c   Andy Zhou   bridge_netfilter:...
87

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
88
  /* Generate a checksum for an outgoing IP datagram. */
2fbd96797   Denis Efremov   ipv4: ip_output: ...
89
  void ip_send_check(struct iphdr *iph)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
91
92
93
  {
  	iph->check = 0;
  	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
  }
4bc2f18ba   Eric Dumazet   net/ipv4: EXPORT_...
94
  EXPORT_SYMBOL(ip_send_check);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
95

cf91a99da   Eric W. Biederman   ipv4, ipv6: Pass ...
96
  int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
c439cb2e4   Herbert Xu   [IPV4]: Add ip_lo...
97
98
99
100
101
  {
  	struct iphdr *iph = ip_hdr(skb);
  
  	iph->tot_len = htons(skb->len);
  	ip_send_check(iph);
a8e3e1a9f   David Ahern   net: l3mdev: Add ...
102
103
104
105
106
107
108
  
  	/* if egress device is enslaved to an L3 master device pass the
  	 * skb to its handler for processing
  	 */
  	skb = l3mdev_ip_out(sk, skb);
  	if (unlikely(!skb))
  		return 0;
f41804391   Eli Cooper   ipv4: Set skb->pr...
109
  	skb->protocol = htons(ETH_P_IP);
29a26a568   Eric W. Biederman   netfilter: Pass s...
110
111
  	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
  		       net, sk, skb, NULL, skb_dst(skb)->dev,
13206b6bf   Eric W. Biederman   net: Pass net int...
112
  		       dst_output);
7026b1ddb   David Miller   netfilter: Pass s...
113
  }
33224b16f   Eric W. Biederman   ipv4, ipv6: Pass ...
114
  /*
   * Send a locally generated packet: run __ip_local_out() and, when it
   * returns 1, continue with dst_output().
   *
   * Returns the dst_output() result in that case, otherwise whatever
   * __ip_local_out() returned.
   */
  int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
  	int err;

  	err = __ip_local_out(net, sk, skb);
  	if (likely(err == 1))
  		err = dst_output(net, sk, skb);

  	return err;
  }
  EXPORT_SYMBOL_GPL(ip_local_out);
c439cb2e4   Herbert Xu   [IPV4]: Add ip_lo...
124

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
126
127
128
129
  /*
   * Pick the TTL for an outgoing packet: the socket's uc_ttl when it is
   * non-negative, otherwise the hop limit reported by ip4_dst_hoplimit()
   * for the route.
   */
  static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
  {
  	int ttl = inet->uc_ttl;

  	if (ttl < 0)
  		ttl = ip4_dst_hoplimit(dst);
  	return ttl;
  }
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
133
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
135
136
   *		Add an ip header to a skbuff and send it out.
   *
   */
cfe673b0a   Eric Dumazet   ip: constify ip_b...
137
  int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
f6d8bd051   Eric Dumazet   inet: add RCU pro...
138
  			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
139
140
  {
  	struct inet_sock *inet = inet_sk(sk);
511c3f92a   Eric Dumazet   net: skb->rtable ...
141
  	struct rtable *rt = skb_rtable(skb);
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
142
  	struct net *net = sock_net(sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
143
144
145
  	struct iphdr *iph;
  
  	/* Build the IP header. */
f6d8bd051   Eric Dumazet   inet: add RCU pro...
146
  	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
8856dfa3e   Arnaldo Carvalho de Melo   [SK_BUFF]: Use sk...
147
  	skb_reset_network_header(skb);
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
148
  	iph = ip_hdr(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149
150
151
  	iph->version  = 4;
  	iph->ihl      = 5;
  	iph->tos      = inet->tos;
d8d1f30b9   Changli Gao   net-next: remove ...
152
  	iph->ttl      = ip_select_ttl(inet, &rt->dst);
dd927a269   David S. Miller   ipv4: In ip_build...
153
154
  	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
  	iph->saddr    = saddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
155
  	iph->protocol = sk->sk_protocol;
cfe673b0a   Eric Dumazet   ip: constify ip_b...
156
157
158
159
160
  	if (ip_dont_fragment(sk, &rt->dst)) {
  		iph->frag_off = htons(IP_DF);
  		iph->id = 0;
  	} else {
  		iph->frag_off = 0;
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
161
  		__ip_select_ident(net, iph, 1);
cfe673b0a   Eric Dumazet   ip: constify ip_b...
162
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163

f6d8bd051   Eric Dumazet   inet: add RCU pro...
164
165
166
  	if (opt && opt->opt.optlen) {
  		iph->ihl += opt->opt.optlen>>2;
  		ip_options_build(skb, &opt->opt, daddr, rt, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
167
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
168
169
  
  	skb->priority = sk->sk_priority;
e05a90ec9   Jamal Hadi Salim   net: reflect mark...
170
171
  	if (!skb->mark)
  		skb->mark = sk->sk_mark;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
172
173
  
  	/* Send it out. */
33224b16f   Eric W. Biederman   ipv4, ipv6: Pass ...
174
  	return ip_local_out(net, skb->sk, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
  }
d8c97a945   Arnaldo Carvalho de Melo   [NET]: Export sym...
176
  EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
694869b3c   Eric W. Biederman   ipv4: Pass struct...
177
  static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
178
  {
adf30907d   Eric Dumazet   net: skb->dst acc...
179
  	struct dst_entry *dst = skb_dst(skb);
80787ebc2   Mitsuru Chinen   [IPV4] SNMP: Supp...
180
  	struct rtable *rt = (struct rtable *)dst;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
181
  	struct net_device *dev = dst->dev;
c2636b4d9   Chuck Lever   [NET]: Treat the ...
182
  	unsigned int hh_len = LL_RESERVED_SPACE(dev);
f6b72b621   David S. Miller   net: Embed hh_cac...
183
  	struct neighbour *neigh;
a263b3093   David S. Miller   ipv4: Make neigh ...
184
  	u32 nexthop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185

edf391ff1   Neil Horman   snmp: add missing...
186
  	if (rt->rt_type == RTN_MULTICAST) {
4ba1bf429   Eric W. Biederman   ipv4: Only comput...
187
  		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
edf391ff1   Neil Horman   snmp: add missing...
188
  	} else if (rt->rt_type == RTN_BROADCAST)
4ba1bf429   Eric W. Biederman   ipv4: Only comput...
189
  		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
80787ebc2   Mitsuru Chinen   [IPV4] SNMP: Supp...
190

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
191
  	/* Be paranoid, rather than too clever. */
3b04ddde0   Stephen Hemminger   [NET]: Move hardw...
192
  	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193
194
195
  		struct sk_buff *skb2;
  
  		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
51456b291   Ian Morris   ipv4: coding styl...
196
  		if (!skb2) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
197
198
199
200
201
  			kfree_skb(skb);
  			return -ENOMEM;
  		}
  		if (skb->sk)
  			skb_set_owner_w(skb2, skb->sk);
5d0ba55b6   Eric Dumazet   net: use consume_...
202
  		consume_skb(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
204
  		skb = skb2;
  	}
14972cbd3   Roopa Prabhu   net: lwtunnel: Ha...
205
206
207
208
209
210
  	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
  		int res = lwtunnel_xmit(skb);
  
  		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
  			return res;
  	}
a263b3093   David S. Miller   ipv4: Make neigh ...
211
  	rcu_read_lock_bh();
155e8336c   Julian Anastasov   ipv4: introduce r...
212
  	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
a263b3093   David S. Miller   ipv4: Make neigh ...
213
214
215
  	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
  	if (unlikely(!neigh))
  		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
9871f1ad6   Vasiliy Kulikov   ip: fix error han...
216
  	if (!IS_ERR(neigh)) {
4ff062035   Julian Anastasov   net: add dst_pend...
217
218
219
  		int res;
  
  		sock_confirm_neigh(skb, neigh);
c16ec1859   Julian Anastasov   net: rename dst_n...
220
  		res = neigh_output(neigh, skb);
f2c31e32b   Eric Dumazet   net: fix NULL der...
221

a263b3093   David S. Miller   ipv4: Make neigh ...
222
  		rcu_read_unlock_bh();
f2c31e32b   Eric Dumazet   net: fix NULL der...
223
224
  		return res;
  	}
a263b3093   David S. Miller   ipv4: Make neigh ...
225
  	rcu_read_unlock_bh();
05e3aa094   David S. Miller   net: Create and u...
226

e87cc4728   Joe Perches   net: Convert net_...
227
228
229
  	net_dbg_ratelimited("%s: No header cache and no neighbour!
  ",
  			    __func__);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
231
232
  	kfree_skb(skb);
  	return -EINVAL;
  }
694869b3c   Eric W. Biederman   ipv4: Pass struct...
233
234
  static int ip_finish_output_gso(struct net *net, struct sock *sk,
  				struct sk_buff *skb, unsigned int mtu)
c7ba65d7b   Florian Westphal   net: ip: push gso...
235
236
237
238
  {
  	netdev_features_t features;
  	struct sk_buff *segs;
  	int ret = 0;
9ee6c5dc8   Lance Richardson   ipv4: allow local...
239
  	/* common case: seglen is <= mtu
359ebda25   Shmulik Ladkani   net/ipv4: Introdu...
240
  	 */
9ee6c5dc8   Lance Richardson   ipv4: allow local...
241
  	if (skb_gso_validate_mtu(skb, mtu))
694869b3c   Eric W. Biederman   ipv4: Pass struct...
242
  		return ip_finish_output2(net, sk, skb);
c7ba65d7b   Florian Westphal   net: ip: push gso...
243

0ace81ec7   Lance Richardson   ipv4: update comm...
244
  	/* Slowpath -  GSO segment length exceeds the egress MTU.
c7ba65d7b   Florian Westphal   net: ip: push gso...
245
  	 *
0ace81ec7   Lance Richardson   ipv4: update comm...
246
247
248
249
250
251
252
253
254
255
  	 * This can happen in several cases:
  	 *  - Forwarding of a TCP GRO skb, when DF flag is not set.
  	 *  - Forwarding of an skb that arrived on a virtualization interface
  	 *    (virtio-net/vhost/tap) with TSO/GSO size set by other network
  	 *    stack.
  	 *  - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
  	 *    interface with a smaller MTU.
  	 *  - Arriving GRO skb (or GSO skb in a virtualized environment) that is
  	 *    bridged to a NETIF_F_TSO tunnel stacked over an interface with an
  	 *    insufficent MTU.
c7ba65d7b   Florian Westphal   net: ip: push gso...
256
257
  	 */
  	features = netif_skb_features(skb);
9207f9d45   Konstantin Khlebnikov   net: preserve IP ...
258
  	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
c7ba65d7b   Florian Westphal   net: ip: push gso...
259
  	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
330966e50   Florian Westphal   net: make skb_gso...
260
  	if (IS_ERR_OR_NULL(segs)) {
c7ba65d7b   Florian Westphal   net: ip: push gso...
261
262
263
264
265
266
267
268
269
270
271
  		kfree_skb(skb);
  		return -ENOMEM;
  	}
  
  	consume_skb(skb);
  
  	do {
  		struct sk_buff *nskb = segs->next;
  		int err;
  
  		segs->next = NULL;
694869b3c   Eric W. Biederman   ipv4: Pass struct...
272
  		err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
c7ba65d7b   Florian Westphal   net: ip: push gso...
273
274
275
276
277
278
279
280
  
  		if (err && ret == 0)
  			ret = err;
  		segs = nskb;
  	} while (segs);
  
  	return ret;
  }
0c4b51f00   Eric W. Biederman   netfilter: Pass n...
281
  static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
282
  {
c5501eb34   Florian Westphal   net: ipv4: avoid ...
283
  	unsigned int mtu;
33b486793   Daniel Mack   net: ipv4, ipv6: ...
284
285
286
287
288
289
290
  	int ret;
  
  	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
  	if (ret) {
  		kfree_skb(skb);
  		return ret;
  	}
c5501eb34   Florian Westphal   net: ipv4: avoid ...
291

5c901daae   Patrick McHardy   [NETFILTER]: Redo...
292
293
  #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
  	/* Policy lookup after SNAT yielded a new policy */
00db41243   Ian Morris   ipv4: coding styl...
294
  	if (skb_dst(skb)->xfrm) {
48d5cad87   Patrick McHardy   [XFRM]: Fix SNAT-...
295
  		IPCB(skb)->flags |= IPSKB_REROUTED;
13206b6bf   Eric W. Biederman   net: Pass net int...
296
  		return dst_output(net, sk, skb);
48d5cad87   Patrick McHardy   [XFRM]: Fix SNAT-...
297
  	}
5c901daae   Patrick McHardy   [NETFILTER]: Redo...
298
  #endif
fedbb6b4f   Shmulik Ladkani   ipv4: Fix ip_skb_...
299
  	mtu = ip_skb_dst_mtu(sk, skb);
c7ba65d7b   Florian Westphal   net: ip: push gso...
300
  	if (skb_is_gso(skb))
694869b3c   Eric W. Biederman   ipv4: Pass struct...
301
  		return ip_finish_output_gso(net, sk, skb, mtu);
c7ba65d7b   Florian Westphal   net: ip: push gso...
302

d6b915e29   Florian Westphal   ip_fragment: don'...
303
  	if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
694869b3c   Eric W. Biederman   ipv4: Pass struct...
304
  		return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
c7ba65d7b   Florian Westphal   net: ip: push gso...
305

694869b3c   Eric W. Biederman   ipv4: Pass struct...
306
  	return ip_finish_output2(net, sk, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
  }
33b486793   Daniel Mack   net: ipv4, ipv6: ...
308
309
310
311
312
313
314
315
316
317
318
319
320
  /*
   * Continuation used by ip_mc_output() for the cloned copies it loops
   * back: run the cgroup BPF egress program and, if it returns 0, pass the
   * skb to dev_loopback_xmit().  A non-zero verdict frees the skb and is
   * returned to the caller.
   */
  static int ip_mc_finish_output(struct net *net, struct sock *sk,
  			       struct sk_buff *skb)
  {
  	int verdict = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);

  	if (!verdict)
  		return dev_loopback_xmit(net, sk, skb);

  	kfree_skb(skb);
  	return verdict;
  }
ede2059db   Eric W. Biederman   dst: Pass net int...
321
  int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
322
  {
511c3f92a   Eric Dumazet   net: skb->rtable ...
323
  	struct rtable *rt = skb_rtable(skb);
d8d1f30b9   Changli Gao   net-next: remove ...
324
  	struct net_device *dev = rt->dst.dev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
325
326
327
328
  
  	/*
  	 *	If the indicated interface is up and running, send the packet.
  	 */
88f5cc245   Eric W. Biederman   ipv4: Remember th...
329
  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
330
331
332
333
334
335
336
337
338
  
  	skb->dev = dev;
  	skb->protocol = htons(ETH_P_IP);
  
  	/*
  	 *	Multicasts are looped back for other local users
  	 */
  
  	if (rt->rt_flags&RTCF_MULTICAST) {
7ad6848c7   Octavian Purdila   ip: fix mc_loop c...
339
  		if (sk_mc_loop(sk)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
341
342
343
344
345
346
347
348
  #ifdef CONFIG_IP_MROUTE
  		/* Small optimization: do not loopback not local frames,
  		   which returned after forwarding; they will be  dropped
  		   by ip_mr_input in any case.
  		   Note, that local frames are looped back to be delivered
  		   to local recipients.
  
  		   This check is duplicated in ip_mr_input at the moment.
  		 */
9d4fb27db   Joe Perches   net/ipv4: Move &&...
349
350
351
  		    &&
  		    ((rt->rt_flags & RTCF_LOCAL) ||
  		     !(IPCB(skb)->flags & IPSKB_FORWARDED))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
352
  #endif
9d4fb27db   Joe Perches   net/ipv4: Move &&...
353
  		   ) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
354
355
  			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  			if (newskb)
9bbc768aa   Jan Engelhardt   netfilter: ipv4: ...
356
  				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
29a26a568   Eric W. Biederman   netfilter: Pass s...
357
  					net, sk, newskb, NULL, newskb->dev,
33b486793   Daniel Mack   net: ipv4, ipv6: ...
358
  					ip_mc_finish_output);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
359
360
361
  		}
  
  		/* Multicasts with ttl 0 must not go beyond the host */
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
362
  		if (ip_hdr(skb)->ttl == 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
364
365
366
367
368
369
370
  			kfree_skb(skb);
  			return 0;
  		}
  	}
  
  	if (rt->rt_flags&RTCF_BROADCAST) {
  		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  		if (newskb)
29a26a568   Eric W. Biederman   netfilter: Pass s...
371
372
  			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
  				net, sk, newskb, NULL, newskb->dev,
33b486793   Daniel Mack   net: ipv4, ipv6: ...
373
  				ip_mc_finish_output);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
374
  	}
29a26a568   Eric W. Biederman   netfilter: Pass s...
375
376
377
  	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
  			    net, sk, skb, NULL, skb->dev,
  			    ip_finish_output,
48d5cad87   Patrick McHardy   [XFRM]: Fix SNAT-...
378
  			    !(IPCB(skb)->flags & IPSKB_REROUTED));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
379
  }
ede2059db   Eric W. Biederman   dst: Pass net int...
380
  int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
381
  {
adf30907d   Eric Dumazet   net: skb->dst acc...
382
  	struct net_device *dev = skb_dst(skb)->dev;
1bd9bef6f   Patrick McHardy   [NETFILTER]: Call...
383

88f5cc245   Eric W. Biederman   ipv4: Remember th...
384
  	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
385

1bd9bef6f   Patrick McHardy   [NETFILTER]: Call...
386
387
  	skb->dev = dev;
  	skb->protocol = htons(ETH_P_IP);
29a26a568   Eric W. Biederman   netfilter: Pass s...
388
389
  	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
  			    net, sk, skb, NULL, dev,
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
390
  			    ip_finish_output,
48d5cad87   Patrick McHardy   [XFRM]: Fix SNAT-...
391
  			    !(IPCB(skb)->flags & IPSKB_REROUTED));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
392
  }
84f9307c5   Eric Dumazet   ipv4: use a 64bit...
393
394
395
396
397
398
399
400
401
402
403
404
405
  /*
   * Fill in the header's source and destination addresses from the flow
   * key with one memcpy(), which may be done with 64bit load/stores.
   * Has the same effect as:
   *   iph->saddr = fl4->saddr;
   *   iph->daddr = fl4->daddr;
   */
  static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
  {
  	/* A single copy is only valid if daddr directly follows saddr in
  	 * the flow key; refuse to build otherwise.
  	 */
  	BUILD_BUG_ON(offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr) !=
  		     offsetof(typeof(*fl4), daddr));

  	memcpy(&iph->saddr, &fl4->saddr,
  	       sizeof(fl4->daddr) + sizeof(fl4->saddr));
  }
b0270e910   Eric Dumazet   ipv4: add a sock ...
406
407
  /* Note: skb->sk can be different from sk, in case of tunnels */
  int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
409
  	struct inet_sock *inet = inet_sk(sk);
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
410
  	struct net *net = sock_net(sk);
f6d8bd051   Eric Dumazet   inet: add RCU pro...
411
  	struct ip_options_rcu *inet_opt;
b57ae01a8   David S. Miller   ipv4: Use cork fl...
412
  	struct flowi4 *fl4;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
413
414
  	struct rtable *rt;
  	struct iphdr *iph;
ab6e3feba   Eric Dumazet   net: No dst refco...
415
  	int res;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
416
417
418
419
  
  	/* Skip all of this if the packet is already routed,
  	 * f.e. by something like SCTP.
  	 */
ab6e3feba   Eric Dumazet   net: No dst refco...
420
  	rcu_read_lock();
f6d8bd051   Eric Dumazet   inet: add RCU pro...
421
  	inet_opt = rcu_dereference(inet->inet_opt);
ea4fc0d61   David S. Miller   ipv4: Don't use r...
422
  	fl4 = &fl->u.ip4;
511c3f92a   Eric Dumazet   net: skb->rtable ...
423
  	rt = skb_rtable(skb);
00db41243   Ian Morris   ipv4: coding styl...
424
  	if (rt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
426
427
428
  		goto packet_routed;
  
  	/* Make sure we can route this packet. */
  	rt = (struct rtable *)__sk_dst_check(sk, 0);
51456b291   Ian Morris   ipv4: coding styl...
429
  	if (!rt) {
3ca3c68e7   Al Viro   [IPV4]: struct ip...
430
  		__be32 daddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
431
432
  
  		/* Use correct destination address if we have options. */
c720c7e83   Eric Dumazet   inet: rename some...
433
  		daddr = inet->inet_daddr;
f6d8bd051   Eric Dumazet   inet: add RCU pro...
434
435
  		if (inet_opt && inet_opt->opt.srr)
  			daddr = inet_opt->opt.faddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
436

78fbfd8a6   David S. Miller   ipv4: Create and ...
437
438
439
440
  		/* If this fails, retransmit mechanism of transport layer will
  		 * keep trying until route appears or the connection times
  		 * itself out.
  		 */
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
441
  		rt = ip_route_output_ports(net, fl4, sk,
78fbfd8a6   David S. Miller   ipv4: Create and ...
442
443
444
445
446
447
448
449
  					   daddr, inet->inet_saddr,
  					   inet->inet_dport,
  					   inet->inet_sport,
  					   sk->sk_protocol,
  					   RT_CONN_FLAGS(sk),
  					   sk->sk_bound_dev_if);
  		if (IS_ERR(rt))
  			goto no_route;
d8d1f30b9   Changli Gao   net-next: remove ...
450
  		sk_setup_caps(sk, &rt->dst);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
451
  	}
d8d1f30b9   Changli Gao   net-next: remove ...
452
  	skb_dst_set_noref(skb, &rt->dst);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
454
  
  packet_routed:
155e8336c   Julian Anastasov   ipv4: introduce r...
455
  	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
456
457
458
  		goto no_route;
  
  	/* OK, we know where to send it, allocate and build IP header. */
f6d8bd051   Eric Dumazet   inet: add RCU pro...
459
  	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
8856dfa3e   Arnaldo Carvalho de Melo   [SK_BUFF]: Use sk...
460
  	skb_reset_network_header(skb);
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
461
  	iph = ip_hdr(skb);
714e85be3   Al Viro   [IPV6]: Assorted ...
462
  	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
60ff74673   WANG Cong   net: rename local...
463
  	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
465
466
  		iph->frag_off = htons(IP_DF);
  	else
  		iph->frag_off = 0;
d8d1f30b9   Changli Gao   net-next: remove ...
467
  	iph->ttl      = ip_select_ttl(inet, &rt->dst);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
468
  	iph->protocol = sk->sk_protocol;
84f9307c5   Eric Dumazet   ipv4: use a 64bit...
469
  	ip_copy_addrs(iph, fl4);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
  	/* Transport layer set skb->h.foo itself. */
f6d8bd051   Eric Dumazet   inet: add RCU pro...
471
472
473
  	if (inet_opt && inet_opt->opt.optlen) {
  		iph->ihl += inet_opt->opt.optlen >> 2;
  		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
  	}
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
475
  	ip_select_ident_segs(net, skb, sk,
b6a7719ae   Hannes Frederic Sowa   ipv4: hash net pt...
476
  			     skb_shinfo(skb)->gso_segs ?: 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
477

b0270e910   Eric Dumazet   ipv4: add a sock ...
478
  	/* TODO : should we use skb->sk here instead of sk ? */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
  	skb->priority = sk->sk_priority;
4a19ec580   Laszlo Attila Toth   [NET]: Introducin...
480
  	skb->mark = sk->sk_mark;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
481

33224b16f   Eric W. Biederman   ipv4, ipv6: Pass ...
482
  	res = ip_local_out(net, sk, skb);
ab6e3feba   Eric Dumazet   net: No dst refco...
483
484
  	rcu_read_unlock();
  	return res;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
485
486
  
  no_route:
ab6e3feba   Eric Dumazet   net: No dst refco...
487
  	rcu_read_unlock();
77589ce0f   Eric W. Biederman   ipv4: Cache net i...
488
  	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
489
490
491
  	kfree_skb(skb);
  	return -EHOSTUNREACH;
  }
4bc2f18ba   Eric Dumazet   net/ipv4: EXPORT_...
492
  EXPORT_SYMBOL(ip_queue_xmit);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
493

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
494
495
496
497
498
  static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
  {
  	to->pkt_type = from->pkt_type;
  	to->priority = from->priority;
  	to->protocol = from->protocol;
adf30907d   Eric Dumazet   net: skb->dst acc...
499
  	skb_dst_drop(to);
fe76cda30   Eric Dumazet   ipv4: use skb_dst...
500
  	skb_dst_copy(to, from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
501
  	to->dev = from->dev;
82e91ffef   Thomas Graf   [NET]: Turn nfmar...
502
  	to->mark = from->mark;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
503

c2ce657fd   Paolo Abeni   ip: hash fragment...
504
  	skb_copy_hash(to, from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
505
506
507
508
509
510
  	/* Copy the flags to each fragment. */
  	IPCB(to)->flags = IPCB(from)->flags;
  
  #ifdef CONFIG_NET_SCHED
  	to->tc_index = from->tc_index;
  #endif
e7ac05f34   Yasuyuki Kozakai   [NETFILTER]: nf_c...
511
  	nf_copy(to, from);
6ca40d4e8   Javier Martinez Canillas   ipv4: use IS_ENAB...
512
  #if IS_ENABLED(CONFIG_IP_VS)
c98d80edc   Julian Anastasov   [SK_BUFF]: ipvs_p...
513
514
  	to->ipvs_property = from->ipvs_property;
  #endif
984bc16cc   James Morris   [SECMARK]: Add se...
515
  	skb_copy_secmark(to, from);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
516
  }
694869b3c   Eric W. Biederman   ipv4: Pass struct...
517
  static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
c5501eb34   Florian Westphal   net: ipv4: avoid ...
518
  		       unsigned int mtu,
694869b3c   Eric W. Biederman   ipv4: Pass struct...
519
  		       int (*output)(struct net *, struct sock *, struct sk_buff *))
49d16b23c   Andy Zhou   bridge_netfilter:...
520
521
  {
  	struct iphdr *iph = ip_hdr(skb);
49d16b23c   Andy Zhou   bridge_netfilter:...
522

d6b915e29   Florian Westphal   ip_fragment: don'...
523
  	if ((iph->frag_off & htons(IP_DF)) == 0)
694869b3c   Eric W. Biederman   ipv4: Pass struct...
524
  		return ip_do_fragment(net, sk, skb, output);
d6b915e29   Florian Westphal   ip_fragment: don'...
525
526
  
  	if (unlikely(!skb->ignore_df ||
49d16b23c   Andy Zhou   bridge_netfilter:...
527
528
  		     (IPCB(skb)->frag_max_size &&
  		      IPCB(skb)->frag_max_size > mtu))) {
9479b0af4   Eric W. Biederman   ipv4: Explicitly ...
529
  		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
49d16b23c   Andy Zhou   bridge_netfilter:...
530
531
532
533
534
  		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
  			  htonl(mtu));
  		kfree_skb(skb);
  		return -EMSGSIZE;
  	}
694869b3c   Eric W. Biederman   ipv4: Pass struct...
535
  	return ip_do_fragment(net, sk, skb, output);
49d16b23c   Andy Zhou   bridge_netfilter:...
536
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
537
538
539
540
541
542
  /*
   *	This IP datagram is too large to be sent in one piece.  Break it up into
   *	smaller pieces (each of size equal to IP header plus
   *	a block of the data of the original IP data part) that will yet fit in a
   *	single device frame, and queue such a frame for sending.
   */
694869b3c   Eric W. Biederman   ipv4: Pass struct...
543
544
  int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
  		   int (*output)(struct net *, struct sock *, struct sk_buff *))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
545
546
  {
  	struct iphdr *iph;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
  	int ptr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
548
  	struct sk_buff *skb2;
c893b8066   Changli Gao   ip_fragment: fix ...
549
  	unsigned int mtu, hlen, left, len, ll_rs;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
550
  	int offset;
76ab608d8   Alexey Dobriyan   [NET]: Endian-ann...
551
  	__be16 not_last_frag;
511c3f92a   Eric Dumazet   net: skb->rtable ...
552
  	struct rtable *rt = skb_rtable(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
553
  	int err = 0;
dbd3393c5   Hannes Frederic Sowa   ipv4: add defensi...
554
555
556
557
  	/* for offloaded checksums cleanup checksum before fragmentation */
  	if (skb->ip_summed == CHECKSUM_PARTIAL &&
  	    (err = skb_checksum_help(skb)))
  		goto fail;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
558
559
560
  	/*
  	 *	Point into the IP datagram header.
  	 */
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
561
  	iph = ip_hdr(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
562

fedbb6b4f   Shmulik Ladkani   ipv4: Fix ip_skb_...
563
  	mtu = ip_skb_dst_mtu(sk, skb);
d6b915e29   Florian Westphal   ip_fragment: don'...
564
565
  	if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
  		mtu = IPCB(skb)->frag_max_size;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
566
567
568
569
570
571
  
  	/*
  	 *	Setup starting values.
  	 */
  
  	hlen = iph->ihl * 4;
f87c10a8a   Hannes Frederic Sowa   ipv4: introduce i...
572
  	mtu = mtu - hlen;	/* Size of data space */
89cee8b1c   Herbert Xu   [IPV4]: Safer rea...
573
  	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
254d900b8   Vasily Averin   ipv4: ip_do_fragm...
574
  	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
575
576
577
578
579
580
581
582
  
  	/* When frag_list is given, use it. First, check its validity:
  	 * some transformers could create wrong frag_list or break existing
  	 * one, it is not prohibited. In this case fall back to copying.
  	 *
  	 * LATER: this step can be merged to real generation of fragments,
  	 * we can switch to copy when see the first bad fragment.
  	 */
21dc33015   David S. Miller   net: Rename skb_h...
583
  	if (skb_has_frag_list(skb)) {
3d13008e7   Eric Dumazet   ip: fix truesize ...
584
  		struct sk_buff *frag, *frag2;
c72d8cdaa   Alexey Dobriyan   net: fix bogus ca...
585
  		unsigned int first_len = skb_pagelen(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
586
587
588
  
  		if (first_len - hlen > mtu ||
  		    ((first_len - hlen) & 7) ||
56f8a75c1   Paul Gortmaker   ip: introduce ip_...
589
  		    ip_is_fragment(iph) ||
254d900b8   Vasily Averin   ipv4: ip_do_fragm...
590
591
  		    skb_cloned(skb) ||
  		    skb_headroom(skb) < ll_rs)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
592
  			goto slow_path;
d7fcf1a5c   David S. Miller   ipv4: Use frag li...
593
  		skb_walk_frags(skb, frag) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
594
595
596
  			/* Correct geometry. */
  			if (frag->len > mtu ||
  			    ((frag->len & 7) && frag->next) ||
254d900b8   Vasily Averin   ipv4: ip_do_fragm...
597
  			    skb_headroom(frag) < hlen + ll_rs)
3d13008e7   Eric Dumazet   ip: fix truesize ...
598
  				goto slow_path_clean;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
599
600
601
  
  			/* Partially cloned skb? */
  			if (skb_shared(frag))
3d13008e7   Eric Dumazet   ip: fix truesize ...
602
  				goto slow_path_clean;
2fdba6b08   Herbert Xu   [IPV4/IPV6] Ensur...
603
604
605
  
  			BUG_ON(frag->sk);
  			if (skb->sk) {
2fdba6b08   Herbert Xu   [IPV4/IPV6] Ensur...
606
607
  				frag->sk = skb->sk;
  				frag->destructor = sock_wfree;
2fdba6b08   Herbert Xu   [IPV4/IPV6] Ensur...
608
  			}
3d13008e7   Eric Dumazet   ip: fix truesize ...
609
  			skb->truesize -= frag->truesize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
610
611
612
613
614
615
616
  		}
  
  		/* Everything is OK. Generate! */
  
  		err = 0;
  		offset = 0;
  		frag = skb_shinfo(skb)->frag_list;
d7fcf1a5c   David S. Miller   ipv4: Use frag li...
617
  		skb_frag_list_init(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
619
620
621
622
623
624
625
626
627
628
  		skb->data_len = first_len - skb_headlen(skb);
  		skb->len = first_len;
  		iph->tot_len = htons(first_len);
  		iph->frag_off = htons(IP_MF);
  		ip_send_check(iph);
  
  		for (;;) {
  			/* Prepare header of the next frame,
  			 * before previous one went down. */
  			if (frag) {
  				frag->ip_summed = CHECKSUM_NONE;
badff6d01   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
629
  				skb_reset_transport_header(frag);
e2d1bca7e   Arnaldo Carvalho de Melo   [SK_BUFF]: Use sk...
630
631
  				__skb_push(frag, hlen);
  				skb_reset_network_header(frag);
d56f90a7c   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
632
  				memcpy(skb_network_header(frag), iph, hlen);
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
633
  				iph = ip_hdr(frag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
634
635
636
637
638
639
  				iph->tot_len = htons(frag->len);
  				ip_copy_metadata(frag, skb);
  				if (offset == 0)
  					ip_options_fragment(frag);
  				offset += skb->len - hlen;
  				iph->frag_off = htons(offset>>3);
00db41243   Ian Morris   ipv4: coding styl...
640
  				if (frag->next)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
641
642
643
644
  					iph->frag_off |= htons(IP_MF);
  				/* Ready, complete checksum */
  				ip_send_check(iph);
  			}
694869b3c   Eric W. Biederman   ipv4: Pass struct...
645
  			err = output(net, sk, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
646

dafee4908   Wei Dong   [IPV6]: SNMPv2 "i...
647
  			if (!err)
26a949dbd   Eric W. Biederman   ipv4: Only comput...
648
  				IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
649
650
651
652
653
654
655
656
657
  			if (err || !frag)
  				break;
  
  			skb = frag;
  			frag = skb->next;
  			skb->next = NULL;
  		}
  
  		if (err == 0) {
26a949dbd   Eric W. Biederman   ipv4: Only comput...
658
  			IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
659
660
661
662
663
664
665
666
  			return 0;
  		}
  
  		while (frag) {
  			skb = frag->next;
  			kfree_skb(frag);
  			frag = skb;
  		}
26a949dbd   Eric W. Biederman   ipv4: Only comput...
667
  		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
668
  		return err;
3d13008e7   Eric Dumazet   ip: fix truesize ...
669
670
671
672
673
674
675
676
677
  
  slow_path_clean:
  		skb_walk_frags(skb, frag2) {
  			if (frag2 == frag)
  				break;
  			frag2->sk = NULL;
  			frag2->destructor = NULL;
  			skb->truesize += frag2->truesize;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
678
679
680
  	}
  
  slow_path:
c9af6db4c   Pravin B Shelar   net: Fix possible...
681
  	iph = ip_hdr(skb);
fc70fb640   Alexander Duyck   net: Handle encap...
682

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
683
  	left = skb->len - hlen;		/* Space per frame */
49085bd7d   George Kadianakis   net/ipv4/ip_outpu...
684
  	ptr = hlen;		/* Where to start from */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
685

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
686
687
688
689
690
691
692
693
694
695
  	/*
  	 *	Fragment the datagram.
  	 */
  
  	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
  	not_last_frag = iph->frag_off & htons(IP_MF);
  
  	/*
  	 *	Keep copying data until we run out.
  	 */
132adf546   Stephen Hemminger   [IPV4]: cleanup
696
  	while (left > 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
697
698
699
700
  		len = left;
  		/* IF: it doesn't fit, use 'mtu' - the data space left */
  		if (len > mtu)
  			len = mtu;
25985edce   Lucas De Marchi   Fix common misspe...
701
  		/* IF: we are not sending up to and including the packet end
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
702
703
704
705
  		   then align the next start on an eight byte boundary */
  		if (len < left)	{
  			len &= ~7;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
706

cbffccc97   Joe Perches   net; ipv[46] - Re...
707
708
709
  		/* Allocate buffer */
  		skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
  		if (!skb2) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
710
711
712
713
714
715
716
717
718
719
720
  			err = -ENOMEM;
  			goto fail;
  		}
  
  		/*
  		 *	Set up data on packet
  		 */
  
  		ip_copy_metadata(skb2, skb);
  		skb_reserve(skb2, ll_rs);
  		skb_put(skb2, len + hlen);
c1d2bbe1c   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
721
  		skb_reset_network_header(skb2);
b0e380b1d   Arnaldo Carvalho de Melo   [SK_BUFF]: unions...
722
  		skb2->transport_header = skb2->network_header + hlen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
723
724
725
726
727
728
729
730
731
732
733
734
  
  		/*
  		 *	Charge the memory for the fragment to any owner
  		 *	it might possess
  		 */
  
  		if (skb->sk)
  			skb_set_owner_w(skb2, skb->sk);
  
  		/*
  		 *	Copy the packet header into the new buffer.
  		 */
d626f62b1   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
735
  		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
736
737
738
739
  
  		/*
  		 *	Copy a block of the IP datagram.
  		 */
bff9b61ce   Arnaldo Carvalho de Melo   [SK_BUFF]: Use th...
740
  		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
741
742
743
744
745
746
  			BUG();
  		left -= len;
  
  		/*
  		 *	Fill in the new header fields.
  		 */
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
747
  		iph = ip_hdr(skb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
  		iph->frag_off = htons((offset >> 3));
d6b915e29   Florian Westphal   ip_fragment: don'...
749
750
  		if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
  			iph->frag_off |= htons(IP_DF);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
  		/* ANK: dirty, but effective trick. Upgrade options only if
  		 * the segment to be fragmented was THE FIRST (otherwise,
  		 * options are already fixed) and make it ONCE
  		 * on the initial skb, so that all the following fragments
  		 * will inherit fixed options.
  		 */
  		if (offset == 0)
  			ip_options_fragment(skb);
  
  		/*
  		 *	Added AC : If we are fragmenting a fragment that's not the
  		 *		   last fragment then keep MF on each bit
  		 */
  		if (left > 0 || not_last_frag)
  			iph->frag_off |= htons(IP_MF);
  		ptr += len;
  		offset += len;
  
  		/*
  		 *	Put this fragment into the sending queue.
  		 */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
773
774
  		iph->tot_len = htons(len + hlen);
  
  		ip_send_check(iph);
694869b3c   Eric W. Biederman   ipv4: Pass struct...
775
  		err = output(net, sk, skb2);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
777
  		if (err)
  			goto fail;
dafee4908   Wei Dong   [IPV6]: SNMPv2 "i...
778

26a949dbd   Eric W. Biederman   ipv4: Only comput...
779
  		IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
780
  	}
5d0ba55b6   Eric Dumazet   net: use consume_...
781
  	consume_skb(skb);
26a949dbd   Eric W. Biederman   ipv4: Only comput...
782
  	IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
783
784
785
  	return err;
  
  fail:
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
786
  	kfree_skb(skb);
26a949dbd   Eric W. Biederman   ipv4: Only comput...
787
  	IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
788
789
  	return err;
  }
49d16b23c   Andy Zhou   bridge_netfilter:...
790
  EXPORT_SYMBOL(ip_do_fragment);
2e2f7aefa   Patrick McHardy   [NETFILTER]: Fix ...
791

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
792
793
794
  int
  ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
  {
f69e6d131   Al Viro   ip_generic_getfra...
795
  	struct msghdr *msg = from;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
796

84fa7933a   Patrick McHardy   [NET]: Replace CH...
797
  	if (skb->ip_summed == CHECKSUM_PARTIAL) {
0b62fca26   Al Viro   switch getfrag ca...
798
  		if (!copy_from_iter_full(to, len, &msg->msg_iter))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
799
800
  			return -EFAULT;
  	} else {
44bb93633   Al Viro   [NET]: Annotate c...
801
  		__wsum csum = 0;
0b62fca26   Al Viro   switch getfrag ca...
802
  		if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
803
804
805
806
807
  			return -EFAULT;
  		skb->csum = csum_block_add(skb->csum, csum, odd);
  	}
  	return 0;
  }
4bc2f18ba   Eric Dumazet   net/ipv4: EXPORT_...
808
  EXPORT_SYMBOL(ip_generic_getfrag);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809

44bb93633   Al Viro   [NET]: Annotate c...
810
  static inline __wsum
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
811
812
813
  csum_page(struct page *page, int offset, int copy)
  {
  	char *kaddr;
44bb93633   Al Viro   [NET]: Annotate c...
814
  	__wsum csum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
815
816
817
818
819
  	kaddr = kmap(page);
  	csum = csum_partial(kaddr + offset, copy, 0);
  	kunmap(page);
  	return csum;
  }
f5fca6086   David S. Miller   ipv4: Pass flow k...
820
821
822
  static int __ip_append_data(struct sock *sk,
  			    struct flowi4 *fl4,
  			    struct sk_buff_head *queue,
1470ddf7f   Herbert Xu   inet: Remove expl...
823
  			    struct inet_cork *cork,
5640f7685   Eric Dumazet   net: use a per ta...
824
  			    struct page_frag *pfrag,
1470ddf7f   Herbert Xu   inet: Remove expl...
825
826
827
828
  			    int getfrag(void *from, char *to, int offset,
  					int len, int odd, struct sk_buff *skb),
  			    void *from, int length, int transhdrlen,
  			    unsigned int flags)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
830
831
  {
  	struct inet_sock *inet = inet_sk(sk);
  	struct sk_buff *skb;
07df5294a   Herbert Xu   inet: Replace lef...
832
  	struct ip_options *opt = cork->opt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
833
834
835
836
837
838
  	int hh_len;
  	int exthdrlen;
  	int mtu;
  	int copy;
  	int err;
  	int offset = 0;
daba287b2   Hannes Frederic Sowa   ipv4: fix DO and ...
839
  	unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
840
  	int csummode = CHECKSUM_NONE;
1470ddf7f   Herbert Xu   inet: Remove expl...
841
  	struct rtable *rt = (struct rtable *)cork->dst;
09c2d251b   Willem de Bruijn   net-timestamp: ad...
842
  	u32 tskey = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
843

96d7303e9   Steffen Klassert   ipv4: Fix packet ...
844
845
846
  	skb = skb_peek_tail(queue);
  
  	exthdrlen = !skb ? rt->dst.header_len : 0;
07df5294a   Herbert Xu   inet: Replace lef...
847
  	mtu = cork->fragsize;
09c2d251b   Willem de Bruijn   net-timestamp: ad...
848
849
850
  	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
  	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
  		tskey = sk->sk_tskey++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851

d8d1f30b9   Changli Gao   net-next: remove ...
852
  	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
853
854
855
  
  	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
  	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
60ff74673   WANG Cong   net: rename local...
856
  	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
857

daba287b2   Hannes Frederic Sowa   ipv4: fix DO and ...
858
  	if (cork->length + length > maxnonfragsize - fragheaderlen) {
f5fca6086   David S. Miller   ipv4: Pass flow k...
859
  		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
61e7f09d0   Hannes Frederic Sowa   ipv4: consistent ...
860
  			       mtu - (opt ? opt->optlen : 0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
861
862
863
864
865
866
867
868
869
  		return -EMSGSIZE;
  	}
  
  	/*
  	 * transhdrlen > 0 means that this is the first fragment and we wish
  	 * it won't be fragmented in the future.
  	 */
  	if (transhdrlen &&
  	    length + fragheaderlen <= mtu &&
c8cd0989b   Tom Herbert   net: Eliminate NE...
870
  	    rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) &&
d749c9cbf   Hannes Frederic Sowa   ipv4: no CHECKSUM...
871
  	    !(flags & MSG_MORE) &&
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
872
  	    !exthdrlen)
84fa7933a   Patrick McHardy   [NET]: Replace CH...
873
  		csummode = CHECKSUM_PARTIAL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
874

1470ddf7f   Herbert Xu   inet: Remove expl...
875
  	cork->length += length;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
876
877
878
879
880
881
882
  
  	/* So, what's going on in the loop below?
  	 *
  	 * We use calculated fragment length to generate chained skb,
  	 * each of segments is IP fragment ready for sending to network after
  	 * adding appropriate IP header.
  	 */
26cde9f7e   Herbert Xu   udp: Fix bogus UF...
883
  	if (!skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
  		goto alloc_new_skb;
  
  	while (length > 0) {
  		/* Check if the remaining data fits into current packet. */
  		copy = mtu - skb->len;
  		if (copy < length)
  			copy = maxfraglen - skb->len;
  		if (copy <= 0) {
  			char *data;
  			unsigned int datalen;
  			unsigned int fraglen;
  			unsigned int fraggap;
  			unsigned int alloclen;
  			struct sk_buff *skb_prev;
  alloc_new_skb:
  			skb_prev = skb;
  			if (skb_prev)
  				fraggap = skb_prev->len - maxfraglen;
  			else
  				fraggap = 0;
  
  			/*
  			 * If remaining data exceeds the mtu,
  			 * we know we need more fragment(s).
  			 */
  			datalen = length + fraggap;
  			if (datalen > mtu - fragheaderlen)
  				datalen = maxfraglen - fragheaderlen;
  			fraglen = datalen + fragheaderlen;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
913
  			if ((flags & MSG_MORE) &&
d8d1f30b9   Changli Gao   net-next: remove ...
914
  			    !(rt->dst.dev->features&NETIF_F_SG))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
915
916
  				alloclen = mtu;
  			else
59104f062   Eric Dumazet   ip: take care of ...
917
  				alloclen = fraglen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
918

353e5c9ab   Steffen Klassert   ipv4: Fix IPsec s...
919
  			alloclen += exthdrlen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
920
921
922
923
924
  			/* The last fragment gets additional space at tail.
  			 * Note, with MSG_MORE we overallocate on fragments,
  			 * because we have no idea what fragment will be
  			 * the last.
  			 */
33f99dc7f   Steffen Klassert   ipv4: Fix packet ...
925
  			if (datalen == length + fraggap)
d8d1f30b9   Changli Gao   net-next: remove ...
926
  				alloclen += rt->dst.trailer_len;
33f99dc7f   Steffen Klassert   ipv4: Fix packet ...
927

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928
  			if (transhdrlen) {
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
929
  				skb = sock_alloc_send_skb(sk,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
930
931
932
933
  						alloclen + hh_len + 15,
  						(flags & MSG_DONTWAIT), &err);
  			} else {
  				skb = NULL;
14afee4b6   Reshetova, Elena   net: convert sock...
934
  				if (refcount_read(&sk->sk_wmem_alloc) <=
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
935
  				    2 * sk->sk_sndbuf)
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
936
  					skb = sock_wmalloc(sk,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
937
938
  							   alloclen + hh_len + 15, 1,
  							   sk->sk_allocation);
51456b291   Ian Morris   ipv4: coding styl...
939
  				if (unlikely(!skb))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
940
941
  					err = -ENOBUFS;
  			}
51456b291   Ian Morris   ipv4: coding styl...
942
  			if (!skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
943
944
945
946
947
948
949
950
  				goto error;
  
  			/*
  			 *	Fill in the control structures
  			 */
  			skb->ip_summed = csummode;
  			skb->csum = 0;
  			skb_reserve(skb, hh_len);
11878b40e   Willem de Bruijn   net-timestamp: SO...
951
952
  
  			/* only the initial fragment is time stamped */
1470ddf7f   Herbert Xu   inet: Remove expl...
953
  			skb_shinfo(skb)->tx_flags = cork->tx_flags;
11878b40e   Willem de Bruijn   net-timestamp: SO...
954
  			cork->tx_flags = 0;
09c2d251b   Willem de Bruijn   net-timestamp: ad...
955
956
  			skb_shinfo(skb)->tskey = tskey;
  			tskey = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
957
958
959
960
  
  			/*
  			 *	Find where to start putting bytes.
  			 */
353e5c9ab   Steffen Klassert   ipv4: Fix IPsec s...
961
  			data = skb_put(skb, fraglen + exthdrlen);
c14d2450c   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
962
  			skb_set_network_header(skb, exthdrlen);
b0e380b1d   Arnaldo Carvalho de Melo   [SK_BUFF]: unions...
963
964
  			skb->transport_header = (skb->network_header +
  						 fragheaderlen);
353e5c9ab   Steffen Klassert   ipv4: Fix IPsec s...
965
  			data += fragheaderlen + exthdrlen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
966
967
968
969
970
971
972
973
  
  			if (fraggap) {
  				skb->csum = skb_copy_and_csum_bits(
  					skb_prev, maxfraglen,
  					data + transhdrlen, fraggap, 0);
  				skb_prev->csum = csum_sub(skb_prev->csum,
  							  skb->csum);
  				data += fraggap;
e9fa4f7bd   Herbert Xu   [INET]: Use pskb_...
974
  				pskb_trim_unique(skb_prev, maxfraglen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
976
977
978
979
980
981
982
983
984
985
986
987
988
  			}
  
  			copy = datalen - transhdrlen - fraggap;
  			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
  				err = -EFAULT;
  				kfree_skb(skb);
  				goto error;
  			}
  
  			offset += copy;
  			length -= datalen - fraggap;
  			transhdrlen = 0;
  			exthdrlen = 0;
  			csummode = CHECKSUM_NONE;
0dec879f6   Julian Anastasov   net: use dst_conf...
989
990
  			if ((flags & MSG_CONFIRM) && !skb_prev)
  				skb_set_dst_pending_confirm(skb, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
991
992
993
  			/*
  			 * Put the packet on the pending queue.
  			 */
1470ddf7f   Herbert Xu   inet: Remove expl...
994
  			__skb_queue_tail(queue, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
995
996
997
998
999
  			continue;
  		}
  
  		if (copy > length)
  			copy = length;
c02756173   Willem de Bruijn   net: test tailroo...
1000
1001
  		if (!(rt->dst.dev->features&NETIF_F_SG) &&
  		    skb_tailroom(skb) >= copy) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1002
1003
1004
  			unsigned int off;
  
  			off = skb->len;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
1005
  			if (getfrag(from, skb_put(skb, copy),
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1006
1007
1008
1009
1010
1011
1012
  					offset, copy, off, skb) < 0) {
  				__skb_trim(skb, off);
  				err = -EFAULT;
  				goto error;
  			}
  		} else {
  			int i = skb_shinfo(skb)->nr_frags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1013

5640f7685   Eric Dumazet   net: use a per ta...
1014
1015
  			err = -ENOMEM;
  			if (!sk_page_frag_refill(sk, pfrag))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1016
  				goto error;
5640f7685   Eric Dumazet   net: use a per ta...
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
  
  			if (!skb_can_coalesce(skb, i, pfrag->page,
  					      pfrag->offset)) {
  				err = -EMSGSIZE;
  				if (i == MAX_SKB_FRAGS)
  					goto error;
  
  				__skb_fill_page_desc(skb, i, pfrag->page,
  						     pfrag->offset, 0);
  				skb_shinfo(skb)->nr_frags = ++i;
  				get_page(pfrag->page);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1028
  			}
5640f7685   Eric Dumazet   net: use a per ta...
1029
1030
1031
1032
1033
1034
1035
1036
  			copy = min_t(int, copy, pfrag->size - pfrag->offset);
  			if (getfrag(from,
  				    page_address(pfrag->page) + pfrag->offset,
  				    offset, copy, skb->len, skb) < 0)
  				goto error_efault;
  
  			pfrag->offset += copy;
  			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1037
1038
  			skb->len += copy;
  			skb->data_len += copy;
f945fa7ad   Herbert Xu   [INET]: Fix trues...
1039
  			skb->truesize += copy;
14afee4b6   Reshetova, Elena   net: convert sock...
1040
  			refcount_add(copy, &sk->sk_wmem_alloc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1041
1042
1043
1044
1045
1046
  		}
  		offset += copy;
  		length -= copy;
  	}
  
  	return 0;
5640f7685   Eric Dumazet   net: use a per ta...
1047
1048
  error_efault:
  	err = -EFAULT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1049
  error:
1470ddf7f   Herbert Xu   inet: Remove expl...
1050
  	cork->length -= length;
5e38e2704   Pavel Emelyanov   mib: add net to I...
1051
  	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
1052
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1053
  }
1470ddf7f   Herbert Xu   inet: Remove expl...
1054
1055
1056
  static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
  			 struct ipcm_cookie *ipc, struct rtable **rtp)
  {
f6d8bd051   Eric Dumazet   inet: add RCU pro...
1057
  	struct ip_options_rcu *opt;
1470ddf7f   Herbert Xu   inet: Remove expl...
1058
1059
1060
1061
1062
1063
1064
  	struct rtable *rt;
  
  	/*
  	 * setup for corking.
  	 */
  	opt = ipc->opt;
  	if (opt) {
51456b291   Ian Morris   ipv4: coding styl...
1065
  		if (!cork->opt) {
1470ddf7f   Herbert Xu   inet: Remove expl...
1066
1067
  			cork->opt = kmalloc(sizeof(struct ip_options) + 40,
  					    sk->sk_allocation);
51456b291   Ian Morris   ipv4: coding styl...
1068
  			if (unlikely(!cork->opt))
1470ddf7f   Herbert Xu   inet: Remove expl...
1069
1070
  				return -ENOBUFS;
  		}
f6d8bd051   Eric Dumazet   inet: add RCU pro...
1071
  		memcpy(cork->opt, &opt->opt, sizeof(struct ip_options) + opt->opt.optlen);
1470ddf7f   Herbert Xu   inet: Remove expl...
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
  		cork->flags |= IPCORK_OPT;
  		cork->addr = ipc->addr;
  	}
  	rt = *rtp;
  	if (unlikely(!rt))
  		return -EFAULT;
  	/*
  	 * We steal reference to this route, caller should not release it
  	 */
  	*rtp = NULL;
482fc6094   Hannes Frederic Sowa   ipv4: introduce n...
1082
1083
  	cork->fragsize = ip_sk_use_pmtu(sk) ?
  			 dst_mtu(&rt->dst) : rt->dst.dev->mtu;
1470ddf7f   Herbert Xu   inet: Remove expl...
1084
1085
  	cork->dst = &rt->dst;
  	cork->length = 0;
aa6615814   Francesco Fusco   ipv4: processing ...
1086
1087
1088
  	cork->ttl = ipc->ttl;
  	cork->tos = ipc->tos;
  	cork->priority = ipc->priority;
1470ddf7f   Herbert Xu   inet: Remove expl...
1089
  	cork->tx_flags = ipc->tx_flags;
1470ddf7f   Herbert Xu   inet: Remove expl...
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
  
  	return 0;
  }
  
  /*
   *	ip_append_data() and ip_append_page() can make one large IP datagram
   *	from many pieces of data. Each pieces will be holded on the socket
   *	until ip_push_pending_frames() is called. Each piece can be a page
   *	or non-page data.
   *
   *	Not only UDP, other transport protocols - e.g. raw sockets - can use
   *	this interface potentially.
   *
   *	LATER: length must be adjusted by pad at tail, when it is required.
   */
f5fca6086   David S. Miller   ipv4: Pass flow k...
1105
  int ip_append_data(struct sock *sk, struct flowi4 *fl4,
1470ddf7f   Herbert Xu   inet: Remove expl...
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
  		   int getfrag(void *from, char *to, int offset, int len,
  			       int odd, struct sk_buff *skb),
  		   void *from, int length, int transhdrlen,
  		   struct ipcm_cookie *ipc, struct rtable **rtp,
  		   unsigned int flags)
  {
  	struct inet_sock *inet = inet_sk(sk);
  	int err;
  
  	if (flags&MSG_PROBE)
  		return 0;
  
  	if (skb_queue_empty(&sk->sk_write_queue)) {
bdc712b4c   David S. Miller   inet: Decrease ov...
1119
  		err = ip_setup_cork(sk, &inet->cork.base, ipc, rtp);
1470ddf7f   Herbert Xu   inet: Remove expl...
1120
1121
1122
1123
1124
  		if (err)
  			return err;
  	} else {
  		transhdrlen = 0;
  	}
5640f7685   Eric Dumazet   net: use a per ta...
1125
1126
  	return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
  				sk_page_frag(sk), getfrag,
1470ddf7f   Herbert Xu   inet: Remove expl...
1127
1128
  				from, length, transhdrlen, flags);
  }
f5fca6086   David S. Miller   ipv4: Pass flow k...
1129
  ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1130
1131
1132
1133
1134
1135
  		       int offset, size_t size, int flags)
  {
  	struct inet_sock *inet = inet_sk(sk);
  	struct sk_buff *skb;
  	struct rtable *rt;
  	struct ip_options *opt = NULL;
bdc712b4c   David S. Miller   inet: Decrease ov...
1136
  	struct inet_cork *cork;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1137
1138
1139
1140
  	int hh_len;
  	int mtu;
  	int len;
  	int err;
daba287b2   Hannes Frederic Sowa   ipv4: fix DO and ...
1141
  	unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1142
1143
1144
1145
1146
1147
1148
1149
1150
  
  	if (inet->hdrincl)
  		return -EPERM;
  
  	if (flags&MSG_PROBE)
  		return 0;
  
  	if (skb_queue_empty(&sk->sk_write_queue))
  		return -EINVAL;
bdc712b4c   David S. Miller   inet: Decrease ov...
1151
1152
1153
1154
  	cork = &inet->cork.base;
  	rt = (struct rtable *)cork->dst;
  	if (cork->flags & IPCORK_OPT)
  		opt = cork->opt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1155

d8d1f30b9   Changli Gao   net-next: remove ...
1156
  	if (!(rt->dst.dev->features&NETIF_F_SG))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1157
  		return -EOPNOTSUPP;
d8d1f30b9   Changli Gao   net-next: remove ...
1158
  	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
bdc712b4c   David S. Miller   inet: Decrease ov...
1159
  	mtu = cork->fragsize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1160
1161
1162
  
  	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
  	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
60ff74673   WANG Cong   net: rename local...
1163
  	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1164

daba287b2   Hannes Frederic Sowa   ipv4: fix DO and ...
1165
  	if (cork->length + size > maxnonfragsize - fragheaderlen) {
61e7f09d0   Hannes Frederic Sowa   ipv4: consistent ...
1166
1167
  		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
  			       mtu - (opt ? opt->optlen : 0));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1168
1169
  		return -EMSGSIZE;
  	}
51456b291   Ian Morris   ipv4: coding styl...
1170
1171
  	skb = skb_peek_tail(&sk->sk_write_queue);
  	if (!skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1172
  		return -EINVAL;
a8c4a2522   Hannes Frederic Sowa   ipv4: only create...
1173
  	cork->length += size;
e89e9cf53   Ananda Raju   [IPv4/IPv6]: UFO ...
1174

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1175
  	while (size > 0) {
ab2fb7e32   Willem de Bruijn   udp: remove unrea...
1176
1177
1178
1179
  		/* Check if the remaining data fits into current packet. */
  		len = mtu - skb->len;
  		if (len < size)
  			len = maxfraglen - skb->len;
e89e9cf53   Ananda Raju   [IPv4/IPv6]: UFO ...
1180

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1181
1182
  		if (len <= 0) {
  			struct sk_buff *skb_prev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1183
1184
1185
  			int alloclen;
  
  			skb_prev = skb;
0d0d2bba9   Jayachandran C   [IPV4]: Remove de...
1186
  			fraggap = skb_prev->len - maxfraglen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
  
  			alloclen = fragheaderlen + hh_len + fraggap + 15;
  			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
  			if (unlikely(!skb)) {
  				err = -ENOBUFS;
  				goto error;
  			}
  
  			/*
  			 *	Fill in the control structures
  			 */
  			skb->ip_summed = CHECKSUM_NONE;
  			skb->csum = 0;
  			skb_reserve(skb, hh_len);
  
  			/*
  			 *	Find where to start putting bytes.
  			 */
967b05f64   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1205
  			skb_put(skb, fragheaderlen + fraggap);
2ca9e6f2c   Arnaldo Carvalho de Melo   [SK_BUFF]: Some m...
1206
  			skb_reset_network_header(skb);
b0e380b1d   Arnaldo Carvalho de Melo   [SK_BUFF]: unions...
1207
1208
  			skb->transport_header = (skb->network_header +
  						 fragheaderlen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1209
  			if (fraggap) {
967b05f64   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1210
1211
  				skb->csum = skb_copy_and_csum_bits(skb_prev,
  								   maxfraglen,
9c70220b7   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1212
  						    skb_transport_header(skb),
967b05f64   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1213
  								   fraggap, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1214
1215
  				skb_prev->csum = csum_sub(skb_prev->csum,
  							  skb->csum);
e9fa4f7bd   Herbert Xu   [INET]: Use pskb_...
1216
  				pskb_trim_unique(skb_prev, maxfraglen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1217
1218
1219
1220
1221
1222
1223
1224
  			}
  
  			/*
  			 * Put the packet on the pending queue.
  			 */
  			__skb_queue_tail(&sk->sk_write_queue, skb);
  			continue;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1225
1226
  		if (len > size)
  			len = size;
be12a1fe2   Hannes Frederic Sowa   net: skbuff: add ...
1227
1228
  
  		if (skb_append_pagefrags(skb, page, offset, len)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1229
1230
1231
1232
1233
  			err = -EMSGSIZE;
  			goto error;
  		}
  
  		if (skb->ip_summed == CHECKSUM_NONE) {
44bb93633   Al Viro   [NET]: Annotate c...
1234
  			__wsum csum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1235
1236
1237
1238
1239
1240
  			csum = csum_page(page, offset, len);
  			skb->csum = csum_block_add(skb->csum, csum, skb->len);
  		}
  
  		skb->len += len;
  		skb->data_len += len;
1e34a11d5   David S. Miller   [IPV4]: Add missi...
1241
  		skb->truesize += len;
14afee4b6   Reshetova, Elena   net: convert sock...
1242
  		refcount_add(len, &sk->sk_wmem_alloc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1243
1244
1245
1246
1247
1248
  		offset += len;
  		size -= len;
  	}
  	return 0;
  
  error:
bdc712b4c   David S. Miller   inet: Decrease ov...
1249
  	cork->length -= size;
5e38e2704   Pavel Emelyanov   mib: add net to I...
1250
  	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1251
1252
  	return err;
  }
1470ddf7f   Herbert Xu   inet: Remove expl...
1253
  static void ip_cork_release(struct inet_cork *cork)
429f08e95   Pavel Emelyanov   [IPV4]: Consolida...
1254
  {
1470ddf7f   Herbert Xu   inet: Remove expl...
1255
1256
1257
1258
1259
  	cork->flags &= ~IPCORK_OPT;
  	kfree(cork->opt);
  	cork->opt = NULL;
  	dst_release(cork->dst);
  	cork->dst = NULL;
429f08e95   Pavel Emelyanov   [IPV4]: Consolida...
1260
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1261
1262
1263
1264
  /*
   *	Combined all pending IP fragments on the socket as one IP datagram
   *	and push them out.
   */
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1265
  struct sk_buff *__ip_make_skb(struct sock *sk,
77968b782   David S. Miller   ipv4: Pass flow k...
1266
  			      struct flowi4 *fl4,
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1267
1268
  			      struct sk_buff_head *queue,
  			      struct inet_cork *cork)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1269
1270
1271
1272
  {
  	struct sk_buff *skb, *tmp_skb;
  	struct sk_buff **tail_skb;
  	struct inet_sock *inet = inet_sk(sk);
0388b0042   Pavel Emelyanov   icmp: add struct ...
1273
  	struct net *net = sock_net(sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1274
  	struct ip_options *opt = NULL;
1470ddf7f   Herbert Xu   inet: Remove expl...
1275
  	struct rtable *rt = (struct rtable *)cork->dst;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1276
  	struct iphdr *iph;
76ab608d8   Alexey Dobriyan   [NET]: Endian-ann...
1277
  	__be16 df = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1278
  	__u8 ttl;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1279

51456b291   Ian Morris   ipv4: coding styl...
1280
1281
  	skb = __skb_dequeue(queue);
  	if (!skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1282
1283
1284
1285
  		goto out;
  	tail_skb = &(skb_shinfo(skb)->frag_list);
  
  	/* move skb->data to ip header from ext header */
d56f90a7c   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1286
  	if (skb->data < skb_network_header(skb))
bbe735e42   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1287
  		__skb_pull(skb, skb_network_offset(skb));
1470ddf7f   Herbert Xu   inet: Remove expl...
1288
  	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
cfe1fc775   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1289
  		__skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1290
1291
1292
1293
1294
  		*tail_skb = tmp_skb;
  		tail_skb = &(tmp_skb->next);
  		skb->len += tmp_skb->len;
  		skb->data_len += tmp_skb->len;
  		skb->truesize += tmp_skb->truesize;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1295
1296
1297
1298
1299
1300
1301
1302
  		tmp_skb->destructor = NULL;
  		tmp_skb->sk = NULL;
  	}
  
  	/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
  	 * to fragment the frame generated here. No matter, what transforms
  	 * how transforms change size of the packet, it will come out.
  	 */
60ff74673   WANG Cong   net: rename local...
1303
  	skb->ignore_df = ip_sk_ignore_df(sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1304
1305
  
  	/* DF bit is set when we want to see DF on outgoing frames.
60ff74673   WANG Cong   net: rename local...
1306
  	 * If ignore_df is set too, we still allow to fragment this frame
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1307
  	 * locally. */
482fc6094   Hannes Frederic Sowa   ipv4: introduce n...
1308
1309
  	if (inet->pmtudisc == IP_PMTUDISC_DO ||
  	    inet->pmtudisc == IP_PMTUDISC_PROBE ||
d8d1f30b9   Changli Gao   net-next: remove ...
1310
1311
  	    (skb->len <= dst_mtu(&rt->dst) &&
  	     ip_dont_fragment(sk, &rt->dst)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1312
  		df = htons(IP_DF);
1470ddf7f   Herbert Xu   inet: Remove expl...
1313
1314
  	if (cork->flags & IPCORK_OPT)
  		opt = cork->opt;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1315

aa6615814   Francesco Fusco   ipv4: processing ...
1316
1317
1318
  	if (cork->ttl != 0)
  		ttl = cork->ttl;
  	else if (rt->rt_type == RTN_MULTICAST)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1319
1320
  		ttl = inet->mc_ttl;
  	else
d8d1f30b9   Changli Gao   net-next: remove ...
1321
  		ttl = ip_select_ttl(inet, &rt->dst);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1322

749154aa5   Ansis Atteka   ip: use ip_hdr() ...
1323
  	iph = ip_hdr(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1324
1325
  	iph->version = 4;
  	iph->ihl = 5;
aa6615814   Francesco Fusco   ipv4: processing ...
1326
  	iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1327
  	iph->frag_off = df;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1328
1329
  	iph->ttl = ttl;
  	iph->protocol = sk->sk_protocol;
84f9307c5   Eric Dumazet   ipv4: use a 64bit...
1330
  	ip_copy_addrs(iph, fl4);
b6a7719ae   Hannes Frederic Sowa   ipv4: hash net pt...
1331
  	ip_select_ident(net, skb, sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1332

22f728f8f   David S. Miller   ipv4: Always call...
1333
1334
1335
1336
  	if (opt) {
  		iph->ihl += opt->optlen>>2;
  		ip_options_build(skb, opt, cork->addr, rt, 0);
  	}
aa6615814   Francesco Fusco   ipv4: processing ...
1337
  	skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
4a19ec580   Laszlo Attila Toth   [NET]: Introducin...
1338
  	skb->mark = sk->sk_mark;
a21bba945   Eric Dumazet   net: avoid a pair...
1339
1340
1341
1342
  	/*
  	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
  	 * on dst refcount
  	 */
1470ddf7f   Herbert Xu   inet: Remove expl...
1343
  	cork->dst = NULL;
d8d1f30b9   Changli Gao   net-next: remove ...
1344
  	skb_dst_set(skb, &rt->dst);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1345

96793b482   David L Stevens   [IPV4]: Add ICMPM...
1346
  	if (iph->protocol == IPPROTO_ICMP)
0388b0042   Pavel Emelyanov   icmp: add struct ...
1347
  		icmp_out_count(net, ((struct icmphdr *)
96793b482   David L Stevens   [IPV4]: Add ICMPM...
1348
  			skb_transport_header(skb))->type);
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1349
1350
1351
1352
  	ip_cork_release(cork);
  out:
  	return skb;
  }
b5ec8eeac   Eric Dumazet   ipv4: fix ip_send...
1353
  int ip_send_skb(struct net *net, struct sk_buff *skb)
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1354
  {
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1355
  	int err;
33224b16f   Eric W. Biederman   ipv4, ipv6: Pass ...
1356
  	err = ip_local_out(net, skb->sk, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1357
1358
  	if (err) {
  		if (err > 0)
6ce9e7b5f   Eric Dumazet   ip: Report qdisc ...
1359
  			err = net_xmit_errno(err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1360
  		if (err)
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1361
  			IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1363
  	return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1364
  }
77968b782   David S. Miller   ipv4: Pass flow k...
1365
  /*
   *	Build one skb from the socket's pending data with ip_finish_skb()
   *	and send it through ip_send_skb().  Returns 0 when there was nothing
   *	to send, otherwise the result of ip_send_skb().
   */
  int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
  {
  	struct sk_buff *skb = ip_finish_skb(sk, fl4);

  	/* Netfilter gets the whole, not yet fragmented, skb. */
  	return skb ? ip_send_skb(sock_net(sk), skb) : 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1375
1376
1377
  /*
   *	Throw away all pending data: free every skb on @queue, taking them
   *	from the tail, and then release @cork.
   */
  static void __ip_flush_pending_frames(struct sock *sk,
  				      struct sk_buff_head *queue,
  				      struct inet_cork *cork)
  {
  	for (;;) {
  		struct sk_buff *victim = __skb_dequeue_tail(queue);

  		if (!victim)
  			break;
  		kfree_skb(victim);
  	}

  	ip_cork_release(cork);
  }
  
  void ip_flush_pending_frames(struct sock *sk)
  {
bdc712b4c   David S. Miller   inet: Decrease ov...
1390
  	__ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1391
  }
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1392
  struct sk_buff *ip_make_skb(struct sock *sk,
77968b782   David S. Miller   ipv4: Pass flow k...
1393
  			    struct flowi4 *fl4,
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1394
1395
1396
1397
1398
1399
  			    int getfrag(void *from, char *to, int offset,
  					int len, int odd, struct sk_buff *skb),
  			    void *from, int length, int transhdrlen,
  			    struct ipcm_cookie *ipc, struct rtable **rtp,
  			    unsigned int flags)
  {
b80d72261   David S. Miller   ipv4: Initialize ...
1400
  	struct inet_cork cork;
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1401
1402
1403
1404
1405
1406
1407
  	struct sk_buff_head queue;
  	int err;
  
  	if (flags & MSG_PROBE)
  		return NULL;
  
  	__skb_queue_head_init(&queue);
b80d72261   David S. Miller   ipv4: Initialize ...
1408
1409
  	cork.flags = 0;
  	cork.addr = 0;
706527280   David S. Miller   ipv4: Initialize ...
1410
  	cork.opt = NULL;
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1411
1412
1413
  	err = ip_setup_cork(sk, &cork, ipc, rtp);
  	if (err)
  		return ERR_PTR(err);
5640f7685   Eric Dumazet   net: use a per ta...
1414
1415
  	err = __ip_append_data(sk, fl4, &queue, &cork,
  			       &current->task_frag, getfrag,
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1416
1417
1418
1419
1420
  			       from, length, transhdrlen, flags);
  	if (err) {
  		__ip_flush_pending_frames(sk, &queue, &cork);
  		return ERR_PTR(err);
  	}
77968b782   David S. Miller   ipv4: Pass flow k...
1421
  	return __ip_make_skb(sk, fl4, &queue, &cork);
1c32c5ad6   Herbert Xu   inet: Add ip_make...
1422
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1423
1424
1425
1426
  
  /*
   *	Fetch data from kernel space and fill in checksum if needed.
   */
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
1427
  static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1428
1429
  			      int len, int odd, struct sk_buff *skb)
  {
5084205fa   Al Viro   [NET]: Annotate c...
1430
  	__wsum csum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1431
1432
1433
  
  	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
  	skb->csum = csum_block_add(skb->csum, csum, odd);
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
1434
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1435
  }
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
1436
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1437
   *	Generic function to send a packet as reply to another packet.
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1438
   *	Used to send some TCP resets/acks so far.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1439
   */
bdbbb8527   Eric Dumazet   ipv4: tcp: get ri...
1440
  void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
24a2d43d8   Eric Dumazet   ipv4: rename ip_o...
1441
1442
1443
  			   const struct ip_options *sopt,
  			   __be32 daddr, __be32 saddr,
  			   const struct ip_reply_arg *arg,
70e734167   David S. Miller   ipv4: Show that i...
1444
  			   unsigned int len)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1445
  {
f6d8bd051   Eric Dumazet   inet: add RCU pro...
1446
  	struct ip_options_data replyopts;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1447
  	struct ipcm_cookie ipc;
77968b782   David S. Miller   ipv4: Pass flow k...
1448
  	struct flowi4 fl4;
511c3f92a   Eric Dumazet   net: skb->rtable ...
1449
  	struct rtable *rt = skb_rtable(skb);
bdbbb8527   Eric Dumazet   ipv4: tcp: get ri...
1450
  	struct net *net = sock_net(sk);
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1451
  	struct sk_buff *nskb;
4062090e3   Vasily Averin   ipv4: dst_entry l...
1452
  	int err;
f7ba868b7   David Ahern   net: Use VRF inde...
1453
  	int oif;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1454

91ed1e666   Paolo Abeni   ip/options: expli...
1455
  	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456
  		return;
0a5ebb800   David S. Miller   ipv4: Pass explic...
1457
  	ipc.addr = daddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1458
  	ipc.opt = NULL;
2244d07bf   Oliver Hartkopp   net: simplify fla...
1459
  	ipc.tx_flags = 0;
aa6615814   Francesco Fusco   ipv4: processing ...
1460
1461
  	ipc.ttl = 0;
  	ipc.tos = -1;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1462

f6d8bd051   Eric Dumazet   inet: add RCU pro...
1463
  	if (replyopts.opt.opt.optlen) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464
  		ipc.opt = &replyopts.opt;
f6d8bd051   Eric Dumazet   inet: add RCU pro...
1465
1466
  		if (replyopts.opt.opt.srr)
  			daddr = replyopts.opt.opt.faddr;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467
  	}
f7ba868b7   David Ahern   net: Use VRF inde...
1468
  	oif = arg->bound_dev_if;
9b6c14d51   David Ahern   net: tcp response...
1469
1470
  	if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
  		oif = skb->skb_iif;
f7ba868b7   David Ahern   net: Use VRF inde...
1471
1472
  
  	flowi4_init_output(&fl4, oif,
e110861f8   Lorenzo Colitti   net: add a sysctl...
1473
  			   IP4_REPLY_MARK(net, skb->mark),
66b13d99d   Eric Dumazet   ipv4: tcp: fix TO...
1474
  			   RT_TOS(arg->tos),
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1475
  			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
77968b782   David S. Miller   ipv4: Pass flow k...
1476
  			   ip_reply_arg_flowi_flags(arg),
70e734167   David S. Miller   ipv4: Show that i...
1477
  			   daddr, saddr,
e2d118a1c   Lorenzo Colitti   net: inet: Suppor...
1478
1479
  			   tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
  			   arg->uid);
77968b782   David S. Miller   ipv4: Pass flow k...
1480
  	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1481
  	rt = ip_route_output_key(net, &fl4);
77968b782   David S. Miller   ipv4: Pass flow k...
1482
1483
  	if (IS_ERR(rt))
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1484

bdbbb8527   Eric Dumazet   ipv4: tcp: get ri...
1485
  	inet_sk(sk)->tos = arg->tos;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1486

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1487
  	sk->sk_priority = skb->priority;
eddc9ec53   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1488
  	sk->sk_protocol = ip_hdr(skb)->protocol;
f0e48dbfc   Patrick McHardy   [TCP]: Honour sk_...
1489
  	sk->sk_bound_dev_if = arg->bound_dev_if;
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1490
  	sk->sk_sndbuf = sysctl_wmem_default;
bf99b4ded   Pau Espin Pedrol   tcp: fix mark pro...
1491
  	sk->sk_mark = fl4.flowi4_mark;
4062090e3   Vasily Averin   ipv4: dst_entry l...
1492
1493
1494
1495
1496
1497
  	err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
  			     len, 0, &ipc, &rt, MSG_DONTWAIT);
  	if (unlikely(err)) {
  		ip_flush_pending_frames(sk);
  		goto out;
  	}
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1498
1499
  	nskb = skb_peek(&sk->sk_write_queue);
  	if (nskb) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1500
  		if (arg->csumoffset >= 0)
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1501
1502
  			*((__sum16 *)skb_transport_header(nskb) +
  			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
9c70220b7   Arnaldo Carvalho de Melo   [SK_BUFF]: Introd...
1503
  								arg->csum));
be9f4a44e   Eric Dumazet   ipv4: tcp: remove...
1504
  		nskb->ip_summed = CHECKSUM_NONE;
77968b782   David S. Miller   ipv4: Pass flow k...
1505
  		ip_push_pending_frames(sk, &fl4);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1506
  	}
4062090e3   Vasily Averin   ipv4: dst_entry l...
1507
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1508
1509
  	ip_rt_put(rt);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510
1511
  /*
   *	Boot-time initialisation: calls ip_rt_init() and inet_initpeers(),
   *	plus igmp_mc_init() when CONFIG_IP_MULTICAST is defined.
   */
  void __init ip_init(void)
  {
  	ip_rt_init();
  	inet_initpeers();

  #ifdef CONFIG_IP_MULTICAST
  	igmp_mc_init();
  #endif
  }