Commit f11cb2c2aa1ba28091910eaecaa40906ec31101a

Authored by Julian Anastasov
Committed by Pablo Neira Ayuso
1 parent 183dce554a

ipvs: do not use skb_share_check

We run in contexts like ip_rcv, ipv6_rcv and br_handle_frame,
so we do not expect shared skbs.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>

Showing 1 changed file with 2 additions and 43 deletions Inline Diff

net/netfilter/ipvs/ip_vs_xmit.c
1 /* 1 /*
2 * ip_vs_xmit.c: various packet transmitters for IPVS 2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 * 3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes:
13 * 13 *
14 * Description of forwarding methods: 14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and 15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD 16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example, 17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used 18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet 19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules: 20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet 22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL 23 * - the only place where we can see skb->sk != NULL
24 */ 24 */
25 25
26 #define KMSG_COMPONENT "IPVS" 26 #define KMSG_COMPONENT "IPVS"
27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28 28
29 #include <linux/kernel.h> 29 #include <linux/kernel.h>
30 #include <linux/slab.h> 30 #include <linux/slab.h>
31 #include <linux/tcp.h> /* for tcphdr */ 31 #include <linux/tcp.h> /* for tcphdr */
32 #include <net/ip.h> 32 #include <net/ip.h>
33 #include <net/tcp.h> /* for csum_tcpudp_magic */ 33 #include <net/tcp.h> /* for csum_tcpudp_magic */
34 #include <net/udp.h> 34 #include <net/udp.h>
35 #include <net/icmp.h> /* for icmp_send */ 35 #include <net/icmp.h> /* for icmp_send */
36 #include <net/route.h> /* for ip_route_output */ 36 #include <net/route.h> /* for ip_route_output */
37 #include <net/ipv6.h> 37 #include <net/ipv6.h>
38 #include <net/ip6_route.h> 38 #include <net/ip6_route.h>
39 #include <net/addrconf.h> 39 #include <net/addrconf.h>
40 #include <linux/icmpv6.h> 40 #include <linux/icmpv6.h>
41 #include <linux/netfilter.h> 41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv4.h> 42 #include <linux/netfilter_ipv4.h>
43 43
44 #include <net/ip_vs.h> 44 #include <net/ip_vs.h>
45 45
46 enum { 46 enum {
47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ 47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ 48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to 49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
50 * local 50 * local
51 */ 51 */
52 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ 52 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
53 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 53 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
54 }; 54 };
55 55
56 /* 56 /*
57 * Destination cache to speed up outgoing route lookup 57 * Destination cache to speed up outgoing route lookup
58 */ 58 */
59 static inline void 59 static inline void
60 __ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) 60 __ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie)
61 { 61 {
62 struct dst_entry *old_dst; 62 struct dst_entry *old_dst;
63 63
64 old_dst = dest->dst_cache; 64 old_dst = dest->dst_cache;
65 dest->dst_cache = dst; 65 dest->dst_cache = dst;
66 dest->dst_cookie = dst_cookie; 66 dest->dst_cookie = dst_cookie;
67 dst_release(old_dst); 67 dst_release(old_dst);
68 } 68 }
69 69
70 static inline struct dst_entry * 70 static inline struct dst_entry *
71 __ip_vs_dst_check(struct ip_vs_dest *dest) 71 __ip_vs_dst_check(struct ip_vs_dest *dest)
72 { 72 {
73 struct dst_entry *dst = dest->dst_cache; 73 struct dst_entry *dst = dest->dst_cache;
74 74
75 if (!dst) 75 if (!dst)
76 return NULL; 76 return NULL;
77 if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { 77 if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) {
78 dest->dst_cache = NULL; 78 dest->dst_cache = NULL;
79 dst_release(dst); 79 dst_release(dst);
80 return NULL; 80 return NULL;
81 } 81 }
82 dst_hold(dst); 82 dst_hold(dst);
83 return dst; 83 return dst;
84 } 84 }
85 85
86 static inline bool 86 static inline bool
87 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 87 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
88 { 88 {
89 if (IP6CB(skb)->frag_max_size) { 89 if (IP6CB(skb)->frag_max_size) {
90 /* frag_max_size tell us that, this packet have been 90 /* frag_max_size tell us that, this packet have been
91 * defragmented by netfilter IPv6 conntrack module. 91 * defragmented by netfilter IPv6 conntrack module.
92 */ 92 */
93 if (IP6CB(skb)->frag_max_size > mtu) 93 if (IP6CB(skb)->frag_max_size > mtu)
94 return true; /* largest fragment violate MTU */ 94 return true; /* largest fragment violate MTU */
95 } 95 }
96 else if (skb->len > mtu && !skb_is_gso(skb)) { 96 else if (skb->len > mtu && !skb_is_gso(skb)) {
97 return true; /* Packet size violate MTU size */ 97 return true; /* Packet size violate MTU size */
98 } 98 }
99 return false; 99 return false;
100 } 100 }
101 101
102 /* Get route to daddr, update *saddr, optionally bind route to saddr */ 102 /* Get route to daddr, update *saddr, optionally bind route to saddr */
103 static struct rtable *do_output_route4(struct net *net, __be32 daddr, 103 static struct rtable *do_output_route4(struct net *net, __be32 daddr,
104 int rt_mode, __be32 *saddr) 104 int rt_mode, __be32 *saddr)
105 { 105 {
106 struct flowi4 fl4; 106 struct flowi4 fl4;
107 struct rtable *rt; 107 struct rtable *rt;
108 int loop = 0; 108 int loop = 0;
109 109
110 memset(&fl4, 0, sizeof(fl4)); 110 memset(&fl4, 0, sizeof(fl4));
111 fl4.daddr = daddr; 111 fl4.daddr = daddr;
112 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; 112 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
113 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 113 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
114 FLOWI_FLAG_KNOWN_NH : 0; 114 FLOWI_FLAG_KNOWN_NH : 0;
115 115
116 retry: 116 retry:
117 rt = ip_route_output_key(net, &fl4); 117 rt = ip_route_output_key(net, &fl4);
118 if (IS_ERR(rt)) { 118 if (IS_ERR(rt)) {
119 /* Invalid saddr ? */ 119 /* Invalid saddr ? */
120 if (PTR_ERR(rt) == -EINVAL && *saddr && 120 if (PTR_ERR(rt) == -EINVAL && *saddr &&
121 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 121 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
122 *saddr = 0; 122 *saddr = 0;
123 flowi4_update_output(&fl4, 0, 0, daddr, 0); 123 flowi4_update_output(&fl4, 0, 0, daddr, 0);
124 goto retry; 124 goto retry;
125 } 125 }
126 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 126 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
127 return NULL; 127 return NULL;
128 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 128 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
129 ip_rt_put(rt); 129 ip_rt_put(rt);
130 *saddr = fl4.saddr; 130 *saddr = fl4.saddr;
131 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr); 131 flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
132 loop++; 132 loop++;
133 goto retry; 133 goto retry;
134 } 134 }
135 *saddr = fl4.saddr; 135 *saddr = fl4.saddr;
136 return rt; 136 return rt;
137 } 137 }
138 138
139 /* Get route to destination or remote server */ 139 /* Get route to destination or remote server */
140 static struct rtable * 140 static struct rtable *
141 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 141 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
142 __be32 daddr, int rt_mode, __be32 *ret_saddr) 142 __be32 daddr, int rt_mode, __be32 *ret_saddr)
143 { 143 {
144 struct net *net = dev_net(skb_dst(skb)->dev); 144 struct net *net = dev_net(skb_dst(skb)->dev);
145 struct rtable *rt; /* Route to the other host */ 145 struct rtable *rt; /* Route to the other host */
146 struct rtable *ort; /* Original route */ 146 struct rtable *ort; /* Original route */
147 int local; 147 int local;
148 148
149 if (dest) { 149 if (dest) {
150 spin_lock(&dest->dst_lock); 150 spin_lock(&dest->dst_lock);
151 rt = (struct rtable *) __ip_vs_dst_check(dest); 151 rt = (struct rtable *) __ip_vs_dst_check(dest);
152 if (!rt) { 152 if (!rt) {
153 rt = do_output_route4(net, dest->addr.ip, rt_mode, 153 rt = do_output_route4(net, dest->addr.ip, rt_mode,
154 &dest->dst_saddr.ip); 154 &dest->dst_saddr.ip);
155 if (!rt) { 155 if (!rt) {
156 spin_unlock(&dest->dst_lock); 156 spin_unlock(&dest->dst_lock);
157 return NULL; 157 return NULL;
158 } 158 }
159 __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); 159 __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0);
160 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", 160 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
161 &dest->addr.ip, &dest->dst_saddr.ip, 161 &dest->addr.ip, &dest->dst_saddr.ip,
162 atomic_read(&rt->dst.__refcnt)); 162 atomic_read(&rt->dst.__refcnt));
163 } 163 }
164 daddr = dest->addr.ip; 164 daddr = dest->addr.ip;
165 if (ret_saddr) 165 if (ret_saddr)
166 *ret_saddr = dest->dst_saddr.ip; 166 *ret_saddr = dest->dst_saddr.ip;
167 spin_unlock(&dest->dst_lock); 167 spin_unlock(&dest->dst_lock);
168 } else { 168 } else {
169 __be32 saddr = htonl(INADDR_ANY); 169 __be32 saddr = htonl(INADDR_ANY);
170 170
171 /* For such unconfigured boxes avoid many route lookups 171 /* For such unconfigured boxes avoid many route lookups
172 * for performance reasons because we do not remember saddr 172 * for performance reasons because we do not remember saddr
173 */ 173 */
174 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 174 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
175 rt = do_output_route4(net, daddr, rt_mode, &saddr); 175 rt = do_output_route4(net, daddr, rt_mode, &saddr);
176 if (!rt) 176 if (!rt)
177 return NULL; 177 return NULL;
178 if (ret_saddr) 178 if (ret_saddr)
179 *ret_saddr = saddr; 179 *ret_saddr = saddr;
180 } 180 }
181 181
182 local = rt->rt_flags & RTCF_LOCAL; 182 local = rt->rt_flags & RTCF_LOCAL;
183 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 183 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
184 rt_mode)) { 184 rt_mode)) {
185 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", 185 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
186 (rt->rt_flags & RTCF_LOCAL) ? 186 (rt->rt_flags & RTCF_LOCAL) ?
187 "local":"non-local", &daddr); 187 "local":"non-local", &daddr);
188 ip_rt_put(rt); 188 ip_rt_put(rt);
189 return NULL; 189 return NULL;
190 } 190 }
191 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 191 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
192 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { 192 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
193 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " 193 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
194 "requires NAT method, dest: %pI4\n", 194 "requires NAT method, dest: %pI4\n",
195 &ip_hdr(skb)->daddr, &daddr); 195 &ip_hdr(skb)->daddr, &daddr);
196 ip_rt_put(rt); 196 ip_rt_put(rt);
197 return NULL; 197 return NULL;
198 } 198 }
199 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { 199 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
200 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " 200 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
201 "to non-local address, dest: %pI4\n", 201 "to non-local address, dest: %pI4\n",
202 &ip_hdr(skb)->saddr, &daddr); 202 &ip_hdr(skb)->saddr, &daddr);
203 ip_rt_put(rt); 203 ip_rt_put(rt);
204 return NULL; 204 return NULL;
205 } 205 }
206 206
207 return rt; 207 return rt;
208 } 208 }
209 209
210 #ifdef CONFIG_IP_VS_IPV6 210 #ifdef CONFIG_IP_VS_IPV6
211 211
212 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) 212 static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
213 { 213 {
214 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; 214 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
215 } 215 }
216 216
217 static struct dst_entry * 217 static struct dst_entry *
218 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 218 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
219 struct in6_addr *ret_saddr, int do_xfrm) 219 struct in6_addr *ret_saddr, int do_xfrm)
220 { 220 {
221 struct dst_entry *dst; 221 struct dst_entry *dst;
222 struct flowi6 fl6 = { 222 struct flowi6 fl6 = {
223 .daddr = *daddr, 223 .daddr = *daddr,
224 }; 224 };
225 225
226 dst = ip6_route_output(net, NULL, &fl6); 226 dst = ip6_route_output(net, NULL, &fl6);
227 if (dst->error) 227 if (dst->error)
228 goto out_err; 228 goto out_err;
229 if (!ret_saddr) 229 if (!ret_saddr)
230 return dst; 230 return dst;
231 if (ipv6_addr_any(&fl6.saddr) && 231 if (ipv6_addr_any(&fl6.saddr) &&
232 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 232 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
233 &fl6.daddr, 0, &fl6.saddr) < 0) 233 &fl6.daddr, 0, &fl6.saddr) < 0)
234 goto out_err; 234 goto out_err;
235 if (do_xfrm) { 235 if (do_xfrm) {
236 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 236 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
237 if (IS_ERR(dst)) { 237 if (IS_ERR(dst)) {
238 dst = NULL; 238 dst = NULL;
239 goto out_err; 239 goto out_err;
240 } 240 }
241 } 241 }
242 *ret_saddr = fl6.saddr; 242 *ret_saddr = fl6.saddr;
243 return dst; 243 return dst;
244 244
245 out_err: 245 out_err:
246 dst_release(dst); 246 dst_release(dst);
247 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); 247 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
248 return NULL; 248 return NULL;
249 } 249 }
250 250
251 /* 251 /*
252 * Get route to destination or remote server 252 * Get route to destination or remote server
253 */ 253 */
254 static struct rt6_info * 254 static struct rt6_info *
255 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, 255 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
256 struct in6_addr *daddr, struct in6_addr *ret_saddr, 256 struct in6_addr *daddr, struct in6_addr *ret_saddr,
257 int do_xfrm, int rt_mode) 257 int do_xfrm, int rt_mode)
258 { 258 {
259 struct net *net = dev_net(skb_dst(skb)->dev); 259 struct net *net = dev_net(skb_dst(skb)->dev);
260 struct rt6_info *rt; /* Route to the other host */ 260 struct rt6_info *rt; /* Route to the other host */
261 struct rt6_info *ort; /* Original route */ 261 struct rt6_info *ort; /* Original route */
262 struct dst_entry *dst; 262 struct dst_entry *dst;
263 int local; 263 int local;
264 264
265 if (dest) { 265 if (dest) {
266 spin_lock(&dest->dst_lock); 266 spin_lock(&dest->dst_lock);
267 rt = (struct rt6_info *)__ip_vs_dst_check(dest); 267 rt = (struct rt6_info *)__ip_vs_dst_check(dest);
268 if (!rt) { 268 if (!rt) {
269 u32 cookie; 269 u32 cookie;
270 270
271 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 271 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
272 &dest->dst_saddr.in6, 272 &dest->dst_saddr.in6,
273 do_xfrm); 273 do_xfrm);
274 if (!dst) { 274 if (!dst) {
275 spin_unlock(&dest->dst_lock); 275 spin_unlock(&dest->dst_lock);
276 return NULL; 276 return NULL;
277 } 277 }
278 rt = (struct rt6_info *) dst; 278 rt = (struct rt6_info *) dst;
279 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 279 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
280 __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); 280 __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie);
281 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 281 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
282 &dest->addr.in6, &dest->dst_saddr.in6, 282 &dest->addr.in6, &dest->dst_saddr.in6,
283 atomic_read(&rt->dst.__refcnt)); 283 atomic_read(&rt->dst.__refcnt));
284 } 284 }
285 if (ret_saddr) 285 if (ret_saddr)
286 *ret_saddr = dest->dst_saddr.in6; 286 *ret_saddr = dest->dst_saddr.in6;
287 spin_unlock(&dest->dst_lock); 287 spin_unlock(&dest->dst_lock);
288 } else { 288 } else {
289 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 289 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
290 if (!dst) 290 if (!dst)
291 return NULL; 291 return NULL;
292 rt = (struct rt6_info *) dst; 292 rt = (struct rt6_info *) dst;
293 } 293 }
294 294
295 local = __ip_vs_is_local_route6(rt); 295 local = __ip_vs_is_local_route6(rt);
296 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 296 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
297 rt_mode)) { 297 rt_mode)) {
298 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", 298 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
299 local ? "local":"non-local", daddr); 299 local ? "local":"non-local", daddr);
300 dst_release(&rt->dst); 300 dst_release(&rt->dst);
301 return NULL; 301 return NULL;
302 } 302 }
303 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 303 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
304 !((ort = (struct rt6_info *) skb_dst(skb)) && 304 !((ort = (struct rt6_info *) skb_dst(skb)) &&
305 __ip_vs_is_local_route6(ort))) { 305 __ip_vs_is_local_route6(ort))) {
306 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " 306 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
307 "requires NAT method, dest: %pI6c\n", 307 "requires NAT method, dest: %pI6c\n",
308 &ipv6_hdr(skb)->daddr, daddr); 308 &ipv6_hdr(skb)->daddr, daddr);
309 dst_release(&rt->dst); 309 dst_release(&rt->dst);
310 return NULL; 310 return NULL;
311 } 311 }
312 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 312 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
313 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 313 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
314 IPV6_ADDR_LOOPBACK)) { 314 IPV6_ADDR_LOOPBACK)) {
315 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " 315 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
316 "to non-local address, dest: %pI6c\n", 316 "to non-local address, dest: %pI6c\n",
317 &ipv6_hdr(skb)->saddr, daddr); 317 &ipv6_hdr(skb)->saddr, daddr);
318 dst_release(&rt->dst); 318 dst_release(&rt->dst);
319 return NULL; 319 return NULL;
320 } 320 }
321 321
322 return rt; 322 return rt;
323 } 323 }
324 #endif 324 #endif
325 325
326 326
327 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ 327 /* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
328 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, 328 static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
329 struct ip_vs_conn *cp) 329 struct ip_vs_conn *cp)
330 { 330 {
331 int ret = NF_ACCEPT; 331 int ret = NF_ACCEPT;
332 332
333 skb->ipvs_property = 1; 333 skb->ipvs_property = 1;
334 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) 334 if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
335 ret = ip_vs_confirm_conntrack(skb); 335 ret = ip_vs_confirm_conntrack(skb);
336 if (ret == NF_ACCEPT) { 336 if (ret == NF_ACCEPT) {
337 nf_reset(skb); 337 nf_reset(skb);
338 skb_forward_csum(skb); 338 skb_forward_csum(skb);
339 } 339 }
340 return ret; 340 return ret;
341 } 341 }
342 342
343 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 343 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
344 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, 344 static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
345 struct ip_vs_conn *cp, int local) 345 struct ip_vs_conn *cp, int local)
346 { 346 {
347 int ret = NF_STOLEN; 347 int ret = NF_STOLEN;
348 348
349 skb->ipvs_property = 1; 349 skb->ipvs_property = 1;
350 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 350 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
351 ip_vs_notrack(skb); 351 ip_vs_notrack(skb);
352 else 352 else
353 ip_vs_update_conntrack(skb, cp, 1); 353 ip_vs_update_conntrack(skb, cp, 1);
354 if (!local) { 354 if (!local) {
355 skb_forward_csum(skb); 355 skb_forward_csum(skb);
356 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 356 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
357 dst_output); 357 dst_output);
358 } else 358 } else
359 ret = NF_ACCEPT; 359 ret = NF_ACCEPT;
360 return ret; 360 return ret;
361 } 361 }
362 362
363 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ 363 /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
364 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, 364 static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
365 struct ip_vs_conn *cp, int local) 365 struct ip_vs_conn *cp, int local)
366 { 366 {
367 int ret = NF_STOLEN; 367 int ret = NF_STOLEN;
368 368
369 skb->ipvs_property = 1; 369 skb->ipvs_property = 1;
370 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) 370 if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
371 ip_vs_notrack(skb); 371 ip_vs_notrack(skb);
372 if (!local) { 372 if (!local) {
373 skb_forward_csum(skb); 373 skb_forward_csum(skb);
374 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 374 NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
375 dst_output); 375 dst_output);
376 } else 376 } else
377 ret = NF_ACCEPT; 377 ret = NF_ACCEPT;
378 return ret; 378 return ret;
379 } 379 }
380 380
381 381
382 /* 382 /*
383 * NULL transmitter (do nothing except return NF_ACCEPT) 383 * NULL transmitter (do nothing except return NF_ACCEPT)
384 */ 384 */
385 int 385 int
386 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 386 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
387 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 387 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
388 { 388 {
389 /* we do not touch skb and do not need pskb ptr */ 389 /* we do not touch skb and do not need pskb ptr */
390 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 390 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
391 } 391 }
392 392
393 393
394 /* 394 /*
395 * Bypass transmitter 395 * Bypass transmitter
396 * Let packets bypass the destination when the destination is not 396 * Let packets bypass the destination when the destination is not
397 * available, it may be only used in transparent cache cluster. 397 * available, it may be only used in transparent cache cluster.
398 */ 398 */
399 int 399 int
400 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 400 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
401 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 401 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
402 { 402 {
403 struct rtable *rt; /* Route to the other host */ 403 struct rtable *rt; /* Route to the other host */
404 struct iphdr *iph = ip_hdr(skb); 404 struct iphdr *iph = ip_hdr(skb);
405 int mtu; 405 int mtu;
406 406
407 EnterFunction(10); 407 EnterFunction(10);
408 408
409 rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, 409 rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
410 NULL); 410 NULL);
411 if (!rt) 411 if (!rt)
412 goto tx_error_icmp; 412 goto tx_error_icmp;
413 413
414 /* MTU checking */ 414 /* MTU checking */
415 mtu = dst_mtu(&rt->dst); 415 mtu = dst_mtu(&rt->dst);
416 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 416 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
417 !skb_is_gso(skb)) { 417 !skb_is_gso(skb)) {
418 ip_rt_put(rt); 418 ip_rt_put(rt);
419 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 419 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
420 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 420 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
421 goto tx_error; 421 goto tx_error;
422 } 422 }
423 423
424 /*
425 * Call ip_send_check because we are not sure it is called
426 * after ip_defrag. Is copy-on-write needed?
427 */
428 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
429 ip_rt_put(rt);
430 return NF_STOLEN;
431 }
432 ip_send_check(ip_hdr(skb)); 424 ip_send_check(ip_hdr(skb));
433 425
434 /* drop old route */ 426 /* drop old route */
435 skb_dst_drop(skb); 427 skb_dst_drop(skb);
436 skb_dst_set(skb, &rt->dst); 428 skb_dst_set(skb, &rt->dst);
437 429
438 /* Another hack: avoid icmp_send in ip_fragment */ 430 /* Another hack: avoid icmp_send in ip_fragment */
439 skb->local_df = 1; 431 skb->local_df = 1;
440 432
441 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 433 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
442 434
443 LeaveFunction(10); 435 LeaveFunction(10);
444 return NF_STOLEN; 436 return NF_STOLEN;
445 437
446 tx_error_icmp: 438 tx_error_icmp:
447 dst_link_failure(skb); 439 dst_link_failure(skb);
448 tx_error: 440 tx_error:
449 kfree_skb(skb); 441 kfree_skb(skb);
450 LeaveFunction(10); 442 LeaveFunction(10);
451 return NF_STOLEN; 443 return NF_STOLEN;
452 } 444 }
453 445
454 #ifdef CONFIG_IP_VS_IPV6 446 #ifdef CONFIG_IP_VS_IPV6
455 int 447 int
456 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 448 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
457 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 449 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
458 { 450 {
459 struct rt6_info *rt; /* Route to the other host */ 451 struct rt6_info *rt; /* Route to the other host */
460 int mtu; 452 int mtu;
461 453
462 EnterFunction(10); 454 EnterFunction(10);
463 455
464 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, 456 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
465 IP_VS_RT_MODE_NON_LOCAL); 457 IP_VS_RT_MODE_NON_LOCAL);
466 if (!rt) 458 if (!rt)
467 goto tx_error_icmp; 459 goto tx_error_icmp;
468 460
469 /* MTU checking */ 461 /* MTU checking */
470 mtu = dst_mtu(&rt->dst); 462 mtu = dst_mtu(&rt->dst);
471 if (__mtu_check_toobig_v6(skb, mtu)) { 463 if (__mtu_check_toobig_v6(skb, mtu)) {
472 if (!skb->dev) { 464 if (!skb->dev) {
473 struct net *net = dev_net(skb_dst(skb)->dev); 465 struct net *net = dev_net(skb_dst(skb)->dev);
474 466
475 skb->dev = net->loopback_dev; 467 skb->dev = net->loopback_dev;
476 } 468 }
477 /* only send ICMP too big on first fragment */ 469 /* only send ICMP too big on first fragment */
478 if (!iph->fragoffs) 470 if (!iph->fragoffs)
479 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 471 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
480 dst_release(&rt->dst); 472 dst_release(&rt->dst);
481 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 473 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
482 goto tx_error; 474 goto tx_error;
483 } 475 }
484 476
485 /*
486 * Call ip_send_check because we are not sure it is called
487 * after ip_defrag. Is copy-on-write needed?
488 */
489 skb = skb_share_check(skb, GFP_ATOMIC);
490 if (unlikely(skb == NULL)) {
491 dst_release(&rt->dst);
492 return NF_STOLEN;
493 }
494
495 /* drop old route */ 477 /* drop old route */
496 skb_dst_drop(skb); 478 skb_dst_drop(skb);
497 skb_dst_set(skb, &rt->dst); 479 skb_dst_set(skb, &rt->dst);
498 480
499 /* Another hack: avoid icmp_send in ip_fragment */ 481 /* Another hack: avoid icmp_send in ip_fragment */
500 skb->local_df = 1; 482 skb->local_df = 1;
501 483
502 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 484 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
503 485
504 LeaveFunction(10); 486 LeaveFunction(10);
505 return NF_STOLEN; 487 return NF_STOLEN;
506 488
507 tx_error_icmp: 489 tx_error_icmp:
508 dst_link_failure(skb); 490 dst_link_failure(skb);
509 tx_error: 491 tx_error:
510 kfree_skb(skb); 492 kfree_skb(skb);
511 LeaveFunction(10); 493 LeaveFunction(10);
512 return NF_STOLEN; 494 return NF_STOLEN;
513 } 495 }
514 #endif 496 #endif
515 497
516 /* 498 /*
517 * NAT transmitter (only for outside-to-inside nat forwarding) 499 * NAT transmitter (only for outside-to-inside nat forwarding)
518 * Not used for related ICMP 500 * Not used for related ICMP
519 */ 501 */
520 int 502 int
521 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 503 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
522 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 504 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
523 { 505 {
524 struct rtable *rt; /* Route to the other host */ 506 struct rtable *rt; /* Route to the other host */
525 int mtu; 507 int mtu;
526 struct iphdr *iph = ip_hdr(skb); 508 struct iphdr *iph = ip_hdr(skb);
527 int local, rc; 509 int local, rc;
528 510
529 EnterFunction(10); 511 EnterFunction(10);
530 512
531 /* check if it is a connection of no-client-port */ 513 /* check if it is a connection of no-client-port */
532 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 514 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
533 __be16 _pt, *p; 515 __be16 _pt, *p;
534 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); 516 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
535 if (p == NULL) 517 if (p == NULL)
536 goto tx_error; 518 goto tx_error;
537 ip_vs_conn_fill_cport(cp, *p); 519 ip_vs_conn_fill_cport(cp, *p);
538 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 520 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
539 } 521 }
540 522
541 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 523 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
542 IP_VS_RT_MODE_LOCAL | 524 IP_VS_RT_MODE_LOCAL |
543 IP_VS_RT_MODE_NON_LOCAL | 525 IP_VS_RT_MODE_NON_LOCAL |
544 IP_VS_RT_MODE_RDR, NULL))) 526 IP_VS_RT_MODE_RDR, NULL)))
545 goto tx_error_icmp; 527 goto tx_error_icmp;
546 local = rt->rt_flags & RTCF_LOCAL; 528 local = rt->rt_flags & RTCF_LOCAL;
547 /* 529 /*
548 * Avoid duplicate tuple in reply direction for NAT traffic 530 * Avoid duplicate tuple in reply direction for NAT traffic
549 * to local address when connection is sync-ed 531 * to local address when connection is sync-ed
550 */ 532 */
551 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 533 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
552 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 534 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
553 enum ip_conntrack_info ctinfo; 535 enum ip_conntrack_info ctinfo;
554 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 536 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
555 537
556 if (ct && !nf_ct_is_untracked(ct)) { 538 if (ct && !nf_ct_is_untracked(ct)) {
557 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, 539 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
558 "ip_vs_nat_xmit(): " 540 "ip_vs_nat_xmit(): "
559 "stopping DNAT to local address"); 541 "stopping DNAT to local address");
560 goto tx_error_put; 542 goto tx_error_put;
561 } 543 }
562 } 544 }
563 #endif 545 #endif
564 546
565 /* From world but DNAT to loopback address? */ 547 /* From world but DNAT to loopback address? */
566 if (local && ipv4_is_loopback(cp->daddr.ip) && 548 if (local && ipv4_is_loopback(cp->daddr.ip) &&
567 rt_is_input_route(skb_rtable(skb))) { 549 rt_is_input_route(skb_rtable(skb))) {
568 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 550 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
569 "stopping DNAT to loopback address"); 551 "stopping DNAT to loopback address");
570 goto tx_error_put; 552 goto tx_error_put;
571 } 553 }
572 554
573 /* MTU checking */ 555 /* MTU checking */
574 mtu = dst_mtu(&rt->dst); 556 mtu = dst_mtu(&rt->dst);
575 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 557 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
576 !skb_is_gso(skb)) { 558 !skb_is_gso(skb)) {
577 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 559 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
578 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 560 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
579 "ip_vs_nat_xmit(): frag needed for"); 561 "ip_vs_nat_xmit(): frag needed for");
580 goto tx_error_put; 562 goto tx_error_put;
581 } 563 }
582 564
583 /* copy-on-write the packet before mangling it */ 565 /* copy-on-write the packet before mangling it */
584 if (!skb_make_writable(skb, sizeof(struct iphdr))) 566 if (!skb_make_writable(skb, sizeof(struct iphdr)))
585 goto tx_error_put; 567 goto tx_error_put;
586 568
587 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 569 if (skb_cow(skb, rt->dst.dev->hard_header_len))
588 goto tx_error_put; 570 goto tx_error_put;
589 571
590 /* mangle the packet */ 572 /* mangle the packet */
591 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 573 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
592 goto tx_error_put; 574 goto tx_error_put;
593 ip_hdr(skb)->daddr = cp->daddr.ip; 575 ip_hdr(skb)->daddr = cp->daddr.ip;
594 ip_send_check(ip_hdr(skb)); 576 ip_send_check(ip_hdr(skb));
595 577
596 if (!local) { 578 if (!local) {
597 /* drop old route */ 579 /* drop old route */
598 skb_dst_drop(skb); 580 skb_dst_drop(skb);
599 skb_dst_set(skb, &rt->dst); 581 skb_dst_set(skb, &rt->dst);
600 } else 582 } else
601 ip_rt_put(rt); 583 ip_rt_put(rt);
602 584
603 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); 585 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
604 586
605 /* FIXME: when application helper enlarges the packet and the length 587 /* FIXME: when application helper enlarges the packet and the length
606 is larger than the MTU of outgoing device, there will be still 588 is larger than the MTU of outgoing device, there will be still
607 MTU problem. */ 589 MTU problem. */
608 590
609 /* Another hack: avoid icmp_send in ip_fragment */ 591 /* Another hack: avoid icmp_send in ip_fragment */
610 skb->local_df = 1; 592 skb->local_df = 1;
611 593
612 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 594 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
613 595
614 LeaveFunction(10); 596 LeaveFunction(10);
615 return rc; 597 return rc;
616 598
617 tx_error_icmp: 599 tx_error_icmp:
618 dst_link_failure(skb); 600 dst_link_failure(skb);
619 tx_error: 601 tx_error:
620 kfree_skb(skb); 602 kfree_skb(skb);
621 LeaveFunction(10); 603 LeaveFunction(10);
622 return NF_STOLEN; 604 return NF_STOLEN;
623 tx_error_put: 605 tx_error_put:
624 ip_rt_put(rt); 606 ip_rt_put(rt);
625 goto tx_error; 607 goto tx_error;
626 } 608 }
627 609
628 #ifdef CONFIG_IP_VS_IPV6 610 #ifdef CONFIG_IP_VS_IPV6
629 int 611 int
630 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 612 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
631 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 613 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
632 { 614 {
633 struct rt6_info *rt; /* Route to the other host */ 615 struct rt6_info *rt; /* Route to the other host */
634 int mtu; 616 int mtu;
635 int local, rc; 617 int local, rc;
636 618
637 EnterFunction(10); 619 EnterFunction(10);
638 620
639 /* check if it is a connection of no-client-port */ 621 /* check if it is a connection of no-client-port */
640 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { 622 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
641 __be16 _pt, *p; 623 __be16 _pt, *p;
642 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); 624 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
643 if (p == NULL) 625 if (p == NULL)
644 goto tx_error; 626 goto tx_error;
645 ip_vs_conn_fill_cport(cp, *p); 627 ip_vs_conn_fill_cport(cp, *p);
646 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 628 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
647 } 629 }
648 630
649 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 631 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
650 0, (IP_VS_RT_MODE_LOCAL | 632 0, (IP_VS_RT_MODE_LOCAL |
651 IP_VS_RT_MODE_NON_LOCAL | 633 IP_VS_RT_MODE_NON_LOCAL |
652 IP_VS_RT_MODE_RDR)))) 634 IP_VS_RT_MODE_RDR))))
653 goto tx_error_icmp; 635 goto tx_error_icmp;
654 local = __ip_vs_is_local_route6(rt); 636 local = __ip_vs_is_local_route6(rt);
655 /* 637 /*
656 * Avoid duplicate tuple in reply direction for NAT traffic 638 * Avoid duplicate tuple in reply direction for NAT traffic
657 * to local address when connection is sync-ed 639 * to local address when connection is sync-ed
658 */ 640 */
659 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 641 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
660 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 642 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
661 enum ip_conntrack_info ctinfo; 643 enum ip_conntrack_info ctinfo;
662 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 644 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
663 645
664 if (ct && !nf_ct_is_untracked(ct)) { 646 if (ct && !nf_ct_is_untracked(ct)) {
665 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, 647 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
666 "ip_vs_nat_xmit_v6(): " 648 "ip_vs_nat_xmit_v6(): "
667 "stopping DNAT to local address"); 649 "stopping DNAT to local address");
668 goto tx_error_put; 650 goto tx_error_put;
669 } 651 }
670 } 652 }
671 #endif 653 #endif
672 654
673 /* From world but DNAT to loopback address? */ 655 /* From world but DNAT to loopback address? */
674 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 656 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
675 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 657 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
676 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 658 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
677 "ip_vs_nat_xmit_v6(): " 659 "ip_vs_nat_xmit_v6(): "
678 "stopping DNAT to loopback address"); 660 "stopping DNAT to loopback address");
679 goto tx_error_put; 661 goto tx_error_put;
680 } 662 }
681 663
682 /* MTU checking */ 664 /* MTU checking */
683 mtu = dst_mtu(&rt->dst); 665 mtu = dst_mtu(&rt->dst);
684 if (__mtu_check_toobig_v6(skb, mtu)) { 666 if (__mtu_check_toobig_v6(skb, mtu)) {
685 if (!skb->dev) { 667 if (!skb->dev) {
686 struct net *net = dev_net(skb_dst(skb)->dev); 668 struct net *net = dev_net(skb_dst(skb)->dev);
687 669
688 skb->dev = net->loopback_dev; 670 skb->dev = net->loopback_dev;
689 } 671 }
690 /* only send ICMP too big on first fragment */ 672 /* only send ICMP too big on first fragment */
691 if (!iph->fragoffs) 673 if (!iph->fragoffs)
692 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 674 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
693 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 675 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
694 "ip_vs_nat_xmit_v6(): frag needed for"); 676 "ip_vs_nat_xmit_v6(): frag needed for");
695 goto tx_error_put; 677 goto tx_error_put;
696 } 678 }
697 679
698 /* copy-on-write the packet before mangling it */ 680 /* copy-on-write the packet before mangling it */
699 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 681 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
700 goto tx_error_put; 682 goto tx_error_put;
701 683
702 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 684 if (skb_cow(skb, rt->dst.dev->hard_header_len))
703 goto tx_error_put; 685 goto tx_error_put;
704 686
705 /* mangle the packet */ 687 /* mangle the packet */
706 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) 688 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
707 goto tx_error; 689 goto tx_error;
708 ipv6_hdr(skb)->daddr = cp->daddr.in6; 690 ipv6_hdr(skb)->daddr = cp->daddr.in6;
709 691
710 if (!local || !skb->dev) { 692 if (!local || !skb->dev) {
711 /* drop the old route when skb is not shared */
712 skb_dst_drop(skb); 693 skb_dst_drop(skb);
713 skb_dst_set(skb, &rt->dst); 694 skb_dst_set(skb, &rt->dst);
714 } else { 695 } else {
715 /* destined to loopback, do we need to change route? */ 696 /* destined to loopback, do we need to change route? */
716 dst_release(&rt->dst); 697 dst_release(&rt->dst);
717 } 698 }
718 699
719 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); 700 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
720 701
721 /* FIXME: when application helper enlarges the packet and the length 702 /* FIXME: when application helper enlarges the packet and the length
722 is larger than the MTU of outgoing device, there will be still 703 is larger than the MTU of outgoing device, there will be still
723 MTU problem. */ 704 MTU problem. */
724 705
725 /* Another hack: avoid icmp_send in ip_fragment */ 706 /* Another hack: avoid icmp_send in ip_fragment */
726 skb->local_df = 1; 707 skb->local_df = 1;
727 708
728 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 709 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
729 710
730 LeaveFunction(10); 711 LeaveFunction(10);
731 return rc; 712 return rc;
732 713
733 tx_error_icmp: 714 tx_error_icmp:
734 dst_link_failure(skb); 715 dst_link_failure(skb);
735 tx_error: 716 tx_error:
736 LeaveFunction(10); 717 LeaveFunction(10);
737 kfree_skb(skb); 718 kfree_skb(skb);
738 return NF_STOLEN; 719 return NF_STOLEN;
739 tx_error_put: 720 tx_error_put:
740 dst_release(&rt->dst); 721 dst_release(&rt->dst);
741 goto tx_error; 722 goto tx_error;
742 } 723 }
743 #endif 724 #endif
744 725
745 726
746 /* 727 /*
747 * IP Tunneling transmitter 728 * IP Tunneling transmitter
748 * 729 *
749 * This function encapsulates the packet in a new IP packet, its 730 * This function encapsulates the packet in a new IP packet, its
750 * destination will be set to cp->daddr. Most code of this function 731 * destination will be set to cp->daddr. Most code of this function
751 * is taken from ipip.c. 732 * is taken from ipip.c.
752 * 733 *
753 * It is used in VS/TUN cluster. The load balancer selects a real 734 * It is used in VS/TUN cluster. The load balancer selects a real
754 * server from a cluster based on a scheduling algorithm, 735 * server from a cluster based on a scheduling algorithm,
755 * encapsulates the request packet and forwards it to the selected 736 * encapsulates the request packet and forwards it to the selected
756 * server. For example, all real servers are configured with 737 * server. For example, all real servers are configured with
757 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives 738 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
758 * the encapsulated packet, it will decapsulate the packet, processe 739 * the encapsulated packet, it will decapsulate the packet, processe
759 * the request and return the response packets directly to the client 740 * the request and return the response packets directly to the client
760 * without passing the load balancer. This can greatly increase the 741 * without passing the load balancer. This can greatly increase the
761 * scalability of virtual server. 742 * scalability of virtual server.
762 * 743 *
763 * Used for ANY protocol 744 * Used for ANY protocol
764 */ 745 */
765 int 746 int
766 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 747 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
767 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 748 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
768 { 749 {
769 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 750 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
770 struct rtable *rt; /* Route to the other host */ 751 struct rtable *rt; /* Route to the other host */
771 __be32 saddr; /* Source for tunnel */ 752 __be32 saddr; /* Source for tunnel */
772 struct net_device *tdev; /* Device to other host */ 753 struct net_device *tdev; /* Device to other host */
773 struct iphdr *old_iph = ip_hdr(skb); 754 struct iphdr *old_iph = ip_hdr(skb);
774 u8 tos = old_iph->tos; 755 u8 tos = old_iph->tos;
775 __be16 df; 756 __be16 df;
776 struct iphdr *iph; /* Our new IP header */ 757 struct iphdr *iph; /* Our new IP header */
777 unsigned int max_headroom; /* The extra header space needed */ 758 unsigned int max_headroom; /* The extra header space needed */
778 int mtu; 759 int mtu;
779 int ret; 760 int ret;
780 761
781 EnterFunction(10); 762 EnterFunction(10);
782 763
783 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 764 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
784 IP_VS_RT_MODE_LOCAL | 765 IP_VS_RT_MODE_LOCAL |
785 IP_VS_RT_MODE_NON_LOCAL | 766 IP_VS_RT_MODE_NON_LOCAL |
786 IP_VS_RT_MODE_CONNECT, &saddr))) 767 IP_VS_RT_MODE_CONNECT, &saddr)))
787 goto tx_error_icmp; 768 goto tx_error_icmp;
788 if (rt->rt_flags & RTCF_LOCAL) { 769 if (rt->rt_flags & RTCF_LOCAL) {
789 ip_rt_put(rt); 770 ip_rt_put(rt);
790 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 771 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
791 } 772 }
792 773
793 tdev = rt->dst.dev; 774 tdev = rt->dst.dev;
794 775
795 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 776 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
796 if (mtu < 68) { 777 if (mtu < 68) {
797 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 778 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
798 goto tx_error_put; 779 goto tx_error_put;
799 } 780 }
800 if (rt_is_output_route(skb_rtable(skb))) 781 if (rt_is_output_route(skb_rtable(skb)))
801 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 782 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
802 783
803 /* Copy DF, reset fragment offset and MF */ 784 /* Copy DF, reset fragment offset and MF */
804 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; 785 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
805 786
806 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { 787 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
807 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 788 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
808 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 789 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
809 goto tx_error_put; 790 goto tx_error_put;
810 } 791 }
811 792
812 /* 793 /*
813 * Okay, now see if we can stuff it in the buffer as-is. 794 * Okay, now see if we can stuff it in the buffer as-is.
814 */ 795 */
815 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); 796 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
816 797
817 if (skb_headroom(skb) < max_headroom 798 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
818 || skb_cloned(skb) || skb_shared(skb)) {
819 struct sk_buff *new_skb = 799 struct sk_buff *new_skb =
820 skb_realloc_headroom(skb, max_headroom); 800 skb_realloc_headroom(skb, max_headroom);
821 if (!new_skb) { 801 if (!new_skb) {
822 ip_rt_put(rt); 802 ip_rt_put(rt);
823 kfree_skb(skb); 803 kfree_skb(skb);
824 IP_VS_ERR_RL("%s(): no memory\n", __func__); 804 IP_VS_ERR_RL("%s(): no memory\n", __func__);
825 return NF_STOLEN; 805 return NF_STOLEN;
826 } 806 }
827 consume_skb(skb); 807 consume_skb(skb);
828 skb = new_skb; 808 skb = new_skb;
829 old_iph = ip_hdr(skb); 809 old_iph = ip_hdr(skb);
830 } 810 }
831 811
832 skb->transport_header = skb->network_header; 812 skb->transport_header = skb->network_header;
833 813
834 /* fix old IP header checksum */ 814 /* fix old IP header checksum */
835 ip_send_check(old_iph); 815 ip_send_check(old_iph);
836 816
837 skb_push(skb, sizeof(struct iphdr)); 817 skb_push(skb, sizeof(struct iphdr));
838 skb_reset_network_header(skb); 818 skb_reset_network_header(skb);
839 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 819 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
840 820
841 /* drop old route */ 821 /* drop old route */
842 skb_dst_drop(skb); 822 skb_dst_drop(skb);
843 skb_dst_set(skb, &rt->dst); 823 skb_dst_set(skb, &rt->dst);
844 824
845 /* 825 /*
846 * Push down and install the IPIP header. 826 * Push down and install the IPIP header.
847 */ 827 */
848 iph = ip_hdr(skb); 828 iph = ip_hdr(skb);
849 iph->version = 4; 829 iph->version = 4;
850 iph->ihl = sizeof(struct iphdr)>>2; 830 iph->ihl = sizeof(struct iphdr)>>2;
851 iph->frag_off = df; 831 iph->frag_off = df;
852 iph->protocol = IPPROTO_IPIP; 832 iph->protocol = IPPROTO_IPIP;
853 iph->tos = tos; 833 iph->tos = tos;
854 iph->daddr = cp->daddr.ip; 834 iph->daddr = cp->daddr.ip;
855 iph->saddr = saddr; 835 iph->saddr = saddr;
856 iph->ttl = old_iph->ttl; 836 iph->ttl = old_iph->ttl;
857 ip_select_ident(iph, &rt->dst, NULL); 837 ip_select_ident(iph, &rt->dst, NULL);
858 838
859 /* Another hack: avoid icmp_send in ip_fragment */ 839 /* Another hack: avoid icmp_send in ip_fragment */
860 skb->local_df = 1; 840 skb->local_df = 1;
861 841
862 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 842 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
863 if (ret == NF_ACCEPT) 843 if (ret == NF_ACCEPT)
864 ip_local_out(skb); 844 ip_local_out(skb);
865 else if (ret == NF_DROP) 845 else if (ret == NF_DROP)
866 kfree_skb(skb); 846 kfree_skb(skb);
867 847
868 LeaveFunction(10); 848 LeaveFunction(10);
869 849
870 return NF_STOLEN; 850 return NF_STOLEN;
871 851
872 tx_error_icmp: 852 tx_error_icmp:
873 dst_link_failure(skb); 853 dst_link_failure(skb);
874 tx_error: 854 tx_error:
875 kfree_skb(skb); 855 kfree_skb(skb);
876 LeaveFunction(10); 856 LeaveFunction(10);
877 return NF_STOLEN; 857 return NF_STOLEN;
878 tx_error_put: 858 tx_error_put:
879 ip_rt_put(rt); 859 ip_rt_put(rt);
880 goto tx_error; 860 goto tx_error;
881 } 861 }
882 862
883 #ifdef CONFIG_IP_VS_IPV6 863 #ifdef CONFIG_IP_VS_IPV6
884 int 864 int
885 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 865 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
886 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 866 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
887 { 867 {
888 struct rt6_info *rt; /* Route to the other host */ 868 struct rt6_info *rt; /* Route to the other host */
889 struct in6_addr saddr; /* Source for tunnel */ 869 struct in6_addr saddr; /* Source for tunnel */
890 struct net_device *tdev; /* Device to other host */ 870 struct net_device *tdev; /* Device to other host */
891 struct ipv6hdr *old_iph = ipv6_hdr(skb); 871 struct ipv6hdr *old_iph = ipv6_hdr(skb);
892 struct ipv6hdr *iph; /* Our new IP header */ 872 struct ipv6hdr *iph; /* Our new IP header */
893 unsigned int max_headroom; /* The extra header space needed */ 873 unsigned int max_headroom; /* The extra header space needed */
894 int mtu; 874 int mtu;
895 int ret; 875 int ret;
896 876
897 EnterFunction(10); 877 EnterFunction(10);
898 878
899 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, 879 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
900 &saddr, 1, (IP_VS_RT_MODE_LOCAL | 880 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
901 IP_VS_RT_MODE_NON_LOCAL)))) 881 IP_VS_RT_MODE_NON_LOCAL))))
902 goto tx_error_icmp; 882 goto tx_error_icmp;
903 if (__ip_vs_is_local_route6(rt)) { 883 if (__ip_vs_is_local_route6(rt)) {
904 dst_release(&rt->dst); 884 dst_release(&rt->dst);
905 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 885 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
906 } 886 }
907 887
908 tdev = rt->dst.dev; 888 tdev = rt->dst.dev;
909 889
910 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 890 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
911 if (mtu < IPV6_MIN_MTU) { 891 if (mtu < IPV6_MIN_MTU) {
912 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 892 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
913 IPV6_MIN_MTU); 893 IPV6_MIN_MTU);
914 goto tx_error_put; 894 goto tx_error_put;
915 } 895 }
916 if (skb_dst(skb)) 896 if (skb_dst(skb))
917 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 897 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
918 898
919 /* MTU checking: Notice that 'mtu' have been adjusted before hand */ 899 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
920 if (__mtu_check_toobig_v6(skb, mtu)) { 900 if (__mtu_check_toobig_v6(skb, mtu)) {
921 if (!skb->dev) { 901 if (!skb->dev) {
922 struct net *net = dev_net(skb_dst(skb)->dev); 902 struct net *net = dev_net(skb_dst(skb)->dev);
923 903
924 skb->dev = net->loopback_dev; 904 skb->dev = net->loopback_dev;
925 } 905 }
926 /* only send ICMP too big on first fragment */ 906 /* only send ICMP too big on first fragment */
927 if (!ipvsh->fragoffs) 907 if (!ipvsh->fragoffs)
928 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 908 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
929 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 909 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
930 goto tx_error_put; 910 goto tx_error_put;
931 } 911 }
932 912
933 /* 913 /*
934 * Okay, now see if we can stuff it in the buffer as-is. 914 * Okay, now see if we can stuff it in the buffer as-is.
935 */ 915 */
936 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); 916 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
937 917
938 if (skb_headroom(skb) < max_headroom 918 if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
939 || skb_cloned(skb) || skb_shared(skb)) {
940 struct sk_buff *new_skb = 919 struct sk_buff *new_skb =
941 skb_realloc_headroom(skb, max_headroom); 920 skb_realloc_headroom(skb, max_headroom);
942 if (!new_skb) { 921 if (!new_skb) {
943 dst_release(&rt->dst); 922 dst_release(&rt->dst);
944 kfree_skb(skb); 923 kfree_skb(skb);
945 IP_VS_ERR_RL("%s(): no memory\n", __func__); 924 IP_VS_ERR_RL("%s(): no memory\n", __func__);
946 return NF_STOLEN; 925 return NF_STOLEN;
947 } 926 }
948 consume_skb(skb); 927 consume_skb(skb);
949 skb = new_skb; 928 skb = new_skb;
950 old_iph = ipv6_hdr(skb); 929 old_iph = ipv6_hdr(skb);
951 } 930 }
952 931
953 skb->transport_header = skb->network_header; 932 skb->transport_header = skb->network_header;
954 933
955 skb_push(skb, sizeof(struct ipv6hdr)); 934 skb_push(skb, sizeof(struct ipv6hdr));
956 skb_reset_network_header(skb); 935 skb_reset_network_header(skb);
957 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 936 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
958 937
959 /* drop old route */ 938 /* drop old route */
960 skb_dst_drop(skb); 939 skb_dst_drop(skb);
961 skb_dst_set(skb, &rt->dst); 940 skb_dst_set(skb, &rt->dst);
962 941
963 /* 942 /*
964 * Push down and install the IPIP header. 943 * Push down and install the IPIP header.
965 */ 944 */
966 iph = ipv6_hdr(skb); 945 iph = ipv6_hdr(skb);
967 iph->version = 6; 946 iph->version = 6;
968 iph->nexthdr = IPPROTO_IPV6; 947 iph->nexthdr = IPPROTO_IPV6;
969 iph->payload_len = old_iph->payload_len; 948 iph->payload_len = old_iph->payload_len;
970 be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); 949 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
971 iph->priority = old_iph->priority; 950 iph->priority = old_iph->priority;
972 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 951 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
973 iph->daddr = cp->daddr.in6; 952 iph->daddr = cp->daddr.in6;
974 iph->saddr = saddr; 953 iph->saddr = saddr;
975 iph->hop_limit = old_iph->hop_limit; 954 iph->hop_limit = old_iph->hop_limit;
976 955
977 /* Another hack: avoid icmp_send in ip_fragment */ 956 /* Another hack: avoid icmp_send in ip_fragment */
978 skb->local_df = 1; 957 skb->local_df = 1;
979 958
980 ret = ip_vs_tunnel_xmit_prepare(skb, cp); 959 ret = ip_vs_tunnel_xmit_prepare(skb, cp);
981 if (ret == NF_ACCEPT) 960 if (ret == NF_ACCEPT)
982 ip6_local_out(skb); 961 ip6_local_out(skb);
983 else if (ret == NF_DROP) 962 else if (ret == NF_DROP)
984 kfree_skb(skb); 963 kfree_skb(skb);
985 964
986 LeaveFunction(10); 965 LeaveFunction(10);
987 966
988 return NF_STOLEN; 967 return NF_STOLEN;
989 968
990 tx_error_icmp: 969 tx_error_icmp:
991 dst_link_failure(skb); 970 dst_link_failure(skb);
992 tx_error: 971 tx_error:
993 kfree_skb(skb); 972 kfree_skb(skb);
994 LeaveFunction(10); 973 LeaveFunction(10);
995 return NF_STOLEN; 974 return NF_STOLEN;
996 tx_error_put: 975 tx_error_put:
997 dst_release(&rt->dst); 976 dst_release(&rt->dst);
998 goto tx_error; 977 goto tx_error;
999 } 978 }
1000 #endif 979 #endif
1001 980
1002 981
1003 /* 982 /*
1004 * Direct Routing transmitter 983 * Direct Routing transmitter
1005 * Used for ANY protocol 984 * Used for ANY protocol
1006 */ 985 */
1007 int 986 int
1008 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 987 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1009 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 988 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1010 { 989 {
1011 struct rtable *rt; /* Route to the other host */ 990 struct rtable *rt; /* Route to the other host */
1012 struct iphdr *iph = ip_hdr(skb); 991 struct iphdr *iph = ip_hdr(skb);
1013 int mtu; 992 int mtu;
1014 993
1015 EnterFunction(10); 994 EnterFunction(10);
1016 995
1017 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 996 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1018 IP_VS_RT_MODE_LOCAL | 997 IP_VS_RT_MODE_LOCAL |
1019 IP_VS_RT_MODE_NON_LOCAL | 998 IP_VS_RT_MODE_NON_LOCAL |
1020 IP_VS_RT_MODE_KNOWN_NH, NULL))) 999 IP_VS_RT_MODE_KNOWN_NH, NULL)))
1021 goto tx_error_icmp; 1000 goto tx_error_icmp;
1022 if (rt->rt_flags & RTCF_LOCAL) { 1001 if (rt->rt_flags & RTCF_LOCAL) {
1023 ip_rt_put(rt); 1002 ip_rt_put(rt);
1024 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); 1003 return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
1025 } 1004 }
1026 1005
1027 /* MTU checking */ 1006 /* MTU checking */
1028 mtu = dst_mtu(&rt->dst); 1007 mtu = dst_mtu(&rt->dst);
1029 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && 1008 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1030 !skb_is_gso(skb)) { 1009 !skb_is_gso(skb)) {
1031 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 1010 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1032 ip_rt_put(rt); 1011 ip_rt_put(rt);
1033 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1012 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1034 goto tx_error; 1013 goto tx_error;
1035 } 1014 }
1036 1015
1037 /*
1038 * Call ip_send_check because we are not sure it is called
1039 * after ip_defrag. Is copy-on-write needed?
1040 */
1041 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1042 ip_rt_put(rt);
1043 return NF_STOLEN;
1044 }
1045 ip_send_check(ip_hdr(skb)); 1016 ip_send_check(ip_hdr(skb));
1046 1017
1047 /* drop old route */ 1018 /* drop old route */
1048 skb_dst_drop(skb); 1019 skb_dst_drop(skb);
1049 skb_dst_set(skb, &rt->dst); 1020 skb_dst_set(skb, &rt->dst);
1050 1021
1051 /* Another hack: avoid icmp_send in ip_fragment */ 1022 /* Another hack: avoid icmp_send in ip_fragment */
1052 skb->local_df = 1; 1023 skb->local_df = 1;
1053 1024
1054 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); 1025 ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
1055 1026
1056 LeaveFunction(10); 1027 LeaveFunction(10);
1057 return NF_STOLEN; 1028 return NF_STOLEN;
1058 1029
1059 tx_error_icmp: 1030 tx_error_icmp:
1060 dst_link_failure(skb); 1031 dst_link_failure(skb);
1061 tx_error: 1032 tx_error:
1062 kfree_skb(skb); 1033 kfree_skb(skb);
1063 LeaveFunction(10); 1034 LeaveFunction(10);
1064 return NF_STOLEN; 1035 return NF_STOLEN;
1065 } 1036 }
1066 1037
1067 #ifdef CONFIG_IP_VS_IPV6 1038 #ifdef CONFIG_IP_VS_IPV6
1068 int 1039 int
1069 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1040 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1070 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 1041 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1071 { 1042 {
1072 struct rt6_info *rt; /* Route to the other host */ 1043 struct rt6_info *rt; /* Route to the other host */
1073 int mtu; 1044 int mtu;
1074 1045
1075 EnterFunction(10); 1046 EnterFunction(10);
1076 1047
1077 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1048 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1078 0, (IP_VS_RT_MODE_LOCAL | 1049 0, (IP_VS_RT_MODE_LOCAL |
1079 IP_VS_RT_MODE_NON_LOCAL)))) 1050 IP_VS_RT_MODE_NON_LOCAL))))
1080 goto tx_error_icmp; 1051 goto tx_error_icmp;
1081 if (__ip_vs_is_local_route6(rt)) { 1052 if (__ip_vs_is_local_route6(rt)) {
1082 dst_release(&rt->dst); 1053 dst_release(&rt->dst);
1083 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); 1054 return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
1084 } 1055 }
1085 1056
1086 /* MTU checking */ 1057 /* MTU checking */
1087 mtu = dst_mtu(&rt->dst); 1058 mtu = dst_mtu(&rt->dst);
1088 if (__mtu_check_toobig_v6(skb, mtu)) { 1059 if (__mtu_check_toobig_v6(skb, mtu)) {
1089 if (!skb->dev) { 1060 if (!skb->dev) {
1090 struct net *net = dev_net(skb_dst(skb)->dev); 1061 struct net *net = dev_net(skb_dst(skb)->dev);
1091 1062
1092 skb->dev = net->loopback_dev; 1063 skb->dev = net->loopback_dev;
1093 } 1064 }
1094 /* only send ICMP too big on first fragment */ 1065 /* only send ICMP too big on first fragment */
1095 if (!iph->fragoffs) 1066 if (!iph->fragoffs)
1096 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1067 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1097 dst_release(&rt->dst); 1068 dst_release(&rt->dst);
1098 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1069 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1099 goto tx_error; 1070 goto tx_error;
1100 } 1071 }
1101 1072
1102 /*
1103 * Call ip_send_check because we are not sure it is called
1104 * after ip_defrag. Is copy-on-write needed?
1105 */
1106 skb = skb_share_check(skb, GFP_ATOMIC);
1107 if (unlikely(skb == NULL)) {
1108 dst_release(&rt->dst);
1109 return NF_STOLEN;
1110 }
1111
1112 /* drop old route */ 1073 /* drop old route */
1113 skb_dst_drop(skb); 1074 skb_dst_drop(skb);
1114 skb_dst_set(skb, &rt->dst); 1075 skb_dst_set(skb, &rt->dst);
1115 1076
1116 /* Another hack: avoid icmp_send in ip_fragment */ 1077 /* Another hack: avoid icmp_send in ip_fragment */
1117 skb->local_df = 1; 1078 skb->local_df = 1;
1118 1079
1119 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); 1080 ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
1120 1081
1121 LeaveFunction(10); 1082 LeaveFunction(10);
1122 return NF_STOLEN; 1083 return NF_STOLEN;
1123 1084
1124 tx_error_icmp: 1085 tx_error_icmp:
1125 dst_link_failure(skb); 1086 dst_link_failure(skb);
1126 tx_error: 1087 tx_error:
1127 kfree_skb(skb); 1088 kfree_skb(skb);
1128 LeaveFunction(10); 1089 LeaveFunction(10);
1129 return NF_STOLEN; 1090 return NF_STOLEN;
1130 } 1091 }
1131 #endif 1092 #endif
1132 1093
1133 1094
1134 /* 1095 /*
1135 * ICMP packet transmitter 1096 * ICMP packet transmitter
1136 * called by the ip_vs_in_icmp 1097 * called by the ip_vs_in_icmp
1137 */ 1098 */
1138 int 1099 int
1139 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1100 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1140 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1101 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1141 struct ip_vs_iphdr *iph) 1102 struct ip_vs_iphdr *iph)
1142 { 1103 {
1143 struct rtable *rt; /* Route to the other host */ 1104 struct rtable *rt; /* Route to the other host */
1144 int mtu; 1105 int mtu;
1145 int rc; 1106 int rc;
1146 int local; 1107 int local;
1147 int rt_mode; 1108 int rt_mode;
1148 1109
1149 EnterFunction(10); 1110 EnterFunction(10);
1150 1111
1151 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1112 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1152 forwarded directly here, because there is no need to 1113 forwarded directly here, because there is no need to
1153 translate address/port back */ 1114 translate address/port back */
1154 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1115 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1155 if (cp->packet_xmit) 1116 if (cp->packet_xmit)
1156 rc = cp->packet_xmit(skb, cp, pp, iph); 1117 rc = cp->packet_xmit(skb, cp, pp, iph);
1157 else 1118 else
1158 rc = NF_ACCEPT; 1119 rc = NF_ACCEPT;
1159 /* do not touch skb anymore */ 1120 /* do not touch skb anymore */
1160 atomic_inc(&cp->in_pkts); 1121 atomic_inc(&cp->in_pkts);
1161 goto out; 1122 goto out;
1162 } 1123 }
1163 1124
1164 /* 1125 /*
1165 * mangle and send the packet here (only for VS/NAT) 1126 * mangle and send the packet here (only for VS/NAT)
1166 */ 1127 */
1167 1128
1168 /* LOCALNODE from FORWARD hook is not supported */ 1129 /* LOCALNODE from FORWARD hook is not supported */
1169 rt_mode = (hooknum != NF_INET_FORWARD) ? 1130 rt_mode = (hooknum != NF_INET_FORWARD) ?
1170 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1131 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1171 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1132 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1172 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1133 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1173 rt_mode, NULL))) 1134 rt_mode, NULL)))
1174 goto tx_error_icmp; 1135 goto tx_error_icmp;
1175 local = rt->rt_flags & RTCF_LOCAL; 1136 local = rt->rt_flags & RTCF_LOCAL;
1176 1137
1177 /* 1138 /*
1178 * Avoid duplicate tuple in reply direction for NAT traffic 1139 * Avoid duplicate tuple in reply direction for NAT traffic
1179 * to local address when connection is sync-ed 1140 * to local address when connection is sync-ed
1180 */ 1141 */
1181 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1142 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1182 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1143 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1183 enum ip_conntrack_info ctinfo; 1144 enum ip_conntrack_info ctinfo;
1184 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1145 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1185 1146
1186 if (ct && !nf_ct_is_untracked(ct)) { 1147 if (ct && !nf_ct_is_untracked(ct)) {
1187 IP_VS_DBG(10, "%s(): " 1148 IP_VS_DBG(10, "%s(): "
1188 "stopping DNAT to local address %pI4\n", 1149 "stopping DNAT to local address %pI4\n",
1189 __func__, &cp->daddr.ip); 1150 __func__, &cp->daddr.ip);
1190 goto tx_error_put; 1151 goto tx_error_put;
1191 } 1152 }
1192 } 1153 }
1193 #endif 1154 #endif
1194 1155
1195 /* From world but DNAT to loopback address? */ 1156 /* From world but DNAT to loopback address? */
1196 if (local && ipv4_is_loopback(cp->daddr.ip) && 1157 if (local && ipv4_is_loopback(cp->daddr.ip) &&
1197 rt_is_input_route(skb_rtable(skb))) { 1158 rt_is_input_route(skb_rtable(skb))) {
1198 IP_VS_DBG(1, "%s(): " 1159 IP_VS_DBG(1, "%s(): "
1199 "stopping DNAT to loopback %pI4\n", 1160 "stopping DNAT to loopback %pI4\n",
1200 __func__, &cp->daddr.ip); 1161 __func__, &cp->daddr.ip);
1201 goto tx_error_put; 1162 goto tx_error_put;
1202 } 1163 }
1203 1164
1204 /* MTU checking */ 1165 /* MTU checking */
1205 mtu = dst_mtu(&rt->dst); 1166 mtu = dst_mtu(&rt->dst);
1206 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && 1167 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1207 !skb_is_gso(skb)) { 1168 !skb_is_gso(skb)) {
1208 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1169 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1209 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1170 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1210 goto tx_error_put; 1171 goto tx_error_put;
1211 } 1172 }
1212 1173
1213 /* copy-on-write the packet before mangling it */ 1174 /* copy-on-write the packet before mangling it */
1214 if (!skb_make_writable(skb, offset)) 1175 if (!skb_make_writable(skb, offset))
1215 goto tx_error_put; 1176 goto tx_error_put;
1216 1177
1217 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1178 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1218 goto tx_error_put; 1179 goto tx_error_put;
1219 1180
1220 ip_vs_nat_icmp(skb, pp, cp, 0); 1181 ip_vs_nat_icmp(skb, pp, cp, 0);
1221 1182
1222 if (!local) { 1183 if (!local) {
1223 /* drop the old route when skb is not shared */
1224 skb_dst_drop(skb); 1184 skb_dst_drop(skb);
1225 skb_dst_set(skb, &rt->dst); 1185 skb_dst_set(skb, &rt->dst);
1226 } else 1186 } else
1227 ip_rt_put(rt); 1187 ip_rt_put(rt);
1228 1188
1229 /* Another hack: avoid icmp_send in ip_fragment */ 1189 /* Another hack: avoid icmp_send in ip_fragment */
1230 skb->local_df = 1; 1190 skb->local_df = 1;
1231 1191
1232 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); 1192 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
1233 goto out; 1193 goto out;
1234 1194
1235 tx_error_icmp: 1195 tx_error_icmp:
1236 dst_link_failure(skb); 1196 dst_link_failure(skb);
1237 tx_error: 1197 tx_error:
1238 dev_kfree_skb(skb); 1198 dev_kfree_skb(skb);
1239 rc = NF_STOLEN; 1199 rc = NF_STOLEN;
1240 out: 1200 out:
1241 LeaveFunction(10); 1201 LeaveFunction(10);
1242 return rc; 1202 return rc;
1243 tx_error_put: 1203 tx_error_put:
1244 ip_rt_put(rt); 1204 ip_rt_put(rt);
1245 goto tx_error; 1205 goto tx_error;
1246 } 1206 }
1247 1207
1248 #ifdef CONFIG_IP_VS_IPV6 1208 #ifdef CONFIG_IP_VS_IPV6
1249 int 1209 int
1250 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1210 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1251 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1211 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1252 struct ip_vs_iphdr *iph) 1212 struct ip_vs_iphdr *iph)
1253 { 1213 {
1254 struct rt6_info *rt; /* Route to the other host */ 1214 struct rt6_info *rt; /* Route to the other host */
1255 int mtu; 1215 int mtu;
1256 int rc; 1216 int rc;
1257 int local; 1217 int local;
1258 int rt_mode; 1218 int rt_mode;
1259 1219
1260 EnterFunction(10); 1220 EnterFunction(10);
1261 1221
1262 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1222 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1263 forwarded directly here, because there is no need to 1223 forwarded directly here, because there is no need to
1264 translate address/port back */ 1224 translate address/port back */
1265 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1225 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1266 if (cp->packet_xmit) 1226 if (cp->packet_xmit)
1267 rc = cp->packet_xmit(skb, cp, pp, iph); 1227 rc = cp->packet_xmit(skb, cp, pp, iph);
1268 else 1228 else
1269 rc = NF_ACCEPT; 1229 rc = NF_ACCEPT;
1270 /* do not touch skb anymore */ 1230 /* do not touch skb anymore */
1271 atomic_inc(&cp->in_pkts); 1231 atomic_inc(&cp->in_pkts);
1272 goto out; 1232 goto out;
1273 } 1233 }
1274 1234
1275 /* 1235 /*
1276 * mangle and send the packet here (only for VS/NAT) 1236 * mangle and send the packet here (only for VS/NAT)
1277 */ 1237 */
1278 1238
1279 /* LOCALNODE from FORWARD hook is not supported */ 1239 /* LOCALNODE from FORWARD hook is not supported */
1280 rt_mode = (hooknum != NF_INET_FORWARD) ? 1240 rt_mode = (hooknum != NF_INET_FORWARD) ?
1281 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1241 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1282 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1242 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1283 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1243 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1284 0, rt_mode))) 1244 0, rt_mode)))
1285 goto tx_error_icmp; 1245 goto tx_error_icmp;
1286 1246
1287 local = __ip_vs_is_local_route6(rt); 1247 local = __ip_vs_is_local_route6(rt);
1288 /* 1248 /*
1289 * Avoid duplicate tuple in reply direction for NAT traffic 1249 * Avoid duplicate tuple in reply direction for NAT traffic
1290 * to local address when connection is sync-ed 1250 * to local address when connection is sync-ed
1291 */ 1251 */
1292 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1252 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1293 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1253 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1294 enum ip_conntrack_info ctinfo; 1254 enum ip_conntrack_info ctinfo;
1295 struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 1255 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1296 1256
1297 if (ct && !nf_ct_is_untracked(ct)) { 1257 if (ct && !nf_ct_is_untracked(ct)) {
1298 IP_VS_DBG(10, "%s(): " 1258 IP_VS_DBG(10, "%s(): "
1299 "stopping DNAT to local address %pI6\n", 1259 "stopping DNAT to local address %pI6\n",
1300 __func__, &cp->daddr.in6); 1260 __func__, &cp->daddr.in6);
1301 goto tx_error_put; 1261 goto tx_error_put;
1302 } 1262 }
1303 } 1263 }
1304 #endif 1264 #endif
1305 1265
1306 /* From world but DNAT to loopback address? */ 1266 /* From world but DNAT to loopback address? */
1307 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1267 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1308 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 1268 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1309 IP_VS_DBG(1, "%s(): " 1269 IP_VS_DBG(1, "%s(): "
1310 "stopping DNAT to loopback %pI6\n", 1270 "stopping DNAT to loopback %pI6\n",
1311 __func__, &cp->daddr.in6); 1271 __func__, &cp->daddr.in6);
1312 goto tx_error_put; 1272 goto tx_error_put;
1313 } 1273 }
1314 1274
1315 /* MTU checking */ 1275 /* MTU checking */
1316 mtu = dst_mtu(&rt->dst); 1276 mtu = dst_mtu(&rt->dst);
1317 if (__mtu_check_toobig_v6(skb, mtu)) { 1277 if (__mtu_check_toobig_v6(skb, mtu)) {
1318 if (!skb->dev) { 1278 if (!skb->dev) {
1319 struct net *net = dev_net(skb_dst(skb)->dev); 1279 struct net *net = dev_net(skb_dst(skb)->dev);
1320 1280
1321 skb->dev = net->loopback_dev; 1281 skb->dev = net->loopback_dev;
1322 } 1282 }
1323 /* only send ICMP too big on first fragment */ 1283 /* only send ICMP too big on first fragment */
1324 if (!iph->fragoffs) 1284 if (!iph->fragoffs)
1325 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1285 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1326 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1286 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1327 goto tx_error_put; 1287 goto tx_error_put;
1328 } 1288 }
1329 1289
1330 /* copy-on-write the packet before mangling it */ 1290 /* copy-on-write the packet before mangling it */
1331 if (!skb_make_writable(skb, offset)) 1291 if (!skb_make_writable(skb, offset))
1332 goto tx_error_put; 1292 goto tx_error_put;
1333 1293
1334 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1294 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1335 goto tx_error_put; 1295 goto tx_error_put;
1336 1296
1337 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1297 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1338 1298
1339 if (!local || !skb->dev) { 1299 if (!local || !skb->dev) {
1340 /* drop the old route when skb is not shared */
1341 skb_dst_drop(skb); 1300 skb_dst_drop(skb);
1342 skb_dst_set(skb, &rt->dst); 1301 skb_dst_set(skb, &rt->dst);
1343 } else { 1302 } else {
1344 /* destined to loopback, do we need to change route? */ 1303 /* destined to loopback, do we need to change route? */
1345 dst_release(&rt->dst); 1304 dst_release(&rt->dst);
1346 } 1305 }
1347 1306
1348 /* Another hack: avoid icmp_send in ip_fragment */ 1307 /* Another hack: avoid icmp_send in ip_fragment */
1349 skb->local_df = 1; 1308 skb->local_df = 1;
1350 1309
1351 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); 1310 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
1352 goto out; 1311 goto out;
1353 1312
1354 tx_error_icmp: 1313 tx_error_icmp:
1355 dst_link_failure(skb); 1314 dst_link_failure(skb);
1356 tx_error: 1315 tx_error:
1357 dev_kfree_skb(skb); 1316 dev_kfree_skb(skb);
1358 rc = NF_STOLEN; 1317 rc = NF_STOLEN;
1359 out: 1318 out:
1360 LeaveFunction(10); 1319 LeaveFunction(10);
1361 return rc; 1320 return rc;
1362 tx_error_put: 1321 tx_error_put:
1363 dst_release(&rt->dst); 1322 dst_release(&rt->dst);
1364 goto tx_error; 1323 goto tx_error;
1365 } 1324 }
1366 #endif 1325 #endif
1367 1326