Commit 545b29019c8959c805abfe8194d47e989f1a6e5f

Authored by David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf-next

Conflicts:
	net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c

Minor conflict due to some IS_ENABLED conversions done
in net-next.

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 7 changed files Inline Diff

include/uapi/linux/in6.h
1 /* 1 /*
2 * Types and definitions for AF_INET6 2 * Types and definitions for AF_INET6
3 * Linux INET6 implementation 3 * Linux INET6 implementation
4 * 4 *
5 * Authors: 5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt> 6 * Pedro Roque <roque@di.fc.ul.pt>
7 * 7 *
8 * Sources: 8 * Sources:
9 * IPv6 Program Interfaces for BSD Systems 9 * IPv6 Program Interfaces for BSD Systems
10 * <draft-ietf-ipngwg-bsd-api-05.txt> 10 * <draft-ietf-ipngwg-bsd-api-05.txt>
11 * 11 *
12 * Advanced Sockets API for IPv6 12 * Advanced Sockets API for IPv6
13 * <draft-stevens-advanced-api-00.txt> 13 * <draft-stevens-advanced-api-00.txt>
14 * 14 *
15 * This program is free software; you can redistribute it and/or 15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License 16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version. 18 * 2 of the License, or (at your option) any later version.
19 */ 19 */
20 20
21 #ifndef _UAPI_LINUX_IN6_H 21 #ifndef _UAPI_LINUX_IN6_H
22 #define _UAPI_LINUX_IN6_H 22 #define _UAPI_LINUX_IN6_H
23 23
24 #include <linux/types.h> 24 #include <linux/types.h>
25 25
26 /* 26 /*
27 * IPv6 address structure 27 * IPv6 address structure
28 */ 28 */
29 29
30 struct in6_addr { 30 struct in6_addr {
31 union { 31 union {
32 __u8 u6_addr8[16]; 32 __u8 u6_addr8[16];
33 __be16 u6_addr16[8]; 33 __be16 u6_addr16[8];
34 __be32 u6_addr32[4]; 34 __be32 u6_addr32[4];
35 } in6_u; 35 } in6_u;
36 #define s6_addr in6_u.u6_addr8 36 #define s6_addr in6_u.u6_addr8
37 #define s6_addr16 in6_u.u6_addr16 37 #define s6_addr16 in6_u.u6_addr16
38 #define s6_addr32 in6_u.u6_addr32 38 #define s6_addr32 in6_u.u6_addr32
39 }; 39 };
40 40
41 /* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 41 /* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553
42 * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined 42 * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
43 * in network byte order, not in host byte order as are the IPv4 equivalents 43 * in network byte order, not in host byte order as are the IPv4 equivalents
44 */ 44 */
45 45
46 struct sockaddr_in6 { 46 struct sockaddr_in6 {
47 unsigned short int sin6_family; /* AF_INET6 */ 47 unsigned short int sin6_family; /* AF_INET6 */
48 __be16 sin6_port; /* Transport layer port # */ 48 __be16 sin6_port; /* Transport layer port # */
49 __be32 sin6_flowinfo; /* IPv6 flow information */ 49 __be32 sin6_flowinfo; /* IPv6 flow information */
50 struct in6_addr sin6_addr; /* IPv6 address */ 50 struct in6_addr sin6_addr; /* IPv6 address */
51 __u32 sin6_scope_id; /* scope id (new in RFC2553) */ 51 __u32 sin6_scope_id; /* scope id (new in RFC2553) */
52 }; 52 };
53 53
54 struct ipv6_mreq { 54 struct ipv6_mreq {
55 /* IPv6 multicast address of group */ 55 /* IPv6 multicast address of group */
56 struct in6_addr ipv6mr_multiaddr; 56 struct in6_addr ipv6mr_multiaddr;
57 57
58 /* local IPv6 address of interface */ 58 /* local IPv6 address of interface */
59 int ipv6mr_ifindex; 59 int ipv6mr_ifindex;
60 }; 60 };
61 61
62 #define ipv6mr_acaddr ipv6mr_multiaddr 62 #define ipv6mr_acaddr ipv6mr_multiaddr
63 63
64 struct in6_flowlabel_req { 64 struct in6_flowlabel_req {
65 struct in6_addr flr_dst; 65 struct in6_addr flr_dst;
66 __be32 flr_label; 66 __be32 flr_label;
67 __u8 flr_action; 67 __u8 flr_action;
68 __u8 flr_share; 68 __u8 flr_share;
69 __u16 flr_flags; 69 __u16 flr_flags;
70 __u16 flr_expires; 70 __u16 flr_expires;
71 __u16 flr_linger; 71 __u16 flr_linger;
72 __u32 __flr_pad; 72 __u32 __flr_pad;
73 /* Options in format of IPV6_PKTOPTIONS */ 73 /* Options in format of IPV6_PKTOPTIONS */
74 }; 74 };
75 75
76 #define IPV6_FL_A_GET 0 76 #define IPV6_FL_A_GET 0
77 #define IPV6_FL_A_PUT 1 77 #define IPV6_FL_A_PUT 1
78 #define IPV6_FL_A_RENEW 2 78 #define IPV6_FL_A_RENEW 2
79 79
80 #define IPV6_FL_F_CREATE 1 80 #define IPV6_FL_F_CREATE 1
81 #define IPV6_FL_F_EXCL 2 81 #define IPV6_FL_F_EXCL 2
82 82
83 #define IPV6_FL_S_NONE 0 83 #define IPV6_FL_S_NONE 0
84 #define IPV6_FL_S_EXCL 1 84 #define IPV6_FL_S_EXCL 1
85 #define IPV6_FL_S_PROCESS 2 85 #define IPV6_FL_S_PROCESS 2
86 #define IPV6_FL_S_USER 3 86 #define IPV6_FL_S_USER 3
87 #define IPV6_FL_S_ANY 255 87 #define IPV6_FL_S_ANY 255
88 88
89 89
90 /* 90 /*
91 * Bitmask constant declarations to help applications select out the 91 * Bitmask constant declarations to help applications select out the
92 * flow label and priority fields. 92 * flow label and priority fields.
93 * 93 *
94 * Note that these are in host byte order while the flowinfo field of 94 * Note that these are in host byte order while the flowinfo field of
95 * sockaddr_in6 is in network byte order. 95 * sockaddr_in6 is in network byte order.
96 */ 96 */
97 97
98 #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff 98 #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff
99 #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 99 #define IPV6_FLOWINFO_PRIORITY 0x0ff00000
100 100
101 /* These definitions are obsolete */ 101 /* These definitions are obsolete */
102 #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 102 #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000
103 #define IPV6_PRIORITY_FILLER 0x0100 103 #define IPV6_PRIORITY_FILLER 0x0100
104 #define IPV6_PRIORITY_UNATTENDED 0x0200 104 #define IPV6_PRIORITY_UNATTENDED 0x0200
105 #define IPV6_PRIORITY_RESERVED1 0x0300 105 #define IPV6_PRIORITY_RESERVED1 0x0300
106 #define IPV6_PRIORITY_BULK 0x0400 106 #define IPV6_PRIORITY_BULK 0x0400
107 #define IPV6_PRIORITY_RESERVED2 0x0500 107 #define IPV6_PRIORITY_RESERVED2 0x0500
108 #define IPV6_PRIORITY_INTERACTIVE 0x0600 108 #define IPV6_PRIORITY_INTERACTIVE 0x0600
109 #define IPV6_PRIORITY_CONTROL 0x0700 109 #define IPV6_PRIORITY_CONTROL 0x0700
110 #define IPV6_PRIORITY_8 0x0800 110 #define IPV6_PRIORITY_8 0x0800
111 #define IPV6_PRIORITY_9 0x0900 111 #define IPV6_PRIORITY_9 0x0900
112 #define IPV6_PRIORITY_10 0x0a00 112 #define IPV6_PRIORITY_10 0x0a00
113 #define IPV6_PRIORITY_11 0x0b00 113 #define IPV6_PRIORITY_11 0x0b00
114 #define IPV6_PRIORITY_12 0x0c00 114 #define IPV6_PRIORITY_12 0x0c00
115 #define IPV6_PRIORITY_13 0x0d00 115 #define IPV6_PRIORITY_13 0x0d00
116 #define IPV6_PRIORITY_14 0x0e00 116 #define IPV6_PRIORITY_14 0x0e00
117 #define IPV6_PRIORITY_15 0x0f00 117 #define IPV6_PRIORITY_15 0x0f00
118 118
119 /* 119 /*
120 * IPV6 extension headers 120 * IPV6 extension headers
121 */ 121 */
122 #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */ 122 #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */
123 #define IPPROTO_ROUTING 43 /* IPv6 routing header */ 123 #define IPPROTO_ROUTING 43 /* IPv6 routing header */
124 #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */ 124 #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */
125 #define IPPROTO_ICMPV6 58 /* ICMPv6 */ 125 #define IPPROTO_ICMPV6 58 /* ICMPv6 */
126 #define IPPROTO_NONE 59 /* IPv6 no next header */ 126 #define IPPROTO_NONE 59 /* IPv6 no next header */
127 #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ 127 #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */
128 #define IPPROTO_MH 135 /* IPv6 mobility header */ 128 #define IPPROTO_MH 135 /* IPv6 mobility header */
129 129
130 /* 130 /*
131 * IPv6 TLV options. 131 * IPv6 TLV options.
132 */ 132 */
133 #define IPV6_TLV_PAD1 0 133 #define IPV6_TLV_PAD1 0
134 #define IPV6_TLV_PADN 1 134 #define IPV6_TLV_PADN 1
135 #define IPV6_TLV_ROUTERALERT 5 135 #define IPV6_TLV_ROUTERALERT 5
136 #define IPV6_TLV_JUMBO 194 136 #define IPV6_TLV_JUMBO 194
137 #define IPV6_TLV_HAO 201 /* home address option */ 137 #define IPV6_TLV_HAO 201 /* home address option */
138 138
139 /* 139 /*
140 * IPV6 socket options 140 * IPV6 socket options
141 */ 141 */
142 142
143 #define IPV6_ADDRFORM 1 143 #define IPV6_ADDRFORM 1
144 #define IPV6_2292PKTINFO 2 144 #define IPV6_2292PKTINFO 2
145 #define IPV6_2292HOPOPTS 3 145 #define IPV6_2292HOPOPTS 3
146 #define IPV6_2292DSTOPTS 4 146 #define IPV6_2292DSTOPTS 4
147 #define IPV6_2292RTHDR 5 147 #define IPV6_2292RTHDR 5
148 #define IPV6_2292PKTOPTIONS 6 148 #define IPV6_2292PKTOPTIONS 6
149 #define IPV6_CHECKSUM 7 149 #define IPV6_CHECKSUM 7
150 #define IPV6_2292HOPLIMIT 8 150 #define IPV6_2292HOPLIMIT 8
151 #define IPV6_NEXTHOP 9 151 #define IPV6_NEXTHOP 9
152 #define IPV6_AUTHHDR 10 /* obsolete */ 152 #define IPV6_AUTHHDR 10 /* obsolete */
153 #define IPV6_FLOWINFO 11 153 #define IPV6_FLOWINFO 11
154 154
155 #define IPV6_UNICAST_HOPS 16 155 #define IPV6_UNICAST_HOPS 16
156 #define IPV6_MULTICAST_IF 17 156 #define IPV6_MULTICAST_IF 17
157 #define IPV6_MULTICAST_HOPS 18 157 #define IPV6_MULTICAST_HOPS 18
158 #define IPV6_MULTICAST_LOOP 19 158 #define IPV6_MULTICAST_LOOP 19
159 #define IPV6_ADD_MEMBERSHIP 20 159 #define IPV6_ADD_MEMBERSHIP 20
160 #define IPV6_DROP_MEMBERSHIP 21 160 #define IPV6_DROP_MEMBERSHIP 21
161 #define IPV6_ROUTER_ALERT 22 161 #define IPV6_ROUTER_ALERT 22
162 #define IPV6_MTU_DISCOVER 23 162 #define IPV6_MTU_DISCOVER 23
163 #define IPV6_MTU 24 163 #define IPV6_MTU 24
164 #define IPV6_RECVERR 25 164 #define IPV6_RECVERR 25
165 #define IPV6_V6ONLY 26 165 #define IPV6_V6ONLY 26
166 #define IPV6_JOIN_ANYCAST 27 166 #define IPV6_JOIN_ANYCAST 27
167 #define IPV6_LEAVE_ANYCAST 28 167 #define IPV6_LEAVE_ANYCAST 28
168 168
169 /* IPV6_MTU_DISCOVER values */ 169 /* IPV6_MTU_DISCOVER values */
170 #define IPV6_PMTUDISC_DONT 0 170 #define IPV6_PMTUDISC_DONT 0
171 #define IPV6_PMTUDISC_WANT 1 171 #define IPV6_PMTUDISC_WANT 1
172 #define IPV6_PMTUDISC_DO 2 172 #define IPV6_PMTUDISC_DO 2
173 #define IPV6_PMTUDISC_PROBE 3 173 #define IPV6_PMTUDISC_PROBE 3
174 174
175 /* Flowlabel */ 175 /* Flowlabel */
176 #define IPV6_FLOWLABEL_MGR 32 176 #define IPV6_FLOWLABEL_MGR 32
177 #define IPV6_FLOWINFO_SEND 33 177 #define IPV6_FLOWINFO_SEND 33
178 178
179 #define IPV6_IPSEC_POLICY 34 179 #define IPV6_IPSEC_POLICY 34
180 #define IPV6_XFRM_POLICY 35 180 #define IPV6_XFRM_POLICY 35
181 181
182 /* 182 /*
183 * Multicast: 183 * Multicast:
184 * Following socket options are shared between IPv4 and IPv6. 184 * Following socket options are shared between IPv4 and IPv6.
185 * 185 *
186 * MCAST_JOIN_GROUP 42 186 * MCAST_JOIN_GROUP 42
187 * MCAST_BLOCK_SOURCE 43 187 * MCAST_BLOCK_SOURCE 43
188 * MCAST_UNBLOCK_SOURCE 44 188 * MCAST_UNBLOCK_SOURCE 44
189 * MCAST_LEAVE_GROUP 45 189 * MCAST_LEAVE_GROUP 45
190 * MCAST_JOIN_SOURCE_GROUP 46 190 * MCAST_JOIN_SOURCE_GROUP 46
191 * MCAST_LEAVE_SOURCE_GROUP 47 191 * MCAST_LEAVE_SOURCE_GROUP 47
192 * MCAST_MSFILTER 48 192 * MCAST_MSFILTER 48
193 */ 193 */
194 194
195 /* 195 /*
196 * Advanced API (RFC3542) (1) 196 * Advanced API (RFC3542) (1)
197 * 197 *
198 * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c. 198 * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c.
199 */ 199 */
200 200
201 #define IPV6_RECVPKTINFO 49 201 #define IPV6_RECVPKTINFO 49
202 #define IPV6_PKTINFO 50 202 #define IPV6_PKTINFO 50
203 #define IPV6_RECVHOPLIMIT 51 203 #define IPV6_RECVHOPLIMIT 51
204 #define IPV6_HOPLIMIT 52 204 #define IPV6_HOPLIMIT 52
205 #define IPV6_RECVHOPOPTS 53 205 #define IPV6_RECVHOPOPTS 53
206 #define IPV6_HOPOPTS 54 206 #define IPV6_HOPOPTS 54
207 #define IPV6_RTHDRDSTOPTS 55 207 #define IPV6_RTHDRDSTOPTS 55
208 #define IPV6_RECVRTHDR 56 208 #define IPV6_RECVRTHDR 56
209 #define IPV6_RTHDR 57 209 #define IPV6_RTHDR 57
210 #define IPV6_RECVDSTOPTS 58 210 #define IPV6_RECVDSTOPTS 58
211 #define IPV6_DSTOPTS 59 211 #define IPV6_DSTOPTS 59
212 #define IPV6_RECVPATHMTU 60 212 #define IPV6_RECVPATHMTU 60
213 #define IPV6_PATHMTU 61 213 #define IPV6_PATHMTU 61
214 #define IPV6_DONTFRAG 62 214 #define IPV6_DONTFRAG 62
215 #if 0 /* not yet */ 215 #if 0 /* not yet */
216 #define IPV6_USE_MIN_MTU 63 216 #define IPV6_USE_MIN_MTU 63
217 #endif 217 #endif
218 218
219 /* 219 /*
220 * Netfilter (1) 220 * Netfilter (1)
221 * 221 *
222 * Following socket options are used in ip6_tables; 222 * Following socket options are used in ip6_tables;
223 * see include/linux/netfilter_ipv6/ip6_tables.h. 223 * see include/linux/netfilter_ipv6/ip6_tables.h.
224 * 224 *
225 * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 225 * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64
226 * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 226 * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65
227 */ 227 */
228 228
229 /* 229 /*
230 * Advanced API (RFC3542) (2) 230 * Advanced API (RFC3542) (2)
231 */ 231 */
232 #define IPV6_RECVTCLASS 66 232 #define IPV6_RECVTCLASS 66
233 #define IPV6_TCLASS 67 233 #define IPV6_TCLASS 67
234 234
235 /* 235 /*
236 * Netfilter (2) 236 * Netfilter (2)
237 * 237 *
238 * Following socket options are used in ip6_tables; 238 * Following socket options are used in ip6_tables;
239 * see include/linux/netfilter_ipv6/ip6_tables.h. 239 * see include/linux/netfilter_ipv6/ip6_tables.h.
240 * 240 *
241 * IP6T_SO_GET_REVISION_MATCH 68 241 * IP6T_SO_GET_REVISION_MATCH 68
242 * IP6T_SO_GET_REVISION_TARGET 69 242 * IP6T_SO_GET_REVISION_TARGET 69
243 * IP6T_SO_ORIGINAL_DST 80
243 */ 244 */
244 245
245 /* RFC5014: Source address selection */ 246 /* RFC5014: Source address selection */
246 #define IPV6_ADDR_PREFERENCES 72 247 #define IPV6_ADDR_PREFERENCES 72
247 248
248 #define IPV6_PREFER_SRC_TMP 0x0001 249 #define IPV6_PREFER_SRC_TMP 0x0001
249 #define IPV6_PREFER_SRC_PUBLIC 0x0002 250 #define IPV6_PREFER_SRC_PUBLIC 0x0002
250 #define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100 251 #define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100
251 #define IPV6_PREFER_SRC_COA 0x0004 252 #define IPV6_PREFER_SRC_COA 0x0004
252 #define IPV6_PREFER_SRC_HOME 0x0400 253 #define IPV6_PREFER_SRC_HOME 0x0400
253 #define IPV6_PREFER_SRC_CGA 0x0008 254 #define IPV6_PREFER_SRC_CGA 0x0008
254 #define IPV6_PREFER_SRC_NONCGA 0x0800 255 #define IPV6_PREFER_SRC_NONCGA 0x0800
255 256
256 /* RFC5082: Generalized TTL Security Mechanism */ 257 /* RFC5082: Generalized TTL Security Mechanism */
257 #define IPV6_MINHOPCOUNT 73 258 #define IPV6_MINHOPCOUNT 73
258 259
259 #define IPV6_ORIGDSTADDR 74 260 #define IPV6_ORIGDSTADDR 74
260 #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR 261 #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR
261 #define IPV6_TRANSPARENT 75 262 #define IPV6_TRANSPARENT 75
262 #define IPV6_UNICAST_IF 76 263 #define IPV6_UNICAST_IF 76
263 264
264 /* 265 /*
265 * Multicast Routing: 266 * Multicast Routing:
266 * see include/linux/mroute6.h. 267 * see include/linux/mroute6.h.
267 * 268 *
268 * MRT6_INIT 200 269 * MRT6_INIT 200
269 * MRT6_DONE 201 270 * MRT6_DONE 201
270 * MRT6_ADD_MIF 202 271 * MRT6_ADD_MIF 202
271 * MRT6_DEL_MIF 203 272 * MRT6_DEL_MIF 203
272 * MRT6_ADD_MFC 204 273 * MRT6_ADD_MFC 204
273 * MRT6_DEL_MFC 205 274 * MRT6_DEL_MFC 205
274 * MRT6_VERSION 206 275 * MRT6_VERSION 206
275 * MRT6_ASSERT 207 276 * MRT6_ASSERT 207
276 * MRT6_PIM 208 277 * MRT6_PIM 208
277 * (reserved) 209 278 * (reserved) 209
278 */ 279 */
279 #endif /* _UAPI_LINUX_IN6_H */ 280 #endif /* _UAPI_LINUX_IN6_H */
280 281
include/uapi/linux/netfilter_ipv6/ip6_tables.h
1 /* 1 /*
2 * 25-Jul-1998 Major changes to allow for ip chain table 2 * 25-Jul-1998 Major changes to allow for ip chain table
3 * 3 *
4 * 3-Jan-2000 Named tables to allow packet selection for different uses. 4 * 3-Jan-2000 Named tables to allow packet selection for different uses.
5 */ 5 */
6 6
7 /* 7 /*
8 * Format of an IP6 firewall descriptor 8 * Format of an IP6 firewall descriptor
9 * 9 *
10 * src, dst, src_mask, dst_mask are always stored in network byte order. 10 * src, dst, src_mask, dst_mask are always stored in network byte order.
11 * flags are stored in host byte order (of course). 11 * flags are stored in host byte order (of course).
12 * Port numbers are stored in HOST byte order. 12 * Port numbers are stored in HOST byte order.
13 */ 13 */
14 14
15 #ifndef _UAPI_IP6_TABLES_H 15 #ifndef _UAPI_IP6_TABLES_H
16 #define _UAPI_IP6_TABLES_H 16 #define _UAPI_IP6_TABLES_H
17 17
18 #include <linux/types.h> 18 #include <linux/types.h>
19 #include <linux/compiler.h> 19 #include <linux/compiler.h>
20 #include <linux/netfilter_ipv6.h> 20 #include <linux/netfilter_ipv6.h>
21 21
22 #include <linux/netfilter/x_tables.h> 22 #include <linux/netfilter/x_tables.h>
23 23
24 #ifndef __KERNEL__ 24 #ifndef __KERNEL__
25 #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN 25 #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN
26 #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN 26 #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN
27 #define ip6t_match xt_match 27 #define ip6t_match xt_match
28 #define ip6t_target xt_target 28 #define ip6t_target xt_target
29 #define ip6t_table xt_table 29 #define ip6t_table xt_table
30 #define ip6t_get_revision xt_get_revision 30 #define ip6t_get_revision xt_get_revision
31 #define ip6t_entry_match xt_entry_match 31 #define ip6t_entry_match xt_entry_match
32 #define ip6t_entry_target xt_entry_target 32 #define ip6t_entry_target xt_entry_target
33 #define ip6t_standard_target xt_standard_target 33 #define ip6t_standard_target xt_standard_target
34 #define ip6t_error_target xt_error_target 34 #define ip6t_error_target xt_error_target
35 #define ip6t_counters xt_counters 35 #define ip6t_counters xt_counters
36 #define IP6T_CONTINUE XT_CONTINUE 36 #define IP6T_CONTINUE XT_CONTINUE
37 #define IP6T_RETURN XT_RETURN 37 #define IP6T_RETURN XT_RETURN
38 38
39 /* Pre-iptables-1.4.0 */ 39 /* Pre-iptables-1.4.0 */
40 #include <linux/netfilter/xt_tcpudp.h> 40 #include <linux/netfilter/xt_tcpudp.h>
41 #define ip6t_tcp xt_tcp 41 #define ip6t_tcp xt_tcp
42 #define ip6t_udp xt_udp 42 #define ip6t_udp xt_udp
43 #define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT 43 #define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT
44 #define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT 44 #define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT
45 #define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS 45 #define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS
46 #define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION 46 #define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION
47 #define IP6T_TCP_INV_MASK XT_TCP_INV_MASK 47 #define IP6T_TCP_INV_MASK XT_TCP_INV_MASK
48 #define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT 48 #define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT
49 #define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT 49 #define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT
50 #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK 50 #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK
51 51
52 #define ip6t_counters_info xt_counters_info 52 #define ip6t_counters_info xt_counters_info
53 #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET 53 #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET
54 #define IP6T_ERROR_TARGET XT_ERROR_TARGET 54 #define IP6T_ERROR_TARGET XT_ERROR_TARGET
55 #define IP6T_MATCH_ITERATE(e, fn, args...) \ 55 #define IP6T_MATCH_ITERATE(e, fn, args...) \
56 XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) 56 XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args)
57 #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ 57 #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \
58 XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) 58 XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args)
59 #endif 59 #endif
60 60
61 /* Yes, Virginia, you have to zero the padding. */ 61 /* Yes, Virginia, you have to zero the padding. */
62 struct ip6t_ip6 { 62 struct ip6t_ip6 {
63 /* Source and destination IP6 addr */ 63 /* Source and destination IP6 addr */
64 struct in6_addr src, dst; 64 struct in6_addr src, dst;
65 /* Mask for src and dest IP6 addr */ 65 /* Mask for src and dest IP6 addr */
66 struct in6_addr smsk, dmsk; 66 struct in6_addr smsk, dmsk;
67 char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; 67 char iniface[IFNAMSIZ], outiface[IFNAMSIZ];
68 unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; 68 unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ];
69 69
70 /* Upper protocol number 70 /* Upper protocol number
71 * - The allowed value is 0 (any) or protocol number of last parsable 71 * - The allowed value is 0 (any) or protocol number of last parsable
72 * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or 72 * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or
73 * the non IPv6 extension headers. 73 * the non IPv6 extension headers.
74 * - The protocol numbers of IPv6 extension headers except of ESP and 74 * - The protocol numbers of IPv6 extension headers except of ESP and
75 * MH do not match any packets. 75 * MH do not match any packets.
76 * - You also need to set IP6T_F_PROTO in "flags" to check protocol. 76 * - You also need to set IP6T_F_PROTO in "flags" to check protocol.
77 */ 77 */
78 __u16 proto; 78 __u16 proto;
79 /* TOS to match iff flags & IP6T_F_TOS */ 79 /* TOS to match iff flags & IP6T_F_TOS */
80 __u8 tos; 80 __u8 tos;
81 81
82 /* Flags word */ 82 /* Flags word */
83 __u8 flags; 83 __u8 flags;
84 /* Inverse flags */ 84 /* Inverse flags */
85 __u8 invflags; 85 __u8 invflags;
86 }; 86 };
87 87
88 /* Values for "flags" field in struct ip6t_ip6 (general ip6 structure). */ 88 /* Values for "flags" field in struct ip6t_ip6 (general ip6 structure). */
89 #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper 89 #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper
90 protocols */ 90 protocols */
91 #define IP6T_F_TOS 0x02 /* Match the TOS. */ 91 #define IP6T_F_TOS 0x02 /* Match the TOS. */
92 #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ 92 #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */
93 #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ 93 #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */
94 94
95 /* Values for "invflags" field in struct ip6t_ip6. */ 95 /* Values for "invflags" field in struct ip6t_ip6. */
96 #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ 96 #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */
97 #define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ 97 #define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */
98 #define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ 98 #define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */
99 #define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ 99 #define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */
100 #define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST IP. */ 100 #define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST IP. */
101 #define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ 101 #define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */
102 #define IP6T_INV_PROTO XT_INV_PROTO 102 #define IP6T_INV_PROTO XT_INV_PROTO
103 #define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ 103 #define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */
104 104
105 /* This structure defines each of the firewall rules. Consists of 3 105 /* This structure defines each of the firewall rules. Consists of 3
106 parts which are 1) general IP header stuff 2) match specific 106 parts which are 1) general IP header stuff 2) match specific
107 stuff 3) the target to perform if the rule matches */ 107 stuff 3) the target to perform if the rule matches */
108 struct ip6t_entry { 108 struct ip6t_entry {
109 struct ip6t_ip6 ipv6; 109 struct ip6t_ip6 ipv6;
110 110
111 /* Mark with fields that we care about. */ 111 /* Mark with fields that we care about. */
112 unsigned int nfcache; 112 unsigned int nfcache;
113 113
114 /* Size of ip6t_entry + matches */ 114 /* Size of ip6t_entry + matches */
115 __u16 target_offset; 115 __u16 target_offset;
116 /* Size of ip6t_entry + matches + target */ 116 /* Size of ip6t_entry + matches + target */
117 __u16 next_offset; 117 __u16 next_offset;
118 118
119 /* Back pointer */ 119 /* Back pointer */
120 unsigned int comefrom; 120 unsigned int comefrom;
121 121
122 /* Packet and byte counters. */ 122 /* Packet and byte counters. */
123 struct xt_counters counters; 123 struct xt_counters counters;
124 124
125 /* The matches (if any), then the target. */ 125 /* The matches (if any), then the target. */
126 unsigned char elems[0]; 126 unsigned char elems[0];
127 }; 127 };
128 128
129 /* Standard entry */ 129 /* Standard entry */
130 struct ip6t_standard { 130 struct ip6t_standard {
131 struct ip6t_entry entry; 131 struct ip6t_entry entry;
132 struct xt_standard_target target; 132 struct xt_standard_target target;
133 }; 133 };
134 134
135 struct ip6t_error { 135 struct ip6t_error {
136 struct ip6t_entry entry; 136 struct ip6t_entry entry;
137 struct xt_error_target target; 137 struct xt_error_target target;
138 }; 138 };
139 139
140 #define IP6T_ENTRY_INIT(__size) \ 140 #define IP6T_ENTRY_INIT(__size) \
141 { \ 141 { \
142 .target_offset = sizeof(struct ip6t_entry), \ 142 .target_offset = sizeof(struct ip6t_entry), \
143 .next_offset = (__size), \ 143 .next_offset = (__size), \
144 } 144 }
145 145
146 #define IP6T_STANDARD_INIT(__verdict) \ 146 #define IP6T_STANDARD_INIT(__verdict) \
147 { \ 147 { \
148 .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ 148 .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \
149 .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ 149 .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \
150 sizeof(struct xt_standard_target)), \ 150 sizeof(struct xt_standard_target)), \
151 .target.verdict = -(__verdict) - 1, \ 151 .target.verdict = -(__verdict) - 1, \
152 } 152 }
153 153
154 #define IP6T_ERROR_INIT \ 154 #define IP6T_ERROR_INIT \
155 { \ 155 { \
156 .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ 156 .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \
157 .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ 157 .target = XT_TARGET_INIT(XT_ERROR_TARGET, \
158 sizeof(struct xt_error_target)), \ 158 sizeof(struct xt_error_target)), \
159 .target.errorname = "ERROR", \ 159 .target.errorname = "ERROR", \
160 } 160 }
161 161
162 /* 162 /*
163 * New IP firewall options for [gs]etsockopt at the RAW IP level. 163 * New IP firewall options for [gs]etsockopt at the RAW IP level.
164 * Unlike BSD Linux inherits IP options so you don't have to use 164 * Unlike BSD Linux inherits IP options so you don't have to use
165 * a raw socket for this. Instead we check rights in the calls. 165 * a raw socket for this. Instead we check rights in the calls.
166 * 166 *
167 * ATTENTION: check linux/in6.h before adding new number here. 167 * ATTENTION: check linux/in6.h before adding new number here.
168 */ 168 */
169 #define IP6T_BASE_CTL 64 169 #define IP6T_BASE_CTL 64
170 170
171 #define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) 171 #define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL)
172 #define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) 172 #define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1)
173 #define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS 173 #define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS
174 174
175 #define IP6T_SO_GET_INFO (IP6T_BASE_CTL) 175 #define IP6T_SO_GET_INFO (IP6T_BASE_CTL)
176 #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) 176 #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1)
177 #define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) 177 #define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4)
178 #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) 178 #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5)
179 #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET 179 #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET
180 180
181 /* obtain original destination address of a REDIRECT'd connection */
182 #define IP6T_SO_ORIGINAL_DST 80
183
181 /* ICMP matching stuff */ 184 /* ICMP matching stuff */
182 struct ip6t_icmp { 185 struct ip6t_icmp {
183 __u8 type; /* type to match */ 186 __u8 type; /* type to match */
184 __u8 code[2]; /* range of code */ 187 __u8 code[2]; /* range of code */
185 __u8 invflags; /* Inverse flags */ 188 __u8 invflags; /* Inverse flags */
186 }; 189 };
187 190
188 /* Values for "invflags" field in struct ip6t_icmp. */ 191 /* Values for "invflags" field in struct ip6t_icmp. */
189 #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ 192 #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */
190 193
191 /* The argument to IP6T_SO_GET_INFO */ 194 /* The argument to IP6T_SO_GET_INFO */
192 struct ip6t_getinfo { 195 struct ip6t_getinfo {
193 /* Which table: caller fills this in. */ 196 /* Which table: caller fills this in. */
194 char name[XT_TABLE_MAXNAMELEN]; 197 char name[XT_TABLE_MAXNAMELEN];
195 198
196 /* Kernel fills these in. */ 199 /* Kernel fills these in. */
197 /* Which hook entry points are valid: bitmask */ 200 /* Which hook entry points are valid: bitmask */
198 unsigned int valid_hooks; 201 unsigned int valid_hooks;
199 202
200 /* Hook entry points: one per netfilter hook. */ 203 /* Hook entry points: one per netfilter hook. */
201 unsigned int hook_entry[NF_INET_NUMHOOKS]; 204 unsigned int hook_entry[NF_INET_NUMHOOKS];
202 205
203 /* Underflow points. */ 206 /* Underflow points. */
204 unsigned int underflow[NF_INET_NUMHOOKS]; 207 unsigned int underflow[NF_INET_NUMHOOKS];
205 208
206 /* Number of entries */ 209 /* Number of entries */
207 unsigned int num_entries; 210 unsigned int num_entries;
208 211
209 /* Size of entries. */ 212 /* Size of entries. */
210 unsigned int size; 213 unsigned int size;
211 }; 214 };
212 215
213 /* The argument to IP6T_SO_SET_REPLACE. */ 216 /* The argument to IP6T_SO_SET_REPLACE. */
214 struct ip6t_replace { 217 struct ip6t_replace {
215 /* Which table. */ 218 /* Which table. */
216 char name[XT_TABLE_MAXNAMELEN]; 219 char name[XT_TABLE_MAXNAMELEN];
217 220
218 /* Which hook entry points are valid: bitmask. You can't 221 /* Which hook entry points are valid: bitmask. You can't
219 change this. */ 222 change this. */
220 unsigned int valid_hooks; 223 unsigned int valid_hooks;
221 224
222 /* Number of entries */ 225 /* Number of entries */
223 unsigned int num_entries; 226 unsigned int num_entries;
224 227
225 /* Total size of new entries */ 228 /* Total size of new entries */
226 unsigned int size; 229 unsigned int size;
227 230
228 /* Hook entry points. */ 231 /* Hook entry points. */
229 unsigned int hook_entry[NF_INET_NUMHOOKS]; 232 unsigned int hook_entry[NF_INET_NUMHOOKS];
230 233
231 /* Underflow points. */ 234 /* Underflow points. */
232 unsigned int underflow[NF_INET_NUMHOOKS]; 235 unsigned int underflow[NF_INET_NUMHOOKS];
233 236
234 /* Information about old entries: */ 237 /* Information about old entries: */
235 /* Number of counters (must be equal to current number of entries). */ 238 /* Number of counters (must be equal to current number of entries). */
236 unsigned int num_counters; 239 unsigned int num_counters;
237 /* The old entries' counters. */ 240 /* The old entries' counters. */
238 struct xt_counters __user *counters; 241 struct xt_counters __user *counters;
239 242
240 /* The entries (hang off end: not really an array). */ 243 /* The entries (hang off end: not really an array). */
241 struct ip6t_entry entries[0]; 244 struct ip6t_entry entries[0];
242 }; 245 };
243 246
244 /* The argument to IP6T_SO_GET_ENTRIES. */ 247 /* The argument to IP6T_SO_GET_ENTRIES. */
245 struct ip6t_get_entries { 248 struct ip6t_get_entries {
246 /* Which table: user fills this in. */ 249 /* Which table: user fills this in. */
247 char name[XT_TABLE_MAXNAMELEN]; 250 char name[XT_TABLE_MAXNAMELEN];
248 251
249 /* User fills this in: total entry size. */ 252 /* User fills this in: total entry size. */
250 unsigned int size; 253 unsigned int size;
251 254
252 /* The entries. */ 255 /* The entries. */
253 struct ip6t_entry entrytable[0]; 256 struct ip6t_entry entrytable[0];
254 }; 257 };
255 258
256 /* Helper functions */ 259 /* Helper functions */
257 static __inline__ struct xt_entry_target * 260 static __inline__ struct xt_entry_target *
258 ip6t_get_target(struct ip6t_entry *e) 261 ip6t_get_target(struct ip6t_entry *e)
259 { 262 {
260 return (void *)e + e->target_offset; 263 return (void *)e + e->target_offset;
261 } 264 }
262 265
263 /* 266 /*
264 * Main firewall chains definitions and global var's definitions. 267 * Main firewall chains definitions and global var's definitions.
265 */ 268 */
266 269
267 #endif /* _UAPI_IP6_TABLES_H */ 270 #endif /* _UAPI_IP6_TABLES_H */
268 271
net/ipv4/netfilter/iptable_nat.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2011 Patrick McHardy <kaber@trash.net> 3 * (C) 2011 Patrick McHardy <kaber@trash.net>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <linux/netfilter.h> 11 #include <linux/netfilter.h>
12 #include <linux/netfilter_ipv4.h> 12 #include <linux/netfilter_ipv4.h>
13 #include <linux/netfilter_ipv4/ip_tables.h> 13 #include <linux/netfilter_ipv4/ip_tables.h>
14 #include <linux/ip.h> 14 #include <linux/ip.h>
15 #include <net/ip.h> 15 #include <net/ip.h>
16 16
17 #include <net/netfilter/nf_nat.h> 17 #include <net/netfilter/nf_nat.h>
18 #include <net/netfilter/nf_nat_core.h> 18 #include <net/netfilter/nf_nat_core.h>
19 #include <net/netfilter/nf_nat_l3proto.h> 19 #include <net/netfilter/nf_nat_l3proto.h>
20 20
21 static const struct xt_table nf_nat_ipv4_table = { 21 static const struct xt_table nf_nat_ipv4_table = {
22 .name = "nat", 22 .name = "nat",
23 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 23 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
24 (1 << NF_INET_POST_ROUTING) | 24 (1 << NF_INET_POST_ROUTING) |
25 (1 << NF_INET_LOCAL_OUT) | 25 (1 << NF_INET_LOCAL_OUT) |
26 (1 << NF_INET_LOCAL_IN), 26 (1 << NF_INET_LOCAL_IN),
27 .me = THIS_MODULE, 27 .me = THIS_MODULE,
28 .af = NFPROTO_IPV4, 28 .af = NFPROTO_IPV4,
29 }; 29 };
30 30
31 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 31 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
32 { 32 {
33 /* Force range to this IP; let proto decide mapping for 33 /* Force range to this IP; let proto decide mapping for
34 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). 34 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
35 */ 35 */
36 struct nf_nat_range range; 36 struct nf_nat_range range;
37 37
38 range.flags = 0; 38 range.flags = 0;
39 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, 39 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
40 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? 40 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
41 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : 41 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
42 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); 42 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
43 43
44 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); 44 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
45 } 45 }
46 46
47 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, 47 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
48 const struct net_device *in, 48 const struct net_device *in,
49 const struct net_device *out, 49 const struct net_device *out,
50 struct nf_conn *ct) 50 struct nf_conn *ct)
51 { 51 {
52 struct net *net = nf_ct_net(ct); 52 struct net *net = nf_ct_net(ct);
53 unsigned int ret; 53 unsigned int ret;
54 54
55 ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); 55 ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
56 if (ret == NF_ACCEPT) { 56 if (ret == NF_ACCEPT) {
57 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) 57 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
58 ret = alloc_null_binding(ct, hooknum); 58 ret = alloc_null_binding(ct, hooknum);
59 } 59 }
60 return ret; 60 return ret;
61 } 61 }
62 62
63 static unsigned int 63 static unsigned int
64 nf_nat_ipv4_fn(unsigned int hooknum, 64 nf_nat_ipv4_fn(unsigned int hooknum,
65 struct sk_buff *skb, 65 struct sk_buff *skb,
66 const struct net_device *in, 66 const struct net_device *in,
67 const struct net_device *out, 67 const struct net_device *out,
68 int (*okfn)(struct sk_buff *)) 68 int (*okfn)(struct sk_buff *))
69 { 69 {
70 struct nf_conn *ct; 70 struct nf_conn *ct;
71 enum ip_conntrack_info ctinfo; 71 enum ip_conntrack_info ctinfo;
72 struct nf_conn_nat *nat; 72 struct nf_conn_nat *nat;
73 /* maniptype == SRC for postrouting. */ 73 /* maniptype == SRC for postrouting. */
74 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 74 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
75 75
76 /* We never see fragments: conntrack defrags on pre-routing 76 /* We never see fragments: conntrack defrags on pre-routing
77 * and local-out, and nf_nat_out protects post-routing. 77 * and local-out, and nf_nat_out protects post-routing.
78 */ 78 */
79 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); 79 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
80 80
81 ct = nf_ct_get(skb, &ctinfo); 81 ct = nf_ct_get(skb, &ctinfo);
82 /* Can't track? It's not due to stress, or conntrack would 82 /* Can't track? It's not due to stress, or conntrack would
83 * have dropped it. Hence it's the user's responsibility to 83 * have dropped it. Hence it's the user's responsibility to
84 * packet filter it out, or implement conntrack/NAT for that 84 * packet filter it out, or implement conntrack/NAT for that
85 * protocol. 8) --RR 85 * protocol. 8) --RR
86 */ 86 */
87 if (!ct) 87 if (!ct)
88 return NF_ACCEPT; 88 return NF_ACCEPT;
89 89
90 /* Don't try to NAT if this packet is not conntracked */ 90 /* Don't try to NAT if this packet is not conntracked */
91 if (nf_ct_is_untracked(ct)) 91 if (nf_ct_is_untracked(ct))
92 return NF_ACCEPT; 92 return NF_ACCEPT;
93 93
94 nat = nfct_nat(ct); 94 nat = nfct_nat(ct);
95 if (!nat) { 95 if (!nat) {
96 /* NAT module was loaded late. */ 96 /* NAT module was loaded late. */
97 if (nf_ct_is_confirmed(ct)) 97 if (nf_ct_is_confirmed(ct))
98 return NF_ACCEPT; 98 return NF_ACCEPT;
99 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); 99 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
100 if (nat == NULL) { 100 if (nat == NULL) {
101 pr_debug("failed to add NAT extension\n"); 101 pr_debug("failed to add NAT extension\n");
102 return NF_ACCEPT; 102 return NF_ACCEPT;
103 } 103 }
104 } 104 }
105 105
106 switch (ctinfo) { 106 switch (ctinfo) {
107 case IP_CT_RELATED: 107 case IP_CT_RELATED:
108 case IP_CT_RELATED_REPLY: 108 case IP_CT_RELATED_REPLY:
109 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 109 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
110 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 110 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111 hooknum)) 111 hooknum))
112 return NF_DROP; 112 return NF_DROP;
113 else 113 else
114 return NF_ACCEPT; 114 return NF_ACCEPT;
115 } 115 }
116 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 116 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
117 case IP_CT_NEW: 117 case IP_CT_NEW:
118 /* Seen it before? This can happen for loopback, retrans, 118 /* Seen it before? This can happen for loopback, retrans,
119 * or local packets. 119 * or local packets.
120 */ 120 */
121 if (!nf_nat_initialized(ct, maniptype)) { 121 if (!nf_nat_initialized(ct, maniptype)) {
122 unsigned int ret; 122 unsigned int ret;
123 123
124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct); 124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125 if (ret != NF_ACCEPT) 125 if (ret != NF_ACCEPT)
126 return ret; 126 return ret;
127 } else 127 } else
128 pr_debug("Already setup manip %s for ct %p\n", 128 pr_debug("Already setup manip %s for ct %p\n",
129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 ct); 130 ct);
131 break; 131 break;
132 132
133 default: 133 default:
134 /* ESTABLISHED */ 134 /* ESTABLISHED */
135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 135 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 ctinfo == IP_CT_ESTABLISHED_REPLY); 136 ctinfo == IP_CT_ESTABLISHED_REPLY);
137 } 137 }
138 138
139 return nf_nat_packet(ct, ctinfo, hooknum, skb); 139 return nf_nat_packet(ct, ctinfo, hooknum, skb);
140 } 140 }
141 141
142 static unsigned int 142 static unsigned int
143 nf_nat_ipv4_in(unsigned int hooknum, 143 nf_nat_ipv4_in(unsigned int hooknum,
144 struct sk_buff *skb, 144 struct sk_buff *skb,
145 const struct net_device *in, 145 const struct net_device *in,
146 const struct net_device *out, 146 const struct net_device *out,
147 int (*okfn)(struct sk_buff *)) 147 int (*okfn)(struct sk_buff *))
148 { 148 {
149 unsigned int ret; 149 unsigned int ret;
150 __be32 daddr = ip_hdr(skb)->daddr; 150 __be32 daddr = ip_hdr(skb)->daddr;
151 151
152 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 152 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
153 if (ret != NF_DROP && ret != NF_STOLEN && 153 if (ret != NF_DROP && ret != NF_STOLEN &&
154 daddr != ip_hdr(skb)->daddr) 154 daddr != ip_hdr(skb)->daddr)
155 skb_dst_drop(skb); 155 skb_dst_drop(skb);
156 156
157 return ret; 157 return ret;
158 } 158 }
159 159
160 static unsigned int 160 static unsigned int
161 nf_nat_ipv4_out(unsigned int hooknum, 161 nf_nat_ipv4_out(unsigned int hooknum,
162 struct sk_buff *skb, 162 struct sk_buff *skb,
163 const struct net_device *in, 163 const struct net_device *in,
164 const struct net_device *out, 164 const struct net_device *out,
165 int (*okfn)(struct sk_buff *)) 165 int (*okfn)(struct sk_buff *))
166 { 166 {
167 #ifdef CONFIG_XFRM 167 #ifdef CONFIG_XFRM
168 const struct nf_conn *ct; 168 const struct nf_conn *ct;
169 enum ip_conntrack_info ctinfo; 169 enum ip_conntrack_info ctinfo;
170 #endif 170 #endif
171 unsigned int ret; 171 unsigned int ret;
172 172
173 /* root is playing with raw sockets. */ 173 /* root is playing with raw sockets. */
174 if (skb->len < sizeof(struct iphdr) || 174 if (skb->len < sizeof(struct iphdr) ||
175 ip_hdrlen(skb) < sizeof(struct iphdr)) 175 ip_hdrlen(skb) < sizeof(struct iphdr))
176 return NF_ACCEPT; 176 return NF_ACCEPT;
177 177
178 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 178 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
179 #ifdef CONFIG_XFRM 179 #ifdef CONFIG_XFRM
180 if (ret != NF_DROP && ret != NF_STOLEN && 180 if (ret != NF_DROP && ret != NF_STOLEN &&
181 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 181 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
182 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 182 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
183 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 183 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
184 184
185 if ((ct->tuplehash[dir].tuple.src.u3.ip != 185 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
186 ct->tuplehash[!dir].tuple.dst.u3.ip) || 186 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
187 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 187 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
188 ct->tuplehash[dir].tuple.src.u.all != 188 ct->tuplehash[dir].tuple.src.u.all !=
189 ct->tuplehash[!dir].tuple.dst.u.all)) 189 ct->tuplehash[!dir].tuple.dst.u.all))
190 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 190 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
191 ret = NF_DROP; 191 ret = NF_DROP;
192 } 192 }
193 #endif 193 #endif
194 return ret; 194 return ret;
195 } 195 }
196 196
197 static unsigned int 197 static unsigned int
198 nf_nat_ipv4_local_fn(unsigned int hooknum, 198 nf_nat_ipv4_local_fn(unsigned int hooknum,
199 struct sk_buff *skb, 199 struct sk_buff *skb,
200 const struct net_device *in, 200 const struct net_device *in,
201 const struct net_device *out, 201 const struct net_device *out,
202 int (*okfn)(struct sk_buff *)) 202 int (*okfn)(struct sk_buff *))
203 { 203 {
204 const struct nf_conn *ct; 204 const struct nf_conn *ct;
205 enum ip_conntrack_info ctinfo; 205 enum ip_conntrack_info ctinfo;
206 unsigned int ret; 206 unsigned int ret;
207 207
208 /* root is playing with raw sockets. */ 208 /* root is playing with raw sockets. */
209 if (skb->len < sizeof(struct iphdr) || 209 if (skb->len < sizeof(struct iphdr) ||
210 ip_hdrlen(skb) < sizeof(struct iphdr)) 210 ip_hdrlen(skb) < sizeof(struct iphdr))
211 return NF_ACCEPT; 211 return NF_ACCEPT;
212 212
213 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 213 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
214 if (ret != NF_DROP && ret != NF_STOLEN && 214 if (ret != NF_DROP && ret != NF_STOLEN &&
215 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 215 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
216 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 216 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
217 217
218 if (ct->tuplehash[dir].tuple.dst.u3.ip != 218 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
219 ct->tuplehash[!dir].tuple.src.u3.ip) { 219 ct->tuplehash[!dir].tuple.src.u3.ip) {
220 if (ip_route_me_harder(skb, RTN_UNSPEC)) 220 if (ip_route_me_harder(skb, RTN_UNSPEC))
221 ret = NF_DROP; 221 ret = NF_DROP;
222 } 222 }
223 #ifdef CONFIG_XFRM 223 #ifdef CONFIG_XFRM
224 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 224 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
225 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 225 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
226 ct->tuplehash[dir].tuple.dst.u.all != 226 ct->tuplehash[dir].tuple.dst.u.all !=
227 ct->tuplehash[!dir].tuple.src.u.all) 227 ct->tuplehash[!dir].tuple.src.u.all)
228 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 228 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
229 ret = NF_DROP; 229 ret = NF_DROP;
230 #endif 230 #endif
231 } 231 }
232 return ret; 232 return ret;
233 } 233 }
234 234
235 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { 235 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
236 /* Before packet filtering, change destination */ 236 /* Before packet filtering, change destination */
237 { 237 {
238 .hook = nf_nat_ipv4_in, 238 .hook = nf_nat_ipv4_in,
239 .owner = THIS_MODULE, 239 .owner = THIS_MODULE,
240 .pf = NFPROTO_IPV4, 240 .pf = NFPROTO_IPV4,
241 .hooknum = NF_INET_PRE_ROUTING, 241 .hooknum = NF_INET_PRE_ROUTING,
242 .priority = NF_IP_PRI_NAT_DST, 242 .priority = NF_IP_PRI_NAT_DST,
243 }, 243 },
244 /* After packet filtering, change source */ 244 /* After packet filtering, change source */
245 { 245 {
246 .hook = nf_nat_ipv4_out, 246 .hook = nf_nat_ipv4_out,
247 .owner = THIS_MODULE, 247 .owner = THIS_MODULE,
248 .pf = NFPROTO_IPV4, 248 .pf = NFPROTO_IPV4,
249 .hooknum = NF_INET_POST_ROUTING, 249 .hooknum = NF_INET_POST_ROUTING,
250 .priority = NF_IP_PRI_NAT_SRC, 250 .priority = NF_IP_PRI_NAT_SRC,
251 }, 251 },
252 /* Before packet filtering, change destination */ 252 /* Before packet filtering, change destination */
253 { 253 {
254 .hook = nf_nat_ipv4_local_fn, 254 .hook = nf_nat_ipv4_local_fn,
255 .owner = THIS_MODULE, 255 .owner = THIS_MODULE,
256 .pf = NFPROTO_IPV4, 256 .pf = NFPROTO_IPV4,
257 .hooknum = NF_INET_LOCAL_OUT, 257 .hooknum = NF_INET_LOCAL_OUT,
258 .priority = NF_IP_PRI_NAT_DST, 258 .priority = NF_IP_PRI_NAT_DST,
259 }, 259 },
260 /* After packet filtering, change source */ 260 /* After packet filtering, change source */
261 { 261 {
262 .hook = nf_nat_ipv4_fn, 262 .hook = nf_nat_ipv4_fn,
263 .owner = THIS_MODULE, 263 .owner = THIS_MODULE,
264 .pf = NFPROTO_IPV4, 264 .pf = NFPROTO_IPV4,
265 .hooknum = NF_INET_LOCAL_IN, 265 .hooknum = NF_INET_LOCAL_IN,
266 .priority = NF_IP_PRI_NAT_SRC, 266 .priority = NF_IP_PRI_NAT_SRC,
267 }, 267 },
268 }; 268 };
269 269
270 static int __net_init iptable_nat_net_init(struct net *net) 270 static int __net_init iptable_nat_net_init(struct net *net)
271 { 271 {
272 struct ipt_replace *repl; 272 struct ipt_replace *repl;
273 273
274 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); 274 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
275 if (repl == NULL) 275 if (repl == NULL)
276 return -ENOMEM; 276 return -ENOMEM;
277 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 277 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
278 kfree(repl); 278 kfree(repl);
279 if (IS_ERR(net->ipv4.nat_table)) 279 return PTR_RET(net->ipv4.nat_table);
280 return PTR_ERR(net->ipv4.nat_table);
281 return 0;
282 } 280 }
283 281
284 static void __net_exit iptable_nat_net_exit(struct net *net) 282 static void __net_exit iptable_nat_net_exit(struct net *net)
285 { 283 {
286 ipt_unregister_table(net, net->ipv4.nat_table); 284 ipt_unregister_table(net, net->ipv4.nat_table);
287 } 285 }
288 286
289 static struct pernet_operations iptable_nat_net_ops = { 287 static struct pernet_operations iptable_nat_net_ops = {
290 .init = iptable_nat_net_init, 288 .init = iptable_nat_net_init,
291 .exit = iptable_nat_net_exit, 289 .exit = iptable_nat_net_exit,
292 }; 290 };
293 291
294 static int __init iptable_nat_init(void) 292 static int __init iptable_nat_init(void)
295 { 293 {
296 int err; 294 int err;
297 295
298 err = register_pernet_subsys(&iptable_nat_net_ops); 296 err = register_pernet_subsys(&iptable_nat_net_ops);
299 if (err < 0) 297 if (err < 0)
300 goto err1; 298 goto err1;
301 299
302 err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 300 err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
303 if (err < 0) 301 if (err < 0)
304 goto err2; 302 goto err2;
305 return 0; 303 return 0;
306 304
307 err2: 305 err2:
308 unregister_pernet_subsys(&iptable_nat_net_ops); 306 unregister_pernet_subsys(&iptable_nat_net_ops);
309 err1: 307 err1:
310 return err; 308 return err;
311 } 309 }
312 310
313 static void __exit iptable_nat_exit(void) 311 static void __exit iptable_nat_exit(void)
314 { 312 {
315 nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 313 nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
316 unregister_pernet_subsys(&iptable_nat_net_ops); 314 unregister_pernet_subsys(&iptable_nat_net_ops);
317 } 315 }
318 316
319 module_init(iptable_nat_init); 317 module_init(iptable_nat_init);
320 module_exit(iptable_nat_exit); 318 module_exit(iptable_nat_exit);
321 319
322 MODULE_LICENSE("GPL"); 320 MODULE_LICENSE("GPL");
323 321
net/ipv6/netfilter/ip6table_nat.c
1 /* 1 /*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> 2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT 8 * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
9 * funded by Astaro. 9 * funded by Astaro.
10 */ 10 */
11 11
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/netfilter.h> 13 #include <linux/netfilter.h>
14 #include <linux/netfilter_ipv6.h> 14 #include <linux/netfilter_ipv6.h>
15 #include <linux/netfilter_ipv6/ip6_tables.h> 15 #include <linux/netfilter_ipv6/ip6_tables.h>
16 #include <linux/ipv6.h> 16 #include <linux/ipv6.h>
17 #include <net/ipv6.h> 17 #include <net/ipv6.h>
18 18
19 #include <net/netfilter/nf_nat.h> 19 #include <net/netfilter/nf_nat.h>
20 #include <net/netfilter/nf_nat_core.h> 20 #include <net/netfilter/nf_nat_core.h>
21 #include <net/netfilter/nf_nat_l3proto.h> 21 #include <net/netfilter/nf_nat_l3proto.h>
22 22
23 static const struct xt_table nf_nat_ipv6_table = { 23 static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat", 24 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
26 (1 << NF_INET_POST_ROUTING) | 26 (1 << NF_INET_POST_ROUTING) |
27 (1 << NF_INET_LOCAL_OUT) | 27 (1 << NF_INET_LOCAL_OUT) |
28 (1 << NF_INET_LOCAL_IN), 28 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE, 29 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6, 30 .af = NFPROTO_IPV6,
31 }; 31 };
32 32
33 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 33 static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
34 { 34 {
35 /* Force range to this IP; let proto decide mapping for 35 /* Force range to this IP; let proto decide mapping for
36 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). 36 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
37 */ 37 */
38 struct nf_nat_range range; 38 struct nf_nat_range range;
39 39
40 range.flags = 0; 40 range.flags = 0;
41 pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, 41 pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
42 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? 42 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
43 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : 43 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
44 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); 44 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
45 45
46 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); 46 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
47 } 47 }
48 48
49 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, 49 static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
50 const struct net_device *in, 50 const struct net_device *in,
51 const struct net_device *out, 51 const struct net_device *out,
52 struct nf_conn *ct) 52 struct nf_conn *ct)
53 { 53 {
54 struct net *net = nf_ct_net(ct); 54 struct net *net = nf_ct_net(ct);
55 unsigned int ret; 55 unsigned int ret;
56 56
57 ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); 57 ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
58 if (ret == NF_ACCEPT) { 58 if (ret == NF_ACCEPT) {
59 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) 59 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
60 ret = alloc_null_binding(ct, hooknum); 60 ret = alloc_null_binding(ct, hooknum);
61 } 61 }
62 return ret; 62 return ret;
63 } 63 }
64 64
65 static unsigned int 65 static unsigned int
66 nf_nat_ipv6_fn(unsigned int hooknum, 66 nf_nat_ipv6_fn(unsigned int hooknum,
67 struct sk_buff *skb, 67 struct sk_buff *skb,
68 const struct net_device *in, 68 const struct net_device *in,
69 const struct net_device *out, 69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *)) 70 int (*okfn)(struct sk_buff *))
71 { 71 {
72 struct nf_conn *ct; 72 struct nf_conn *ct;
73 enum ip_conntrack_info ctinfo; 73 enum ip_conntrack_info ctinfo;
74 struct nf_conn_nat *nat; 74 struct nf_conn_nat *nat;
75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
76 __be16 frag_off; 76 __be16 frag_off;
77 int hdrlen; 77 int hdrlen;
78 u8 nexthdr; 78 u8 nexthdr;
79 79
80 ct = nf_ct_get(skb, &ctinfo); 80 ct = nf_ct_get(skb, &ctinfo);
81 /* Can't track? It's not due to stress, or conntrack would 81 /* Can't track? It's not due to stress, or conntrack would
82 * have dropped it. Hence it's the user's responsibility to 82 * have dropped it. Hence it's the user's responsibility to
83 * packet filter it out, or implement conntrack/NAT for that 83 * packet filter it out, or implement conntrack/NAT for that
84 * protocol. 8) --RR 84 * protocol. 8) --RR
85 */ 85 */
86 if (!ct) 86 if (!ct)
87 return NF_ACCEPT; 87 return NF_ACCEPT;
88 88
89 /* Don't try to NAT if this packet is not conntracked */ 89 /* Don't try to NAT if this packet is not conntracked */
90 if (nf_ct_is_untracked(ct)) 90 if (nf_ct_is_untracked(ct))
91 return NF_ACCEPT; 91 return NF_ACCEPT;
92 92
93 nat = nfct_nat(ct); 93 nat = nfct_nat(ct);
94 if (!nat) { 94 if (!nat) {
95 /* NAT module was loaded late. */ 95 /* NAT module was loaded late. */
96 if (nf_ct_is_confirmed(ct)) 96 if (nf_ct_is_confirmed(ct))
97 return NF_ACCEPT; 97 return NF_ACCEPT;
98 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); 98 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
99 if (nat == NULL) { 99 if (nat == NULL) {
100 pr_debug("failed to add NAT extension\n"); 100 pr_debug("failed to add NAT extension\n");
101 return NF_ACCEPT; 101 return NF_ACCEPT;
102 } 102 }
103 } 103 }
104 104
105 switch (ctinfo) { 105 switch (ctinfo) {
106 case IP_CT_RELATED: 106 case IP_CT_RELATED:
107 case IP_CT_RELATED_REPLY: 107 case IP_CT_RELATED_REPLY:
108 nexthdr = ipv6_hdr(skb)->nexthdr; 108 nexthdr = ipv6_hdr(skb)->nexthdr;
109 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), 109 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
110 &nexthdr, &frag_off); 110 &nexthdr, &frag_off);
111 111
112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, 113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
114 hooknum, hdrlen)) 114 hooknum, hdrlen))
115 return NF_DROP; 115 return NF_DROP;
116 else 116 else
117 return NF_ACCEPT; 117 return NF_ACCEPT;
118 } 118 }
119 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 119 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
120 case IP_CT_NEW: 120 case IP_CT_NEW:
121 /* Seen it before? This can happen for loopback, retrans, 121 /* Seen it before? This can happen for loopback, retrans,
122 * or local packets. 122 * or local packets.
123 */ 123 */
124 if (!nf_nat_initialized(ct, maniptype)) { 124 if (!nf_nat_initialized(ct, maniptype)) {
125 unsigned int ret; 125 unsigned int ret;
126 126
127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct); 127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
128 if (ret != NF_ACCEPT) 128 if (ret != NF_ACCEPT)
129 return ret; 129 return ret;
130 } else 130 } else
131 pr_debug("Already setup manip %s for ct %p\n", 131 pr_debug("Already setup manip %s for ct %p\n",
132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 ct); 133 ct);
134 break; 134 break;
135 135
136 default: 136 default:
137 /* ESTABLISHED */ 137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 ctinfo == IP_CT_ESTABLISHED_REPLY); 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 } 140 }
141 141
142 return nf_nat_packet(ct, ctinfo, hooknum, skb); 142 return nf_nat_packet(ct, ctinfo, hooknum, skb);
143 } 143 }
144 144
145 static unsigned int 145 static unsigned int
146 nf_nat_ipv6_in(unsigned int hooknum, 146 nf_nat_ipv6_in(unsigned int hooknum,
147 struct sk_buff *skb, 147 struct sk_buff *skb,
148 const struct net_device *in, 148 const struct net_device *in,
149 const struct net_device *out, 149 const struct net_device *out,
150 int (*okfn)(struct sk_buff *)) 150 int (*okfn)(struct sk_buff *))
151 { 151 {
152 unsigned int ret; 152 unsigned int ret;
153 struct in6_addr daddr = ipv6_hdr(skb)->daddr; 153 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
154 154
155 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 155 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
156 if (ret != NF_DROP && ret != NF_STOLEN && 156 if (ret != NF_DROP && ret != NF_STOLEN &&
157 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) 157 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
158 skb_dst_drop(skb); 158 skb_dst_drop(skb);
159 159
160 return ret; 160 return ret;
161 } 161 }
162 162
163 static unsigned int 163 static unsigned int
164 nf_nat_ipv6_out(unsigned int hooknum, 164 nf_nat_ipv6_out(unsigned int hooknum,
165 struct sk_buff *skb, 165 struct sk_buff *skb,
166 const struct net_device *in, 166 const struct net_device *in,
167 const struct net_device *out, 167 const struct net_device *out,
168 int (*okfn)(struct sk_buff *)) 168 int (*okfn)(struct sk_buff *))
169 { 169 {
170 #ifdef CONFIG_XFRM 170 #ifdef CONFIG_XFRM
171 const struct nf_conn *ct; 171 const struct nf_conn *ct;
172 enum ip_conntrack_info ctinfo; 172 enum ip_conntrack_info ctinfo;
173 #endif 173 #endif
174 unsigned int ret; 174 unsigned int ret;
175 175
176 /* root is playing with raw sockets. */ 176 /* root is playing with raw sockets. */
177 if (skb->len < sizeof(struct ipv6hdr)) 177 if (skb->len < sizeof(struct ipv6hdr))
178 return NF_ACCEPT; 178 return NF_ACCEPT;
179 179
180 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 180 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
181 #ifdef CONFIG_XFRM 181 #ifdef CONFIG_XFRM
182 if (ret != NF_DROP && ret != NF_STOLEN && 182 if (ret != NF_DROP && ret != NF_STOLEN &&
183 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 183 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
184 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 184 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
185 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 185 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
186 186
187 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, 187 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
188 &ct->tuplehash[!dir].tuple.dst.u3) || 188 &ct->tuplehash[!dir].tuple.dst.u3) ||
189 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 189 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
190 ct->tuplehash[dir].tuple.src.u.all != 190 ct->tuplehash[dir].tuple.src.u.all !=
191 ct->tuplehash[!dir].tuple.dst.u.all)) 191 ct->tuplehash[!dir].tuple.dst.u.all))
192 if (nf_xfrm_me_harder(skb, AF_INET6) < 0) 192 if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
193 ret = NF_DROP; 193 ret = NF_DROP;
194 } 194 }
195 #endif 195 #endif
196 return ret; 196 return ret;
197 } 197 }
198 198
199 static unsigned int 199 static unsigned int
200 nf_nat_ipv6_local_fn(unsigned int hooknum, 200 nf_nat_ipv6_local_fn(unsigned int hooknum,
201 struct sk_buff *skb, 201 struct sk_buff *skb,
202 const struct net_device *in, 202 const struct net_device *in,
203 const struct net_device *out, 203 const struct net_device *out,
204 int (*okfn)(struct sk_buff *)) 204 int (*okfn)(struct sk_buff *))
205 { 205 {
206 const struct nf_conn *ct; 206 const struct nf_conn *ct;
207 enum ip_conntrack_info ctinfo; 207 enum ip_conntrack_info ctinfo;
208 unsigned int ret; 208 unsigned int ret;
209 209
210 /* root is playing with raw sockets. */ 210 /* root is playing with raw sockets. */
211 if (skb->len < sizeof(struct ipv6hdr)) 211 if (skb->len < sizeof(struct ipv6hdr))
212 return NF_ACCEPT; 212 return NF_ACCEPT;
213 213
214 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 214 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
215 if (ret != NF_DROP && ret != NF_STOLEN && 215 if (ret != NF_DROP && ret != NF_STOLEN &&
216 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 216 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
217 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 217 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
218 218
219 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, 219 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
220 &ct->tuplehash[!dir].tuple.src.u3)) { 220 &ct->tuplehash[!dir].tuple.src.u3)) {
221 if (ip6_route_me_harder(skb)) 221 if (ip6_route_me_harder(skb))
222 ret = NF_DROP; 222 ret = NF_DROP;
223 } 223 }
224 #ifdef CONFIG_XFRM 224 #ifdef CONFIG_XFRM
225 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 225 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
226 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 226 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
227 ct->tuplehash[dir].tuple.dst.u.all != 227 ct->tuplehash[dir].tuple.dst.u.all !=
228 ct->tuplehash[!dir].tuple.src.u.all) 228 ct->tuplehash[!dir].tuple.src.u.all)
229 if (nf_xfrm_me_harder(skb, AF_INET6)) 229 if (nf_xfrm_me_harder(skb, AF_INET6))
230 ret = NF_DROP; 230 ret = NF_DROP;
231 #endif 231 #endif
232 } 232 }
233 return ret; 233 return ret;
234 } 234 }
235 235
236 static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { 236 static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
237 /* Before packet filtering, change destination */ 237 /* Before packet filtering, change destination */
238 { 238 {
239 .hook = nf_nat_ipv6_in, 239 .hook = nf_nat_ipv6_in,
240 .owner = THIS_MODULE, 240 .owner = THIS_MODULE,
241 .pf = NFPROTO_IPV6, 241 .pf = NFPROTO_IPV6,
242 .hooknum = NF_INET_PRE_ROUTING, 242 .hooknum = NF_INET_PRE_ROUTING,
243 .priority = NF_IP6_PRI_NAT_DST, 243 .priority = NF_IP6_PRI_NAT_DST,
244 }, 244 },
245 /* After packet filtering, change source */ 245 /* After packet filtering, change source */
246 { 246 {
247 .hook = nf_nat_ipv6_out, 247 .hook = nf_nat_ipv6_out,
248 .owner = THIS_MODULE, 248 .owner = THIS_MODULE,
249 .pf = NFPROTO_IPV6, 249 .pf = NFPROTO_IPV6,
250 .hooknum = NF_INET_POST_ROUTING, 250 .hooknum = NF_INET_POST_ROUTING,
251 .priority = NF_IP6_PRI_NAT_SRC, 251 .priority = NF_IP6_PRI_NAT_SRC,
252 }, 252 },
253 /* Before packet filtering, change destination */ 253 /* Before packet filtering, change destination */
254 { 254 {
255 .hook = nf_nat_ipv6_local_fn, 255 .hook = nf_nat_ipv6_local_fn,
256 .owner = THIS_MODULE, 256 .owner = THIS_MODULE,
257 .pf = NFPROTO_IPV6, 257 .pf = NFPROTO_IPV6,
258 .hooknum = NF_INET_LOCAL_OUT, 258 .hooknum = NF_INET_LOCAL_OUT,
259 .priority = NF_IP6_PRI_NAT_DST, 259 .priority = NF_IP6_PRI_NAT_DST,
260 }, 260 },
261 /* After packet filtering, change source */ 261 /* After packet filtering, change source */
262 { 262 {
263 .hook = nf_nat_ipv6_fn, 263 .hook = nf_nat_ipv6_fn,
264 .owner = THIS_MODULE, 264 .owner = THIS_MODULE,
265 .pf = NFPROTO_IPV6, 265 .pf = NFPROTO_IPV6,
266 .hooknum = NF_INET_LOCAL_IN, 266 .hooknum = NF_INET_LOCAL_IN,
267 .priority = NF_IP6_PRI_NAT_SRC, 267 .priority = NF_IP6_PRI_NAT_SRC,
268 }, 268 },
269 }; 269 };
270 270
271 static int __net_init ip6table_nat_net_init(struct net *net) 271 static int __net_init ip6table_nat_net_init(struct net *net)
272 { 272 {
273 struct ip6t_replace *repl; 273 struct ip6t_replace *repl;
274 274
275 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); 275 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
276 if (repl == NULL) 276 if (repl == NULL)
277 return -ENOMEM; 277 return -ENOMEM;
278 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); 278 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
279 kfree(repl); 279 kfree(repl);
280 if (IS_ERR(net->ipv6.ip6table_nat)) 280 return PTR_RET(net->ipv6.ip6table_nat);
281 return PTR_ERR(net->ipv6.ip6table_nat);
282 return 0;
283 } 281 }
284 282
285 static void __net_exit ip6table_nat_net_exit(struct net *net) 283 static void __net_exit ip6table_nat_net_exit(struct net *net)
286 { 284 {
287 ip6t_unregister_table(net, net->ipv6.ip6table_nat); 285 ip6t_unregister_table(net, net->ipv6.ip6table_nat);
288 } 286 }
289 287
290 static struct pernet_operations ip6table_nat_net_ops = { 288 static struct pernet_operations ip6table_nat_net_ops = {
291 .init = ip6table_nat_net_init, 289 .init = ip6table_nat_net_init,
292 .exit = ip6table_nat_net_exit, 290 .exit = ip6table_nat_net_exit,
293 }; 291 };
294 292
295 static int __init ip6table_nat_init(void) 293 static int __init ip6table_nat_init(void)
296 { 294 {
297 int err; 295 int err;
298 296
299 err = register_pernet_subsys(&ip6table_nat_net_ops); 297 err = register_pernet_subsys(&ip6table_nat_net_ops);
300 if (err < 0) 298 if (err < 0)
301 goto err1; 299 goto err1;
302 300
303 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 301 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
304 if (err < 0) 302 if (err < 0)
305 goto err2; 303 goto err2;
306 return 0; 304 return 0;
307 305
308 err2: 306 err2:
309 unregister_pernet_subsys(&ip6table_nat_net_ops); 307 unregister_pernet_subsys(&ip6table_nat_net_ops);
310 err1: 308 err1:
311 return err; 309 return err;
312 } 310 }
313 311
314 static void __exit ip6table_nat_exit(void) 312 static void __exit ip6table_nat_exit(void)
315 { 313 {
316 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 314 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
317 unregister_pernet_subsys(&ip6table_nat_net_ops); 315 unregister_pernet_subsys(&ip6table_nat_net_ops);
318 } 316 }
319 317
320 module_init(ip6table_nat_init); 318 module_init(ip6table_nat_init);
321 module_exit(ip6table_nat_exit); 319 module_exit(ip6table_nat_exit);
322 320
323 MODULE_LICENSE("GPL"); 321 MODULE_LICENSE("GPL");
324 322
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
1 /* 1 /*
2 * Copyright (C)2004 USAGI/WIDE Project 2 * Copyright (C)2004 USAGI/WIDE Project
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * Author: 8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 */ 10 */
11 11
12 #include <linux/types.h> 12 #include <linux/types.h>
13 #include <linux/ipv6.h> 13 #include <linux/ipv6.h>
14 #include <linux/in6.h> 14 #include <linux/in6.h>
15 #include <linux/netfilter.h> 15 #include <linux/netfilter.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/skbuff.h> 17 #include <linux/skbuff.h>
18 #include <linux/icmp.h> 18 #include <linux/icmp.h>
19 #include <net/ipv6.h> 19 #include <net/ipv6.h>
20 #include <net/inet_frag.h> 20 #include <net/inet_frag.h>
21 21
22 #include <linux/netfilter_bridge.h> 22 #include <linux/netfilter_bridge.h>
23 #include <linux/netfilter_ipv6.h> 23 #include <linux/netfilter_ipv6.h>
24 #include <linux/netfilter_ipv6/ip6_tables.h>
24 #include <net/netfilter/nf_conntrack.h> 25 #include <net/netfilter/nf_conntrack.h>
25 #include <net/netfilter/nf_conntrack_helper.h> 26 #include <net/netfilter/nf_conntrack_helper.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h> 27 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_l3proto.h> 28 #include <net/netfilter/nf_conntrack_l3proto.h>
28 #include <net/netfilter/nf_conntrack_core.h> 29 #include <net/netfilter/nf_conntrack_core.h>
29 #include <net/netfilter/nf_conntrack_zones.h> 30 #include <net/netfilter/nf_conntrack_zones.h>
30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 31 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31 #include <net/netfilter/nf_nat_helper.h> 32 #include <net/netfilter/nf_nat_helper.h>
32 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 33 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
33 #include <net/netfilter/nf_log.h> 34 #include <net/netfilter/nf_log.h>
34 35
35 static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 36 static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
36 struct nf_conntrack_tuple *tuple) 37 struct nf_conntrack_tuple *tuple)
37 { 38 {
38 const u_int32_t *ap; 39 const u_int32_t *ap;
39 u_int32_t _addrs[8]; 40 u_int32_t _addrs[8];
40 41
41 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), 42 ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
42 sizeof(_addrs), _addrs); 43 sizeof(_addrs), _addrs);
43 if (ap == NULL) 44 if (ap == NULL)
44 return false; 45 return false;
45 46
46 memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); 47 memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
47 memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); 48 memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
48 49
49 return true; 50 return true;
50 } 51 }
51 52
52 static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, 53 static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
53 const struct nf_conntrack_tuple *orig) 54 const struct nf_conntrack_tuple *orig)
54 { 55 {
55 memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); 56 memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
56 memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); 57 memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
57 58
58 return true; 59 return true;
59 } 60 }
60 61
61 static int ipv6_print_tuple(struct seq_file *s, 62 static int ipv6_print_tuple(struct seq_file *s,
62 const struct nf_conntrack_tuple *tuple) 63 const struct nf_conntrack_tuple *tuple)
63 { 64 {
64 return seq_printf(s, "src=%pI6 dst=%pI6 ", 65 return seq_printf(s, "src=%pI6 dst=%pI6 ",
65 tuple->src.u3.ip6, tuple->dst.u3.ip6); 66 tuple->src.u3.ip6, tuple->dst.u3.ip6);
66 } 67 }
67 68
68 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 69 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
69 unsigned int *dataoff, u_int8_t *protonum) 70 unsigned int *dataoff, u_int8_t *protonum)
70 { 71 {
71 unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 72 unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
72 __be16 frag_off; 73 __be16 frag_off;
73 int protoff; 74 int protoff;
74 u8 nexthdr; 75 u8 nexthdr;
75 76
76 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 77 if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
77 &nexthdr, sizeof(nexthdr)) != 0) { 78 &nexthdr, sizeof(nexthdr)) != 0) {
78 pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 79 pr_debug("ip6_conntrack_core: can't get nexthdr\n");
79 return -NF_ACCEPT; 80 return -NF_ACCEPT;
80 } 81 }
81 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 82 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
82 /* 83 /*
83 * (protoff == skb->len) mean that the packet doesn't have no data 84 * (protoff == skb->len) mean that the packet doesn't have no data
84 * except of IPv6 & ext headers. but it's tracked anyway. - YK 85 * except of IPv6 & ext headers. but it's tracked anyway. - YK
85 */ 86 */
86 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 87 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
87 pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 88 pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
88 return -NF_ACCEPT; 89 return -NF_ACCEPT;
89 } 90 }
90 91
91 *dataoff = protoff; 92 *dataoff = protoff;
92 *protonum = nexthdr; 93 *protonum = nexthdr;
93 return NF_ACCEPT; 94 return NF_ACCEPT;
94 } 95 }
95 96
96 static unsigned int ipv6_helper(unsigned int hooknum, 97 static unsigned int ipv6_helper(unsigned int hooknum,
97 struct sk_buff *skb, 98 struct sk_buff *skb,
98 const struct net_device *in, 99 const struct net_device *in,
99 const struct net_device *out, 100 const struct net_device *out,
100 int (*okfn)(struct sk_buff *)) 101 int (*okfn)(struct sk_buff *))
101 { 102 {
102 struct nf_conn *ct; 103 struct nf_conn *ct;
103 const struct nf_conn_help *help; 104 const struct nf_conn_help *help;
104 const struct nf_conntrack_helper *helper; 105 const struct nf_conntrack_helper *helper;
105 enum ip_conntrack_info ctinfo; 106 enum ip_conntrack_info ctinfo;
106 unsigned int ret; 107 unsigned int ret;
107 __be16 frag_off; 108 __be16 frag_off;
108 int protoff; 109 int protoff;
109 u8 nexthdr; 110 u8 nexthdr;
110 111
111 /* This is where we call the helper: as the packet goes out. */ 112 /* This is where we call the helper: as the packet goes out. */
112 ct = nf_ct_get(skb, &ctinfo); 113 ct = nf_ct_get(skb, &ctinfo);
113 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 114 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
114 return NF_ACCEPT; 115 return NF_ACCEPT;
115 116
116 help = nfct_help(ct); 117 help = nfct_help(ct);
117 if (!help) 118 if (!help)
118 return NF_ACCEPT; 119 return NF_ACCEPT;
119 /* rcu_read_lock()ed by nf_hook_slow */ 120 /* rcu_read_lock()ed by nf_hook_slow */
120 helper = rcu_dereference(help->helper); 121 helper = rcu_dereference(help->helper);
121 if (!helper) 122 if (!helper)
122 return NF_ACCEPT; 123 return NF_ACCEPT;
123 124
124 nexthdr = ipv6_hdr(skb)->nexthdr; 125 nexthdr = ipv6_hdr(skb)->nexthdr;
125 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 126 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
126 &frag_off); 127 &frag_off);
127 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 128 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
128 pr_debug("proto header not found\n"); 129 pr_debug("proto header not found\n");
129 return NF_ACCEPT; 130 return NF_ACCEPT;
130 } 131 }
131 132
132 ret = helper->help(skb, protoff, ct, ctinfo); 133 ret = helper->help(skb, protoff, ct, ctinfo);
133 if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { 134 if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
134 nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, 135 nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL,
135 "nf_ct_%s: dropping packet", helper->name); 136 "nf_ct_%s: dropping packet", helper->name);
136 } 137 }
137 return ret; 138 return ret;
138 } 139 }
139 140
140 static unsigned int ipv6_confirm(unsigned int hooknum, 141 static unsigned int ipv6_confirm(unsigned int hooknum,
141 struct sk_buff *skb, 142 struct sk_buff *skb,
142 const struct net_device *in, 143 const struct net_device *in,
143 const struct net_device *out, 144 const struct net_device *out,
144 int (*okfn)(struct sk_buff *)) 145 int (*okfn)(struct sk_buff *))
145 { 146 {
146 struct nf_conn *ct; 147 struct nf_conn *ct;
147 enum ip_conntrack_info ctinfo; 148 enum ip_conntrack_info ctinfo;
148 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 149 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
149 int protoff; 150 int protoff;
150 __be16 frag_off; 151 __be16 frag_off;
151 152
152 ct = nf_ct_get(skb, &ctinfo); 153 ct = nf_ct_get(skb, &ctinfo);
153 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 154 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
154 goto out; 155 goto out;
155 156
156 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 157 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
157 &frag_off); 158 &frag_off);
158 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 159 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
159 pr_debug("proto header not found\n"); 160 pr_debug("proto header not found\n");
160 goto out; 161 goto out;
161 } 162 }
162 163
163 /* adjust seqs for loopback traffic only in outgoing direction */ 164 /* adjust seqs for loopback traffic only in outgoing direction */
164 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 165 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
165 !nf_is_loopback_packet(skb)) { 166 !nf_is_loopback_packet(skb)) {
166 typeof(nf_nat_seq_adjust_hook) seq_adjust; 167 typeof(nf_nat_seq_adjust_hook) seq_adjust;
167 168
168 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); 169 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
169 if (!seq_adjust || 170 if (!seq_adjust ||
170 !seq_adjust(skb, ct, ctinfo, protoff)) { 171 !seq_adjust(skb, ct, ctinfo, protoff)) {
171 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 172 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
172 return NF_DROP; 173 return NF_DROP;
173 } 174 }
174 } 175 }
175 out: 176 out:
176 /* We've seen it coming out the other side: confirm it */ 177 /* We've seen it coming out the other side: confirm it */
177 return nf_conntrack_confirm(skb); 178 return nf_conntrack_confirm(skb);
178 } 179 }
179 180
180 static unsigned int __ipv6_conntrack_in(struct net *net, 181 static unsigned int __ipv6_conntrack_in(struct net *net,
181 unsigned int hooknum, 182 unsigned int hooknum,
182 struct sk_buff *skb, 183 struct sk_buff *skb,
183 const struct net_device *in, 184 const struct net_device *in,
184 const struct net_device *out, 185 const struct net_device *out,
185 int (*okfn)(struct sk_buff *)) 186 int (*okfn)(struct sk_buff *))
186 { 187 {
187 struct sk_buff *reasm = skb->nfct_reasm; 188 struct sk_buff *reasm = skb->nfct_reasm;
188 const struct nf_conn_help *help; 189 const struct nf_conn_help *help;
189 struct nf_conn *ct; 190 struct nf_conn *ct;
190 enum ip_conntrack_info ctinfo; 191 enum ip_conntrack_info ctinfo;
191 192
192 /* This packet is fragmented and has reassembled packet. */ 193 /* This packet is fragmented and has reassembled packet. */
193 if (reasm) { 194 if (reasm) {
194 /* Reassembled packet isn't parsed yet ? */ 195 /* Reassembled packet isn't parsed yet ? */
195 if (!reasm->nfct) { 196 if (!reasm->nfct) {
196 unsigned int ret; 197 unsigned int ret;
197 198
198 ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); 199 ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
199 if (ret != NF_ACCEPT) 200 if (ret != NF_ACCEPT)
200 return ret; 201 return ret;
201 } 202 }
202 203
203 /* Conntrack helpers need the entire reassembled packet in the 204 /* Conntrack helpers need the entire reassembled packet in the
204 * POST_ROUTING hook. In case of unconfirmed connections NAT 205 * POST_ROUTING hook. In case of unconfirmed connections NAT
205 * might reassign a helper, so the entire packet is also 206 * might reassign a helper, so the entire packet is also
206 * required. 207 * required.
207 */ 208 */
208 ct = nf_ct_get(reasm, &ctinfo); 209 ct = nf_ct_get(reasm, &ctinfo);
209 if (ct != NULL && !nf_ct_is_untracked(ct)) { 210 if (ct != NULL && !nf_ct_is_untracked(ct)) {
210 help = nfct_help(ct); 211 help = nfct_help(ct);
211 if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { 212 if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
212 nf_conntrack_get_reasm(skb); 213 nf_conntrack_get_reasm(skb);
213 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, 214 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
214 (struct net_device *)in, 215 (struct net_device *)in,
215 (struct net_device *)out, 216 (struct net_device *)out,
216 okfn, NF_IP6_PRI_CONNTRACK + 1); 217 okfn, NF_IP6_PRI_CONNTRACK + 1);
217 return NF_DROP_ERR(-ECANCELED); 218 return NF_DROP_ERR(-ECANCELED);
218 } 219 }
219 } 220 }
220 221
221 nf_conntrack_get(reasm->nfct); 222 nf_conntrack_get(reasm->nfct);
222 skb->nfct = reasm->nfct; 223 skb->nfct = reasm->nfct;
223 skb->nfctinfo = reasm->nfctinfo; 224 skb->nfctinfo = reasm->nfctinfo;
224 return NF_ACCEPT; 225 return NF_ACCEPT;
225 } 226 }
226 227
227 return nf_conntrack_in(net, PF_INET6, hooknum, skb); 228 return nf_conntrack_in(net, PF_INET6, hooknum, skb);
228 } 229 }
229 230
230 static unsigned int ipv6_conntrack_in(unsigned int hooknum, 231 static unsigned int ipv6_conntrack_in(unsigned int hooknum,
231 struct sk_buff *skb, 232 struct sk_buff *skb,
232 const struct net_device *in, 233 const struct net_device *in,
233 const struct net_device *out, 234 const struct net_device *out,
234 int (*okfn)(struct sk_buff *)) 235 int (*okfn)(struct sk_buff *))
235 { 236 {
236 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); 237 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
237 } 238 }
238 239
239 static unsigned int ipv6_conntrack_local(unsigned int hooknum, 240 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
240 struct sk_buff *skb, 241 struct sk_buff *skb,
241 const struct net_device *in, 242 const struct net_device *in,
242 const struct net_device *out, 243 const struct net_device *out,
243 int (*okfn)(struct sk_buff *)) 244 int (*okfn)(struct sk_buff *))
244 { 245 {
245 /* root is playing with raw sockets. */ 246 /* root is playing with raw sockets. */
246 if (skb->len < sizeof(struct ipv6hdr)) { 247 if (skb->len < sizeof(struct ipv6hdr)) {
247 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 248 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
248 return NF_ACCEPT; 249 return NF_ACCEPT;
249 } 250 }
250 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); 251 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
251 } 252 }
252 253
253 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 254 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
254 { 255 {
255 .hook = ipv6_conntrack_in, 256 .hook = ipv6_conntrack_in,
256 .owner = THIS_MODULE, 257 .owner = THIS_MODULE,
257 .pf = NFPROTO_IPV6, 258 .pf = NFPROTO_IPV6,
258 .hooknum = NF_INET_PRE_ROUTING, 259 .hooknum = NF_INET_PRE_ROUTING,
259 .priority = NF_IP6_PRI_CONNTRACK, 260 .priority = NF_IP6_PRI_CONNTRACK,
260 }, 261 },
261 { 262 {
262 .hook = ipv6_conntrack_local, 263 .hook = ipv6_conntrack_local,
263 .owner = THIS_MODULE, 264 .owner = THIS_MODULE,
264 .pf = NFPROTO_IPV6, 265 .pf = NFPROTO_IPV6,
265 .hooknum = NF_INET_LOCAL_OUT, 266 .hooknum = NF_INET_LOCAL_OUT,
266 .priority = NF_IP6_PRI_CONNTRACK, 267 .priority = NF_IP6_PRI_CONNTRACK,
267 }, 268 },
268 { 269 {
269 .hook = ipv6_helper, 270 .hook = ipv6_helper,
270 .owner = THIS_MODULE, 271 .owner = THIS_MODULE,
271 .pf = NFPROTO_IPV6, 272 .pf = NFPROTO_IPV6,
272 .hooknum = NF_INET_POST_ROUTING, 273 .hooknum = NF_INET_POST_ROUTING,
273 .priority = NF_IP6_PRI_CONNTRACK_HELPER, 274 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
274 }, 275 },
275 { 276 {
276 .hook = ipv6_confirm, 277 .hook = ipv6_confirm,
277 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
278 .pf = NFPROTO_IPV6, 279 .pf = NFPROTO_IPV6,
279 .hooknum = NF_INET_POST_ROUTING, 280 .hooknum = NF_INET_POST_ROUTING,
280 .priority = NF_IP6_PRI_LAST, 281 .priority = NF_IP6_PRI_LAST,
281 }, 282 },
282 { 283 {
283 .hook = ipv6_helper, 284 .hook = ipv6_helper,
284 .owner = THIS_MODULE, 285 .owner = THIS_MODULE,
285 .pf = NFPROTO_IPV6, 286 .pf = NFPROTO_IPV6,
286 .hooknum = NF_INET_LOCAL_IN, 287 .hooknum = NF_INET_LOCAL_IN,
287 .priority = NF_IP6_PRI_CONNTRACK_HELPER, 288 .priority = NF_IP6_PRI_CONNTRACK_HELPER,
288 }, 289 },
289 { 290 {
290 .hook = ipv6_confirm, 291 .hook = ipv6_confirm,
291 .owner = THIS_MODULE, 292 .owner = THIS_MODULE,
292 .pf = NFPROTO_IPV6, 293 .pf = NFPROTO_IPV6,
293 .hooknum = NF_INET_LOCAL_IN, 294 .hooknum = NF_INET_LOCAL_IN,
294 .priority = NF_IP6_PRI_LAST-1, 295 .priority = NF_IP6_PRI_LAST-1,
295 }, 296 },
296 }; 297 };
297 298
299 static int
300 ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
301 {
302 const struct inet_sock *inet = inet_sk(sk);
303 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
304 const struct nf_conntrack_tuple_hash *h;
305 struct sockaddr_in6 sin6;
306 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
307 struct nf_conn *ct;
308
309 tuple.src.u3.in6 = inet6->rcv_saddr;
310 tuple.src.u.tcp.port = inet->inet_sport;
311 tuple.dst.u3.in6 = inet6->daddr;
312 tuple.dst.u.tcp.port = inet->inet_dport;
313 tuple.dst.protonum = sk->sk_protocol;
314
315 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
316 return -ENOPROTOOPT;
317
318 if (*len < 0 || (unsigned int) *len < sizeof(sin6))
319 return -EINVAL;
320
321 h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
322 if (!h) {
323 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
324 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
325 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
326 return -ENOENT;
327 }
328
329 ct = nf_ct_tuplehash_to_ctrack(h);
330
331 sin6.sin6_family = AF_INET6;
332 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
333 sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
334 memcpy(&sin6.sin6_addr,
335 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
336 sizeof(sin6.sin6_addr));
337
338 nf_ct_put(ct);
339
340 if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
341 sin6.sin6_scope_id = sk->sk_bound_dev_if;
342 else
343 sin6.sin6_scope_id = 0;
344
345 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
346 }
347
298 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 348 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
299 349
300 #include <linux/netfilter/nfnetlink.h> 350 #include <linux/netfilter/nfnetlink.h>
301 #include <linux/netfilter/nfnetlink_conntrack.h> 351 #include <linux/netfilter/nfnetlink_conntrack.h>
302 352
303 static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 353 static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
304 const struct nf_conntrack_tuple *tuple) 354 const struct nf_conntrack_tuple *tuple)
305 { 355 {
306 if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, 356 if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
307 &tuple->src.u3.ip6) || 357 &tuple->src.u3.ip6) ||
308 nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, 358 nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
309 &tuple->dst.u3.ip6)) 359 &tuple->dst.u3.ip6))
310 goto nla_put_failure; 360 goto nla_put_failure;
311 return 0; 361 return 0;
312 362
313 nla_put_failure: 363 nla_put_failure:
314 return -1; 364 return -1;
315 } 365 }
316 366
317 static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { 367 static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
318 [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, 368 [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 },
319 [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, 369 [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 },
320 }; 370 };
321 371
322 static int ipv6_nlattr_to_tuple(struct nlattr *tb[], 372 static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
323 struct nf_conntrack_tuple *t) 373 struct nf_conntrack_tuple *t)
324 { 374 {
325 if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) 375 if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
326 return -EINVAL; 376 return -EINVAL;
327 377
328 memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), 378 memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]),
329 sizeof(u_int32_t) * 4); 379 sizeof(u_int32_t) * 4);
330 memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), 380 memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]),
331 sizeof(u_int32_t) * 4); 381 sizeof(u_int32_t) * 4);
332 382
333 return 0; 383 return 0;
334 } 384 }
335 385
336 static int ipv6_nlattr_tuple_size(void) 386 static int ipv6_nlattr_tuple_size(void)
337 { 387 {
338 return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); 388 return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
339 } 389 }
340 #endif 390 #endif
341 391
342 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { 392 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
343 .l3proto = PF_INET6, 393 .l3proto = PF_INET6,
344 .name = "ipv6", 394 .name = "ipv6",
345 .pkt_to_tuple = ipv6_pkt_to_tuple, 395 .pkt_to_tuple = ipv6_pkt_to_tuple,
346 .invert_tuple = ipv6_invert_tuple, 396 .invert_tuple = ipv6_invert_tuple,
347 .print_tuple = ipv6_print_tuple, 397 .print_tuple = ipv6_print_tuple,
348 .get_l4proto = ipv6_get_l4proto, 398 .get_l4proto = ipv6_get_l4proto,
349 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 399 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
350 .tuple_to_nlattr = ipv6_tuple_to_nlattr, 400 .tuple_to_nlattr = ipv6_tuple_to_nlattr,
351 .nlattr_tuple_size = ipv6_nlattr_tuple_size, 401 .nlattr_tuple_size = ipv6_nlattr_tuple_size,
352 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 402 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
353 .nla_policy = ipv6_nla_policy, 403 .nla_policy = ipv6_nla_policy,
354 #endif 404 #endif
355 .me = THIS_MODULE, 405 .me = THIS_MODULE,
356 }; 406 };
357 407
358 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 408 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
359 MODULE_LICENSE("GPL"); 409 MODULE_LICENSE("GPL");
360 MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); 410 MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
361 411
412 static struct nf_sockopt_ops so_getorigdst6 = {
413 .pf = NFPROTO_IPV6,
414 .get_optmin = IP6T_SO_ORIGINAL_DST,
415 .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
416 .get = ipv6_getorigdst,
417 .owner = THIS_MODULE,
418 };
419
362 static int ipv6_net_init(struct net *net) 420 static int ipv6_net_init(struct net *net)
363 { 421 {
364 int ret = 0; 422 int ret = 0;
365 423
366 ret = nf_conntrack_l4proto_register(net, 424 ret = nf_conntrack_l4proto_register(net,
367 &nf_conntrack_l4proto_tcp6); 425 &nf_conntrack_l4proto_tcp6);
368 if (ret < 0) { 426 if (ret < 0) {
369 printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); 427 printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n");
370 goto out; 428 goto out;
371 } 429 }
372 ret = nf_conntrack_l4proto_register(net, 430 ret = nf_conntrack_l4proto_register(net,
373 &nf_conntrack_l4proto_udp6); 431 &nf_conntrack_l4proto_udp6);
374 if (ret < 0) { 432 if (ret < 0) {
375 printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); 433 printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n");
376 goto cleanup_tcp6; 434 goto cleanup_tcp6;
377 } 435 }
378 ret = nf_conntrack_l4proto_register(net, 436 ret = nf_conntrack_l4proto_register(net,
379 &nf_conntrack_l4proto_icmpv6); 437 &nf_conntrack_l4proto_icmpv6);
380 if (ret < 0) { 438 if (ret < 0) {
381 printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); 439 printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n");
382 goto cleanup_udp6; 440 goto cleanup_udp6;
383 } 441 }
384 ret = nf_conntrack_l3proto_register(net, 442 ret = nf_conntrack_l3proto_register(net,
385 &nf_conntrack_l3proto_ipv6); 443 &nf_conntrack_l3proto_ipv6);
386 if (ret < 0) { 444 if (ret < 0) {
387 printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); 445 printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n");
388 goto cleanup_icmpv6; 446 goto cleanup_icmpv6;
389 } 447 }
390 return 0; 448 return 0;
391 cleanup_icmpv6: 449 cleanup_icmpv6:
392 nf_conntrack_l4proto_unregister(net, 450 nf_conntrack_l4proto_unregister(net,
393 &nf_conntrack_l4proto_icmpv6); 451 &nf_conntrack_l4proto_icmpv6);
394 cleanup_udp6: 452 cleanup_udp6:
395 nf_conntrack_l4proto_unregister(net, 453 nf_conntrack_l4proto_unregister(net,
396 &nf_conntrack_l4proto_udp6); 454 &nf_conntrack_l4proto_udp6);
397 cleanup_tcp6: 455 cleanup_tcp6:
398 nf_conntrack_l4proto_unregister(net, 456 nf_conntrack_l4proto_unregister(net,
399 &nf_conntrack_l4proto_tcp6); 457 &nf_conntrack_l4proto_tcp6);
400 out: 458 out:
401 return ret; 459 return ret;
402 } 460 }
403 461
404 static void ipv6_net_exit(struct net *net) 462 static void ipv6_net_exit(struct net *net)
405 { 463 {
406 nf_conntrack_l3proto_unregister(net, 464 nf_conntrack_l3proto_unregister(net,
407 &nf_conntrack_l3proto_ipv6); 465 &nf_conntrack_l3proto_ipv6);
408 nf_conntrack_l4proto_unregister(net, 466 nf_conntrack_l4proto_unregister(net,
409 &nf_conntrack_l4proto_icmpv6); 467 &nf_conntrack_l4proto_icmpv6);
410 nf_conntrack_l4proto_unregister(net, 468 nf_conntrack_l4proto_unregister(net,
411 &nf_conntrack_l4proto_udp6); 469 &nf_conntrack_l4proto_udp6);
412 nf_conntrack_l4proto_unregister(net, 470 nf_conntrack_l4proto_unregister(net,
413 &nf_conntrack_l4proto_tcp6); 471 &nf_conntrack_l4proto_tcp6);
414 } 472 }
415 473
416 static struct pernet_operations ipv6_net_ops = { 474 static struct pernet_operations ipv6_net_ops = {
417 .init = ipv6_net_init, 475 .init = ipv6_net_init,
418 .exit = ipv6_net_exit, 476 .exit = ipv6_net_exit,
419 }; 477 };
420 478
421 static int __init nf_conntrack_l3proto_ipv6_init(void) 479 static int __init nf_conntrack_l3proto_ipv6_init(void)
422 { 480 {
423 int ret = 0; 481 int ret = 0;
424 482
425 need_conntrack(); 483 need_conntrack();
426 nf_defrag_ipv6_enable(); 484 nf_defrag_ipv6_enable();
427 485
486 ret = nf_register_sockopt(&so_getorigdst6);
487 if (ret < 0) {
488 pr_err("Unable to register netfilter socket option\n");
489 return ret;
490 }
491
428 ret = register_pernet_subsys(&ipv6_net_ops); 492 ret = register_pernet_subsys(&ipv6_net_ops);
429 if (ret < 0) 493 if (ret < 0)
430 goto cleanup_pernet; 494 goto cleanup_pernet;
431 ret = nf_register_hooks(ipv6_conntrack_ops, 495 ret = nf_register_hooks(ipv6_conntrack_ops,
432 ARRAY_SIZE(ipv6_conntrack_ops)); 496 ARRAY_SIZE(ipv6_conntrack_ops));
433 if (ret < 0) { 497 if (ret < 0) {
434 pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " 498 pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
435 "hook.\n"); 499 "hook.\n");
436 goto cleanup_ipv6; 500 goto cleanup_ipv6;
437 } 501 }
438 return ret; 502 return ret;
439 503
440 cleanup_ipv6: 504 cleanup_ipv6:
441 unregister_pernet_subsys(&ipv6_net_ops); 505 unregister_pernet_subsys(&ipv6_net_ops);
442 cleanup_pernet: 506 cleanup_pernet:
507 nf_unregister_sockopt(&so_getorigdst6);
443 return ret; 508 return ret;
444 } 509 }
445 510
446 static void __exit nf_conntrack_l3proto_ipv6_fini(void) 511 static void __exit nf_conntrack_l3proto_ipv6_fini(void)
447 { 512 {
448 synchronize_net(); 513 synchronize_net();
449 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); 514 nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
450 unregister_pernet_subsys(&ipv6_net_ops); 515 unregister_pernet_subsys(&ipv6_net_ops);
516 nf_unregister_sockopt(&so_getorigdst6);
451 } 517 }
452 518
453 module_init(nf_conntrack_l3proto_ipv6_init); 519 module_init(nf_conntrack_l3proto_ipv6_init);
454 module_exit(nf_conntrack_l3proto_ipv6_fini); 520 module_exit(nf_conntrack_l3proto_ipv6_fini);
455 521
net/netfilter/ipvs/ip_vs_nfct.c
1 /* 1 /*
2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS 2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
3 * 3 *
4 * Portions Copyright (C) 2001-2002 4 * Portions Copyright (C) 2001-2002
5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. 5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6 * 6 *
7 * Portions Copyright (C) 2003-2010 7 * Portions Copyright (C) 2003-2010
8 * Julian Anastasov 8 * Julian Anastasov
9 * 9 *
10 * 10 *
11 * This code is free software; you can redistribute it and/or modify 11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by 12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or 13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version. 14 * (at your option) any later version.
15 * 15 *
16 * This program is distributed in the hope that it will be useful, 16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details. 19 * GNU General Public License for more details.
20 * 20 *
21 * You should have received a copy of the GNU General Public License 21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software 22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 * 24 *
25 * 25 *
26 * Authors: 26 * Authors:
27 * Ben North <ben@redfrontdoor.org> 27 * Ben North <ben@redfrontdoor.org>
28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels 28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match 29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
30 * 30 *
31 * 31 *
32 * Current status: 32 * Current status:
33 * 33 *
34 * - provide conntrack confirmation for new and related connections, by 34 * - provide conntrack confirmation for new and related connections, by
35 * this way we can see their proper conntrack state in all hooks 35 * this way we can see their proper conntrack state in all hooks
36 * - support for all forwarding methods, not only NAT 36 * - support for all forwarding methods, not only NAT
37 * - FTP support (NAT), ability to support other NAT apps with expectations 37 * - FTP support (NAT), ability to support other NAT apps with expectations
38 * - to correctly create expectations for related NAT connections the proper 38 * - to correctly create expectations for related NAT connections the proper
39 * NF conntrack support must be already installed, eg. ip_vs_ftp requires 39 * NF conntrack support must be already installed, eg. ip_vs_ftp requires
40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables 40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41 * NAT rules are needed) 41 * NAT rules are needed)
42 * - alter reply for NAT when forwarding packet in original direction: 42 * - alter reply for NAT when forwarding packet in original direction:
43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or 43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44 * when RELATED conntrack is created from real server (Active FTP DATA) 44 * when RELATED conntrack is created from real server (Active FTP DATA)
45 * - if iptables_nat is not loaded the Passive FTP will not work (the 45 * - if iptables_nat is not loaded the Passive FTP will not work (the
46 * PASV response can not be NAT-ed) but Active FTP should work 46 * PASV response can not be NAT-ed) but Active FTP should work
47 * 47 *
48 */ 48 */
49 49
50 #define KMSG_COMPONENT "IPVS" 50 #define KMSG_COMPONENT "IPVS"
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 52
53 #include <linux/module.h> 53 #include <linux/module.h>
54 #include <linux/types.h> 54 #include <linux/types.h>
55 #include <linux/kernel.h> 55 #include <linux/kernel.h>
56 #include <linux/errno.h> 56 #include <linux/errno.h>
57 #include <linux/compiler.h> 57 #include <linux/compiler.h>
58 #include <linux/vmalloc.h> 58 #include <linux/vmalloc.h>
59 #include <linux/skbuff.h> 59 #include <linux/skbuff.h>
60 #include <net/ip.h> 60 #include <net/ip.h>
61 #include <linux/netfilter.h> 61 #include <linux/netfilter.h>
62 #include <linux/netfilter_ipv4.h> 62 #include <linux/netfilter_ipv4.h>
63 #include <net/ip_vs.h> 63 #include <net/ip_vs.h>
64 #include <net/netfilter/nf_conntrack_core.h> 64 #include <net/netfilter/nf_conntrack_core.h>
65 #include <net/netfilter/nf_conntrack_expect.h> 65 #include <net/netfilter/nf_conntrack_expect.h>
66 #include <net/netfilter/nf_conntrack_helper.h> 66 #include <net/netfilter/nf_conntrack_helper.h>
67 #include <net/netfilter/nf_conntrack_zones.h> 67 #include <net/netfilter/nf_conntrack_zones.h>
68 68
69 69
70 #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" 70 #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
71 #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ 71 #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ 72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
73 (T)->dst.protonum 73 (T)->dst.protonum
74 74
75 #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" 75 #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
76 #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ 76 #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
77 &((C)->vaddr.ip), ntohs((C)->vport), \ 77 &((C)->vaddr.ip), ntohs((C)->vport), \
78 &((C)->daddr.ip), ntohs((C)->dport), \ 78 &((C)->daddr.ip), ntohs((C)->dport), \
79 (C)->protocol, (C)->state 79 (C)->protocol, (C)->state
80 80
81 void 81 void
82 ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) 82 ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
83 { 83 {
84 enum ip_conntrack_info ctinfo; 84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 85 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple new_tuple; 86 struct nf_conntrack_tuple new_tuple;
87 87
88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || 88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
89 nf_ct_is_dying(ct)) 89 nf_ct_is_dying(ct))
90 return; 90 return;
91 91
92 /* Never alter conntrack for non-NAT conns */ 92 /* Never alter conntrack for non-NAT conns */
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return; 94 return;
95 95
96 /* Alter reply only in original direction */ 96 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return; 98 return;
99 99
100 /* 100 /*
101 * The connection is not yet in the hashtable, so we update it. 101 * The connection is not yet in the hashtable, so we update it.
102 * CIP->VIP will remain the same, so leave the tuple in 102 * CIP->VIP will remain the same, so leave the tuple in
103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the 103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104 * real-server we will see RIP->DIP. 104 * real-server we will see RIP->DIP.
105 */ 105 */
106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
107 /* 107 /*
108 * This will also take care of UDP and other protocols. 108 * This will also take care of UDP and other protocols.
109 */ 109 */
110 if (outin) { 110 if (outin) {
111 new_tuple.src.u3 = cp->daddr; 111 new_tuple.src.u3 = cp->daddr;
112 if (new_tuple.dst.protonum != IPPROTO_ICMP && 112 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113 new_tuple.dst.protonum != IPPROTO_ICMPV6) 113 new_tuple.dst.protonum != IPPROTO_ICMPV6)
114 new_tuple.src.u.tcp.port = cp->dport; 114 new_tuple.src.u.tcp.port = cp->dport;
115 } else { 115 } else {
116 new_tuple.dst.u3 = cp->vaddr; 116 new_tuple.dst.u3 = cp->vaddr;
117 if (new_tuple.dst.protonum != IPPROTO_ICMP && 117 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118 new_tuple.dst.protonum != IPPROTO_ICMPV6) 118 new_tuple.dst.protonum != IPPROTO_ICMPV6)
119 new_tuple.dst.u.tcp.port = cp->vport; 119 new_tuple.dst.u.tcp.port = cp->vport;
120 } 120 }
121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " 121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122 "ctinfo=%d, old reply=" FMT_TUPLE 122 "ctinfo=%d, old reply=" FMT_TUPLE
123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", 123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124 __func__, ct, ct->status, ctinfo, 124 __func__, ct, ct->status, ctinfo,
125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), 125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126 ARG_TUPLE(&new_tuple), ARG_CONN(cp)); 126 ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127 nf_conntrack_alter_reply(ct, &new_tuple); 127 nf_conntrack_alter_reply(ct, &new_tuple);
128 } 128 }
129 129
130 int ip_vs_confirm_conntrack(struct sk_buff *skb) 130 int ip_vs_confirm_conntrack(struct sk_buff *skb)
131 { 131 {
132 return nf_conntrack_confirm(skb); 132 return nf_conntrack_confirm(skb);
133 } 133 }
134 134
135 /* 135 /*
136 * Called from init_conntrack() as expectfn handler. 136 * Called from init_conntrack() as expectfn handler.
137 */ 137 */
138 static void ip_vs_nfct_expect_callback(struct nf_conn *ct, 138 static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139 struct nf_conntrack_expect *exp) 139 struct nf_conntrack_expect *exp)
140 { 140 {
141 struct nf_conntrack_tuple *orig, new_reply; 141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp; 142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p; 143 struct ip_vs_conn_param p;
144 struct net *net = nf_ct_net(ct); 144 struct net *net = nf_ct_net(ct);
145 145
146 if (exp->tuple.src.l3num != PF_INET) 146 if (exp->tuple.src.l3num != PF_INET)
147 return; 147 return;
148 148
149 /* 149 /*
150 * We assume that no NF locks are held before this callback. 150 * We assume that no NF locks are held before this callback.
151 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their 151 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
152 * expectations even if they use wildcard values, now we provide the 152 * expectations even if they use wildcard values, now we provide the
153 * actual values from the newly created original conntrack direction. 153 * actual values from the newly created original conntrack direction.
154 * The conntrack is confirmed when packet reaches IPVS hooks. 154 * The conntrack is confirmed when packet reaches IPVS hooks.
155 */ 155 */
156 156
157 /* RS->CLIENT */ 157 /* RS->CLIENT */
158 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 158 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
159 ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, 159 ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
160 &orig->src.u3, orig->src.u.tcp.port, 160 &orig->src.u3, orig->src.u.tcp.port,
161 &orig->dst.u3, orig->dst.u.tcp.port, &p); 161 &orig->dst.u3, orig->dst.u.tcp.port, &p);
162 cp = ip_vs_conn_out_get(&p); 162 cp = ip_vs_conn_out_get(&p);
163 if (cp) { 163 if (cp) {
164 /* Change reply CLIENT->RS to CLIENT->VS */ 164 /* Change reply CLIENT->RS to CLIENT->VS */
165 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 165 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
166 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 166 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
167 FMT_TUPLE ", found inout cp=" FMT_CONN "\n", 167 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
168 __func__, ct, ct->status, 168 __func__, ct, ct->status,
169 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 169 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
170 ARG_CONN(cp)); 170 ARG_CONN(cp));
171 new_reply.dst.u3 = cp->vaddr; 171 new_reply.dst.u3 = cp->vaddr;
172 new_reply.dst.u.tcp.port = cp->vport; 172 new_reply.dst.u.tcp.port = cp->vport;
173 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE 173 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
174 ", inout cp=" FMT_CONN "\n", 174 ", inout cp=" FMT_CONN "\n",
175 __func__, ct, 175 __func__, ct,
176 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 176 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
177 ARG_CONN(cp)); 177 ARG_CONN(cp));
178 goto alter; 178 goto alter;
179 } 179 }
180 180
181 /* CLIENT->VS */ 181 /* CLIENT->VS */
182 cp = ip_vs_conn_in_get(&p); 182 cp = ip_vs_conn_in_get(&p);
183 if (cp) { 183 if (cp) {
184 /* Change reply VS->CLIENT to RS->CLIENT */ 184 /* Change reply VS->CLIENT to RS->CLIENT */
185 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 185 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
186 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 186 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
187 FMT_TUPLE ", found outin cp=" FMT_CONN "\n", 187 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
188 __func__, ct, ct->status, 188 __func__, ct, ct->status,
189 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 189 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
190 ARG_CONN(cp)); 190 ARG_CONN(cp));
191 new_reply.src.u3 = cp->daddr; 191 new_reply.src.u3 = cp->daddr;
192 new_reply.src.u.tcp.port = cp->dport; 192 new_reply.src.u.tcp.port = cp->dport;
193 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " 193 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
194 FMT_TUPLE ", outin cp=" FMT_CONN "\n", 194 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
195 __func__, ct, 195 __func__, ct,
196 ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 196 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
197 ARG_CONN(cp)); 197 ARG_CONN(cp));
198 goto alter; 198 goto alter;
199 } 199 }
200 200
201 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE 201 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
202 " - unknown expect\n", 202 " - unknown expect\n",
203 __func__, ct, ct->status, ARG_TUPLE(orig)); 203 __func__, ct, ct->status, ARG_TUPLE(orig));
204 return; 204 return;
205 205
206 alter: 206 alter:
207 /* Never alter conntrack for non-NAT conns */ 207 /* Never alter conntrack for non-NAT conns */
208 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) 208 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
209 nf_conntrack_alter_reply(ct, &new_reply); 209 nf_conntrack_alter_reply(ct, &new_reply);
210 ip_vs_conn_put(cp); 210 ip_vs_conn_put(cp);
211 return; 211 return;
212 } 212 }
213 213
214 /* 214 /*
215 * Create NF conntrack expectation with wildcard (optional) source port. 215 * Create NF conntrack expectation with wildcard (optional) source port.
216 * Then the default callback function will alter the reply and will confirm 216 * Then the default callback function will alter the reply and will confirm
217 * the conntrack entry when the first packet comes. 217 * the conntrack entry when the first packet comes.
218 * Use port 0 to expect connection from any port. 218 * Use port 0 to expect connection from any port.
219 */ 219 */
220 void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, 220 void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
221 struct ip_vs_conn *cp, u_int8_t proto, 221 struct ip_vs_conn *cp, u_int8_t proto,
222 const __be16 port, int from_rs) 222 const __be16 port, int from_rs)
223 { 223 {
224 struct nf_conntrack_expect *exp; 224 struct nf_conntrack_expect *exp;
225 225
226 if (ct == NULL || nf_ct_is_untracked(ct)) 226 if (ct == NULL || nf_ct_is_untracked(ct))
227 return; 227 return;
228 228
229 exp = nf_ct_expect_alloc(ct); 229 exp = nf_ct_expect_alloc(ct);
230 if (!exp) 230 if (!exp)
231 return; 231 return;
232 232
233 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), 233 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
234 from_rs ? &cp->daddr : &cp->caddr, 234 from_rs ? &cp->daddr : &cp->caddr,
235 from_rs ? &cp->caddr : &cp->vaddr, 235 from_rs ? &cp->caddr : &cp->vaddr,
236 proto, port ? &port : NULL, 236 proto, port ? &port : NULL,
237 from_rs ? &cp->cport : &cp->vport); 237 from_rs ? &cp->cport : &cp->vport);
238 238
239 exp->expectfn = ip_vs_nfct_expect_callback; 239 exp->expectfn = ip_vs_nfct_expect_callback;
240 240
241 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", 241 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
242 __func__, ct, ARG_TUPLE(&exp->tuple)); 242 __func__, ct, ARG_TUPLE(&exp->tuple));
243 nf_ct_expect_related(exp); 243 nf_ct_expect_related(exp);
244 nf_ct_expect_put(exp); 244 nf_ct_expect_put(exp);
245 } 245 }
246 EXPORT_SYMBOL(ip_vs_nfct_expect_related); 246 EXPORT_SYMBOL(ip_vs_nfct_expect_related);
247 247
248 /* 248 /*
249 * Our connection was terminated, try to drop the conntrack immediately 249 * Our connection was terminated, try to drop the conntrack immediately
250 */ 250 */
251 void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) 251 void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
252 { 252 {
253 struct nf_conntrack_tuple_hash *h; 253 struct nf_conntrack_tuple_hash *h;
254 struct nf_conn *ct; 254 struct nf_conn *ct;
255 struct nf_conntrack_tuple tuple; 255 struct nf_conntrack_tuple tuple;
256 256
257 if (!cp->cport) 257 if (!cp->cport)
258 return; 258 return;
259 259
260 tuple = (struct nf_conntrack_tuple) { 260 tuple = (struct nf_conntrack_tuple) {
261 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; 261 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
262 tuple.src.u3 = cp->caddr; 262 tuple.src.u3 = cp->caddr;
263 tuple.src.u.all = cp->cport; 263 tuple.src.u.all = cp->cport;
264 tuple.src.l3num = cp->af; 264 tuple.src.l3num = cp->af;
265 tuple.dst.u3 = cp->vaddr; 265 tuple.dst.u3 = cp->vaddr;
266 tuple.dst.u.all = cp->vport; 266 tuple.dst.u.all = cp->vport;
267 267
268 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE 268 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
269 " for conn " FMT_CONN "\n", 269 " for conn " FMT_CONN "\n",
270 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 270 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
271 271
272 h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, 272 h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
273 &tuple); 273 &tuple);
274 if (h) { 274 if (h) {
275 ct = nf_ct_tuplehash_to_ctrack(h); 275 ct = nf_ct_tuplehash_to_ctrack(h);
276 /* Show what happens instead of calling nf_ct_kill() */ 276 /* Show what happens instead of calling nf_ct_kill() */
277 if (del_timer(&ct->timeout)) { 277 if (del_timer(&ct->timeout)) {
278 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" 278 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
279 FMT_TUPLE "\n", 279 FMT_TUPLE "\n",
280 __func__, ct, ARG_TUPLE(&tuple)); 280 __func__, ct, ARG_TUPLE(&tuple));
281 if (ct->timeout.function) 281 if (ct->timeout.function)
282 ct->timeout.function(ct->timeout.data); 282 ct->timeout.function(ct->timeout.data);
283 } else { 283 } else {
284 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" 284 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
285 FMT_TUPLE "\n", 285 FMT_TUPLE "\n",
286 __func__, ct, ARG_TUPLE(&tuple)); 286 __func__, ct, ARG_TUPLE(&tuple));
287 } 287 }
288 nf_ct_put(ct); 288 nf_ct_put(ct);
289 } else { 289 } else {
290 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", 290 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
291 __func__, ARG_TUPLE(&tuple)); 291 __func__, ARG_TUPLE(&tuple));
292 } 292 }
293 } 293 }
294 294
295 295
net/netfilter/ipvs/ip_vs_xmit.c
1 /* 1 /*
2 * ip_vs_xmit.c: various packet transmitters for IPVS 2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 * 3 *
4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
5 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Changes: 12 * Changes:
13 * 13 *
14 * Description of forwarding methods: 14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and 15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD 16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example, 17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used 18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet 19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules: 20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) 21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet 22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL 23 * - the only place where we can see skb->sk != NULL
24 */ 24 */
25 25
26 #define KMSG_COMPONENT "IPVS" 26 #define KMSG_COMPONENT "IPVS"
27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28 28
29 #include <linux/kernel.h> 29 #include <linux/kernel.h>
30 #include <linux/slab.h> 30 #include <linux/slab.h>
31 #include <linux/tcp.h> /* for tcphdr */ 31 #include <linux/tcp.h> /* for tcphdr */
32 #include <net/ip.h> 32 #include <net/ip.h>
33 #include <net/tcp.h> /* for csum_tcpudp_magic */ 33 #include <net/tcp.h> /* for csum_tcpudp_magic */
34 #include <net/udp.h> 34 #include <net/udp.h>
35 #include <net/icmp.h> /* for icmp_send */ 35 #include <net/icmp.h> /* for icmp_send */
36 #include <net/route.h> /* for ip_route_output */ 36 #include <net/route.h> /* for ip_route_output */
37 #include <net/ipv6.h> 37 #include <net/ipv6.h>
38 #include <net/ip6_route.h> 38 #include <net/ip6_route.h>
39 #include <net/addrconf.h> 39 #include <net/addrconf.h>
40 #include <linux/icmpv6.h> 40 #include <linux/icmpv6.h>
41 #include <linux/netfilter.h> 41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv4.h> 42 #include <linux/netfilter_ipv4.h>
43 43
44 #include <net/ip_vs.h> 44 #include <net/ip_vs.h>
45 45
46 enum { 46 enum {
47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ 47 IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */
48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ 48 IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */
49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to 49 IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
50 * local 50 * local
51 */ 51 */
52 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ 52 IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
53 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ 53 IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
54 }; 54 };
55 55
56 /* 56 /*
57 * Destination cache to speed up outgoing route lookup 57 * Destination cache to speed up outgoing route lookup
58 */ 58 */
59 static inline void 59 static inline void
60 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, 60 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
61 u32 dst_cookie) 61 u32 dst_cookie)
62 { 62 {
63 struct dst_entry *old_dst; 63 struct dst_entry *old_dst;
64 64
65 old_dst = dest->dst_cache; 65 old_dst = dest->dst_cache;
66 dest->dst_cache = dst; 66 dest->dst_cache = dst;
67 dest->dst_rtos = rtos; 67 dest->dst_rtos = rtos;
68 dest->dst_cookie = dst_cookie; 68 dest->dst_cookie = dst_cookie;
69 dst_release(old_dst); 69 dst_release(old_dst);
70 } 70 }
71 71
72 static inline struct dst_entry * 72 static inline struct dst_entry *
73 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) 73 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
74 { 74 {
75 struct dst_entry *dst = dest->dst_cache; 75 struct dst_entry *dst = dest->dst_cache;
76 76
77 if (!dst) 77 if (!dst)
78 return NULL; 78 return NULL;
79 if ((dst->obsolete || rtos != dest->dst_rtos) && 79 if ((dst->obsolete || rtos != dest->dst_rtos) &&
80 dst->ops->check(dst, dest->dst_cookie) == NULL) { 80 dst->ops->check(dst, dest->dst_cookie) == NULL) {
81 dest->dst_cache = NULL; 81 dest->dst_cache = NULL;
82 dst_release(dst); 82 dst_release(dst);
83 return NULL; 83 return NULL;
84 } 84 }
85 dst_hold(dst); 85 dst_hold(dst);
86 return dst; 86 return dst;
87 } 87 }
88 88
89 static inline bool 89 static inline bool
90 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) 90 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
91 { 91 {
92 if (IP6CB(skb)->frag_max_size) { 92 if (IP6CB(skb)->frag_max_size) {
93 /* frag_max_size tell us that, this packet have been 93 /* frag_max_size tell us that, this packet have been
94 * defragmented by netfilter IPv6 conntrack module. 94 * defragmented by netfilter IPv6 conntrack module.
95 */ 95 */
96 if (IP6CB(skb)->frag_max_size > mtu) 96 if (IP6CB(skb)->frag_max_size > mtu)
97 return true; /* largest fragment violate MTU */ 97 return true; /* largest fragment violate MTU */
98 } 98 }
99 else if (skb->len > mtu && !skb_is_gso(skb)) { 99 else if (skb->len > mtu && !skb_is_gso(skb)) {
100 return true; /* Packet size violate MTU size */ 100 return true; /* Packet size violate MTU size */
101 } 101 }
102 return false; 102 return false;
103 } 103 }
104 104
105 /* Get route to daddr, update *saddr, optionally bind route to saddr */ 105 /* Get route to daddr, update *saddr, optionally bind route to saddr */
106 static struct rtable *do_output_route4(struct net *net, __be32 daddr, 106 static struct rtable *do_output_route4(struct net *net, __be32 daddr,
107 u32 rtos, int rt_mode, __be32 *saddr) 107 u32 rtos, int rt_mode, __be32 *saddr)
108 { 108 {
109 struct flowi4 fl4; 109 struct flowi4 fl4;
110 struct rtable *rt; 110 struct rtable *rt;
111 int loop = 0; 111 int loop = 0;
112 112
113 memset(&fl4, 0, sizeof(fl4)); 113 memset(&fl4, 0, sizeof(fl4));
114 fl4.daddr = daddr; 114 fl4.daddr = daddr;
115 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; 115 fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
116 fl4.flowi4_tos = rtos; 116 fl4.flowi4_tos = rtos;
117 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? 117 fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
118 FLOWI_FLAG_KNOWN_NH : 0; 118 FLOWI_FLAG_KNOWN_NH : 0;
119 119
120 retry: 120 retry:
121 rt = ip_route_output_key(net, &fl4); 121 rt = ip_route_output_key(net, &fl4);
122 if (IS_ERR(rt)) { 122 if (IS_ERR(rt)) {
123 /* Invalid saddr ? */ 123 /* Invalid saddr ? */
124 if (PTR_ERR(rt) == -EINVAL && *saddr && 124 if (PTR_ERR(rt) == -EINVAL && *saddr &&
125 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { 125 rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
126 *saddr = 0; 126 *saddr = 0;
127 flowi4_update_output(&fl4, 0, rtos, daddr, 0); 127 flowi4_update_output(&fl4, 0, rtos, daddr, 0);
128 goto retry; 128 goto retry;
129 } 129 }
130 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); 130 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
131 return NULL; 131 return NULL;
132 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { 132 } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
133 ip_rt_put(rt); 133 ip_rt_put(rt);
134 *saddr = fl4.saddr; 134 *saddr = fl4.saddr;
135 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); 135 flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
136 loop++; 136 loop++;
137 goto retry; 137 goto retry;
138 } 138 }
139 *saddr = fl4.saddr; 139 *saddr = fl4.saddr;
140 return rt; 140 return rt;
141 } 141 }
142 142
143 /* Get route to destination or remote server */ 143 /* Get route to destination or remote server */
144 static struct rtable * 144 static struct rtable *
145 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, 145 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
146 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) 146 __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
147 { 147 {
148 struct net *net = dev_net(skb_dst(skb)->dev); 148 struct net *net = dev_net(skb_dst(skb)->dev);
149 struct rtable *rt; /* Route to the other host */ 149 struct rtable *rt; /* Route to the other host */
150 struct rtable *ort; /* Original route */ 150 struct rtable *ort; /* Original route */
151 int local; 151 int local;
152 152
153 if (dest) { 153 if (dest) {
154 spin_lock(&dest->dst_lock); 154 spin_lock(&dest->dst_lock);
155 if (!(rt = (struct rtable *) 155 if (!(rt = (struct rtable *)
156 __ip_vs_dst_check(dest, rtos))) { 156 __ip_vs_dst_check(dest, rtos))) {
157 rt = do_output_route4(net, dest->addr.ip, rtos, 157 rt = do_output_route4(net, dest->addr.ip, rtos,
158 rt_mode, &dest->dst_saddr.ip); 158 rt_mode, &dest->dst_saddr.ip);
159 if (!rt) { 159 if (!rt) {
160 spin_unlock(&dest->dst_lock); 160 spin_unlock(&dest->dst_lock);
161 return NULL; 161 return NULL;
162 } 162 }
163 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); 163 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
164 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " 164 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
165 "rtos=%X\n", 165 "rtos=%X\n",
166 &dest->addr.ip, &dest->dst_saddr.ip, 166 &dest->addr.ip, &dest->dst_saddr.ip,
167 atomic_read(&rt->dst.__refcnt), rtos); 167 atomic_read(&rt->dst.__refcnt), rtos);
168 } 168 }
169 daddr = dest->addr.ip; 169 daddr = dest->addr.ip;
170 if (ret_saddr) 170 if (ret_saddr)
171 *ret_saddr = dest->dst_saddr.ip; 171 *ret_saddr = dest->dst_saddr.ip;
172 spin_unlock(&dest->dst_lock); 172 spin_unlock(&dest->dst_lock);
173 } else { 173 } else {
174 __be32 saddr = htonl(INADDR_ANY); 174 __be32 saddr = htonl(INADDR_ANY);
175 175
176 /* For such unconfigured boxes avoid many route lookups 176 /* For such unconfigured boxes avoid many route lookups
177 * for performance reasons because we do not remember saddr 177 * for performance reasons because we do not remember saddr
178 */ 178 */
179 rt_mode &= ~IP_VS_RT_MODE_CONNECT; 179 rt_mode &= ~IP_VS_RT_MODE_CONNECT;
180 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); 180 rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
181 if (!rt) 181 if (!rt)
182 return NULL; 182 return NULL;
183 if (ret_saddr) 183 if (ret_saddr)
184 *ret_saddr = saddr; 184 *ret_saddr = saddr;
185 } 185 }
186 186
187 local = rt->rt_flags & RTCF_LOCAL; 187 local = rt->rt_flags & RTCF_LOCAL;
188 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 188 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
189 rt_mode)) { 189 rt_mode)) {
190 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", 190 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
191 (rt->rt_flags & RTCF_LOCAL) ? 191 (rt->rt_flags & RTCF_LOCAL) ?
192 "local":"non-local", &daddr); 192 "local":"non-local", &daddr);
193 ip_rt_put(rt); 193 ip_rt_put(rt);
194 return NULL; 194 return NULL;
195 } 195 }
196 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 196 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
197 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { 197 !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
198 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " 198 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
199 "requires NAT method, dest: %pI4\n", 199 "requires NAT method, dest: %pI4\n",
200 &ip_hdr(skb)->daddr, &daddr); 200 &ip_hdr(skb)->daddr, &daddr);
201 ip_rt_put(rt); 201 ip_rt_put(rt);
202 return NULL; 202 return NULL;
203 } 203 }
204 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { 204 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
205 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " 205 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
206 "to non-local address, dest: %pI4\n", 206 "to non-local address, dest: %pI4\n",
207 &ip_hdr(skb)->saddr, &daddr); 207 &ip_hdr(skb)->saddr, &daddr);
208 ip_rt_put(rt); 208 ip_rt_put(rt);
209 return NULL; 209 return NULL;
210 } 210 }
211 211
212 return rt; 212 return rt;
213 } 213 }
214 214
215 /* Reroute packet to local IPv4 stack after DNAT */ 215 /* Reroute packet to local IPv4 stack after DNAT */
216 static int 216 static int
217 __ip_vs_reroute_locally(struct sk_buff *skb) 217 __ip_vs_reroute_locally(struct sk_buff *skb)
218 { 218 {
219 struct rtable *rt = skb_rtable(skb); 219 struct rtable *rt = skb_rtable(skb);
220 struct net_device *dev = rt->dst.dev; 220 struct net_device *dev = rt->dst.dev;
221 struct net *net = dev_net(dev); 221 struct net *net = dev_net(dev);
222 struct iphdr *iph = ip_hdr(skb); 222 struct iphdr *iph = ip_hdr(skb);
223 223
224 if (rt_is_input_route(rt)) { 224 if (rt_is_input_route(rt)) {
225 unsigned long orefdst = skb->_skb_refdst; 225 unsigned long orefdst = skb->_skb_refdst;
226 226
227 if (ip_route_input(skb, iph->daddr, iph->saddr, 227 if (ip_route_input(skb, iph->daddr, iph->saddr,
228 iph->tos, skb->dev)) 228 iph->tos, skb->dev))
229 return 0; 229 return 0;
230 refdst_drop(orefdst); 230 refdst_drop(orefdst);
231 } else { 231 } else {
232 struct flowi4 fl4 = { 232 struct flowi4 fl4 = {
233 .daddr = iph->daddr, 233 .daddr = iph->daddr,
234 .saddr = iph->saddr, 234 .saddr = iph->saddr,
235 .flowi4_tos = RT_TOS(iph->tos), 235 .flowi4_tos = RT_TOS(iph->tos),
236 .flowi4_mark = skb->mark, 236 .flowi4_mark = skb->mark,
237 }; 237 };
238 238
239 rt = ip_route_output_key(net, &fl4); 239 rt = ip_route_output_key(net, &fl4);
240 if (IS_ERR(rt)) 240 if (IS_ERR(rt))
241 return 0; 241 return 0;
242 if (!(rt->rt_flags & RTCF_LOCAL)) { 242 if (!(rt->rt_flags & RTCF_LOCAL)) {
243 ip_rt_put(rt); 243 ip_rt_put(rt);
244 return 0; 244 return 0;
245 } 245 }
246 /* Drop old route. */ 246 /* Drop old route. */
247 skb_dst_drop(skb); 247 skb_dst_drop(skb);
248 skb_dst_set(skb, &rt->dst); 248 skb_dst_set(skb, &rt->dst);
249 } 249 }
250 return 1; 250 return 1;
251 } 251 }
252 252
253 #ifdef CONFIG_IP_VS_IPV6 253 #ifdef CONFIG_IP_VS_IPV6
254 254
255 static inline int __ip_vs_is_local_route6(struct rt6_info *rt) 255 static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
256 { 256 {
257 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; 257 return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
258 } 258 }
259 259
260 static struct dst_entry * 260 static struct dst_entry *
261 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, 261 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
262 struct in6_addr *ret_saddr, int do_xfrm) 262 struct in6_addr *ret_saddr, int do_xfrm)
263 { 263 {
264 struct dst_entry *dst; 264 struct dst_entry *dst;
265 struct flowi6 fl6 = { 265 struct flowi6 fl6 = {
266 .daddr = *daddr, 266 .daddr = *daddr,
267 }; 267 };
268 268
269 dst = ip6_route_output(net, NULL, &fl6); 269 dst = ip6_route_output(net, NULL, &fl6);
270 if (dst->error) 270 if (dst->error)
271 goto out_err; 271 goto out_err;
272 if (!ret_saddr) 272 if (!ret_saddr)
273 return dst; 273 return dst;
274 if (ipv6_addr_any(&fl6.saddr) && 274 if (ipv6_addr_any(&fl6.saddr) &&
275 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, 275 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
276 &fl6.daddr, 0, &fl6.saddr) < 0) 276 &fl6.daddr, 0, &fl6.saddr) < 0)
277 goto out_err; 277 goto out_err;
278 if (do_xfrm) { 278 if (do_xfrm) {
279 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); 279 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
280 if (IS_ERR(dst)) { 280 if (IS_ERR(dst)) {
281 dst = NULL; 281 dst = NULL;
282 goto out_err; 282 goto out_err;
283 } 283 }
284 } 284 }
285 *ret_saddr = fl6.saddr; 285 *ret_saddr = fl6.saddr;
286 return dst; 286 return dst;
287 287
288 out_err: 288 out_err:
289 dst_release(dst); 289 dst_release(dst);
290 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); 290 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
291 return NULL; 291 return NULL;
292 } 292 }
293 293
294 /* 294 /*
295 * Get route to destination or remote server 295 * Get route to destination or remote server
296 */ 296 */
297 static struct rt6_info * 297 static struct rt6_info *
298 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, 298 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
299 struct in6_addr *daddr, struct in6_addr *ret_saddr, 299 struct in6_addr *daddr, struct in6_addr *ret_saddr,
300 int do_xfrm, int rt_mode) 300 int do_xfrm, int rt_mode)
301 { 301 {
302 struct net *net = dev_net(skb_dst(skb)->dev); 302 struct net *net = dev_net(skb_dst(skb)->dev);
303 struct rt6_info *rt; /* Route to the other host */ 303 struct rt6_info *rt; /* Route to the other host */
304 struct rt6_info *ort; /* Original route */ 304 struct rt6_info *ort; /* Original route */
305 struct dst_entry *dst; 305 struct dst_entry *dst;
306 int local; 306 int local;
307 307
308 if (dest) { 308 if (dest) {
309 spin_lock(&dest->dst_lock); 309 spin_lock(&dest->dst_lock);
310 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); 310 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
311 if (!rt) { 311 if (!rt) {
312 u32 cookie; 312 u32 cookie;
313 313
314 dst = __ip_vs_route_output_v6(net, &dest->addr.in6, 314 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
315 &dest->dst_saddr.in6, 315 &dest->dst_saddr.in6,
316 do_xfrm); 316 do_xfrm);
317 if (!dst) { 317 if (!dst) {
318 spin_unlock(&dest->dst_lock); 318 spin_unlock(&dest->dst_lock);
319 return NULL; 319 return NULL;
320 } 320 }
321 rt = (struct rt6_info *) dst; 321 rt = (struct rt6_info *) dst;
322 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; 322 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
323 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); 323 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
324 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", 324 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
325 &dest->addr.in6, &dest->dst_saddr.in6, 325 &dest->addr.in6, &dest->dst_saddr.in6,
326 atomic_read(&rt->dst.__refcnt)); 326 atomic_read(&rt->dst.__refcnt));
327 } 327 }
328 if (ret_saddr) 328 if (ret_saddr)
329 *ret_saddr = dest->dst_saddr.in6; 329 *ret_saddr = dest->dst_saddr.in6;
330 spin_unlock(&dest->dst_lock); 330 spin_unlock(&dest->dst_lock);
331 } else { 331 } else {
332 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); 332 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
333 if (!dst) 333 if (!dst)
334 return NULL; 334 return NULL;
335 rt = (struct rt6_info *) dst; 335 rt = (struct rt6_info *) dst;
336 } 336 }
337 337
338 local = __ip_vs_is_local_route6(rt); 338 local = __ip_vs_is_local_route6(rt);
339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & 339 if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
340 rt_mode)) { 340 rt_mode)) {
341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", 341 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
342 local ? "local":"non-local", daddr); 342 local ? "local":"non-local", daddr);
343 dst_release(&rt->dst); 343 dst_release(&rt->dst);
344 return NULL; 344 return NULL;
345 } 345 }
346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && 346 if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
347 !((ort = (struct rt6_info *) skb_dst(skb)) && 347 !((ort = (struct rt6_info *) skb_dst(skb)) &&
348 __ip_vs_is_local_route6(ort))) { 348 __ip_vs_is_local_route6(ort))) {
349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " 349 IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
350 "requires NAT method, dest: %pI6c\n", 350 "requires NAT method, dest: %pI6c\n",
351 &ipv6_hdr(skb)->daddr, daddr); 351 &ipv6_hdr(skb)->daddr, daddr);
352 dst_release(&rt->dst); 352 dst_release(&rt->dst);
353 return NULL; 353 return NULL;
354 } 354 }
355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 355 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) & 356 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
357 IPV6_ADDR_LOOPBACK)) { 357 IPV6_ADDR_LOOPBACK)) {
358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " 358 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
359 "to non-local address, dest: %pI6c\n", 359 "to non-local address, dest: %pI6c\n",
360 &ipv6_hdr(skb)->saddr, daddr); 360 &ipv6_hdr(skb)->saddr, daddr);
361 dst_release(&rt->dst); 361 dst_release(&rt->dst);
362 return NULL; 362 return NULL;
363 } 363 }
364 364
365 return rt; 365 return rt;
366 } 366 }
367 #endif 367 #endif
368 368
369 369
370 /* 370 /*
371 * Release dest->dst_cache before a dest is removed 371 * Release dest->dst_cache before a dest is removed
372 */ 372 */
373 void 373 void
374 ip_vs_dst_reset(struct ip_vs_dest *dest) 374 ip_vs_dst_reset(struct ip_vs_dest *dest)
375 { 375 {
376 struct dst_entry *old_dst; 376 struct dst_entry *old_dst;
377 377
378 old_dst = dest->dst_cache; 378 old_dst = dest->dst_cache;
379 dest->dst_cache = NULL; 379 dest->dst_cache = NULL;
380 dst_release(old_dst); 380 dst_release(old_dst);
381 dest->dst_saddr.ip = 0; 381 dest->dst_saddr.ip = 0;
382 } 382 }
383 383
/*
 * Prepare a tunneled skb for sending.  Evaluates to a netfilter verdict:
 * NF_ACCEPT, or whatever ip_vs_confirm_conntrack() returns when the
 * connection has IP_VS_CONN_F_NFCT set.  On NF_ACCEPT the skb's netfilter
 * state is reset and its checksum state is adjusted for forwarding.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict;						\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	else							\
		__verdict = NF_ACCEPT;				\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
397 397
/*
 * Send a NAT-ed skb.  The skb is either marked untracked or, when the
 * connection has IP_VS_CONN_F_NFCT set, its conntrack entry is updated.
 *
 * NOTE: when @local is true this macro executes "return NF_ACCEPT" in the
 * calling function; otherwise the skb is passed to the LOCAL_OUT hook.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))	\
		ip_vs_update_conntrack(skb, cp, 1);	\
	else						\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)
411 411
/*
 * Send a skb without NAT.  The skb is marked untracked unless the
 * connection has IP_VS_CONN_F_NFCT set.
 *
 * NOTE: when @local is true this macro executes "return NF_ACCEPT" in the
 * calling function; otherwise the skb is passed to the LOCAL_OUT hook.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(((cp)->flags & IP_VS_CONN_F_NFCT) == 0))	\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
423 423
424 424
425 /* 425 /*
426 * NULL transmitter (do nothing except return NF_ACCEPT) 426 * NULL transmitter (do nothing except return NF_ACCEPT)
427 */ 427 */
428 int 428 int
429 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 429 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 430 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
431 { 431 {
432 /* we do not touch skb and do not need pskb ptr */ 432 /* we do not touch skb and do not need pskb ptr */
433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 433 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
434 } 434 }
435 435
436 436
437 /* 437 /*
438 * Bypass transmitter 438 * Bypass transmitter
439 * Let packets bypass the destination when the destination is not 439 * Let packets bypass the destination when the destination is not
440 * available, it may be only used in transparent cache cluster. 440 * available, it may be only used in transparent cache cluster.
441 */ 441 */
442 int 442 int
443 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 443 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 444 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
445 { 445 {
446 struct rtable *rt; /* Route to the other host */ 446 struct rtable *rt; /* Route to the other host */
447 struct iphdr *iph = ip_hdr(skb); 447 struct iphdr *iph = ip_hdr(skb);
448 int mtu; 448 int mtu;
449 449
450 EnterFunction(10); 450 EnterFunction(10);
451 451
452 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), 452 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
453 IP_VS_RT_MODE_NON_LOCAL, NULL))) 453 IP_VS_RT_MODE_NON_LOCAL, NULL)))
454 goto tx_error_icmp; 454 goto tx_error_icmp;
455 455
456 /* MTU checking */ 456 /* MTU checking */
457 mtu = dst_mtu(&rt->dst); 457 mtu = dst_mtu(&rt->dst);
458 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 458 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
459 !skb_is_gso(skb)) { 459 !skb_is_gso(skb)) {
460 ip_rt_put(rt); 460 ip_rt_put(rt);
461 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 461 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
462 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 462 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
463 goto tx_error; 463 goto tx_error;
464 } 464 }
465 465
466 /* 466 /*
467 * Call ip_send_check because we are not sure it is called 467 * Call ip_send_check because we are not sure it is called
468 * after ip_defrag. Is copy-on-write needed? 468 * after ip_defrag. Is copy-on-write needed?
469 */ 469 */
470 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { 470 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
471 ip_rt_put(rt); 471 ip_rt_put(rt);
472 return NF_STOLEN; 472 return NF_STOLEN;
473 } 473 }
474 ip_send_check(ip_hdr(skb)); 474 ip_send_check(ip_hdr(skb));
475 475
476 /* drop old route */ 476 /* drop old route */
477 skb_dst_drop(skb); 477 skb_dst_drop(skb);
478 skb_dst_set(skb, &rt->dst); 478 skb_dst_set(skb, &rt->dst);
479 479
480 /* Another hack: avoid icmp_send in ip_fragment */ 480 /* Another hack: avoid icmp_send in ip_fragment */
481 skb->local_df = 1; 481 skb->local_df = 1;
482 482
483 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); 483 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
484 484
485 LeaveFunction(10); 485 LeaveFunction(10);
486 return NF_STOLEN; 486 return NF_STOLEN;
487 487
488 tx_error_icmp: 488 tx_error_icmp:
489 dst_link_failure(skb); 489 dst_link_failure(skb);
490 tx_error: 490 tx_error:
491 kfree_skb(skb); 491 kfree_skb(skb);
492 LeaveFunction(10); 492 LeaveFunction(10);
493 return NF_STOLEN; 493 return NF_STOLEN;
494 } 494 }
495 495
496 #ifdef CONFIG_IP_VS_IPV6 496 #ifdef CONFIG_IP_VS_IPV6
497 int 497 int
498 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 498 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 499 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
500 { 500 {
501 struct rt6_info *rt; /* Route to the other host */ 501 struct rt6_info *rt; /* Route to the other host */
502 int mtu; 502 int mtu;
503 503
504 EnterFunction(10); 504 EnterFunction(10);
505 505
506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, 506 rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
507 IP_VS_RT_MODE_NON_LOCAL); 507 IP_VS_RT_MODE_NON_LOCAL);
508 if (!rt) 508 if (!rt)
509 goto tx_error_icmp; 509 goto tx_error_icmp;
510 510
511 /* MTU checking */ 511 /* MTU checking */
512 mtu = dst_mtu(&rt->dst); 512 mtu = dst_mtu(&rt->dst);
513 if (__mtu_check_toobig_v6(skb, mtu)) { 513 if (__mtu_check_toobig_v6(skb, mtu)) {
514 if (!skb->dev) { 514 if (!skb->dev) {
515 struct net *net = dev_net(skb_dst(skb)->dev); 515 struct net *net = dev_net(skb_dst(skb)->dev);
516 516
517 skb->dev = net->loopback_dev; 517 skb->dev = net->loopback_dev;
518 } 518 }
519 /* only send ICMP too big on first fragment */ 519 /* only send ICMP too big on first fragment */
520 if (!iph->fragoffs) 520 if (!iph->fragoffs)
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
522 dst_release(&rt->dst); 522 dst_release(&rt->dst);
523 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 523 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
524 goto tx_error; 524 goto tx_error;
525 } 525 }
526 526
527 /* 527 /*
528 * Call ip_send_check because we are not sure it is called 528 * Call ip_send_check because we are not sure it is called
529 * after ip_defrag. Is copy-on-write needed? 529 * after ip_defrag. Is copy-on-write needed?
530 */ 530 */
531 skb = skb_share_check(skb, GFP_ATOMIC); 531 skb = skb_share_check(skb, GFP_ATOMIC);
532 if (unlikely(skb == NULL)) { 532 if (unlikely(skb == NULL)) {
533 dst_release(&rt->dst); 533 dst_release(&rt->dst);
534 return NF_STOLEN; 534 return NF_STOLEN;
535 } 535 }
536 536
537 /* drop old route */ 537 /* drop old route */
538 skb_dst_drop(skb); 538 skb_dst_drop(skb);
539 skb_dst_set(skb, &rt->dst); 539 skb_dst_set(skb, &rt->dst);
540 540
541 /* Another hack: avoid icmp_send in ip_fragment */ 541 /* Another hack: avoid icmp_send in ip_fragment */
542 skb->local_df = 1; 542 skb->local_df = 1;
543 543
544 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); 544 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
545 545
546 LeaveFunction(10); 546 LeaveFunction(10);
547 return NF_STOLEN; 547 return NF_STOLEN;
548 548
549 tx_error_icmp: 549 tx_error_icmp:
550 dst_link_failure(skb); 550 dst_link_failure(skb);
551 tx_error: 551 tx_error:
552 kfree_skb(skb); 552 kfree_skb(skb);
553 LeaveFunction(10); 553 LeaveFunction(10);
554 return NF_STOLEN; 554 return NF_STOLEN;
555 } 555 }
556 #endif 556 #endif
557 557
558 /* 558 /*
559 * NAT transmitter (only for outside-to-inside nat forwarding) 559 * NAT transmitter (only for outside-to-inside nat forwarding)
560 * Not used for related ICMP 560 * Not used for related ICMP
561 */ 561 */
562 int 562 int
563 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 563 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 564 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
565 { 565 {
566 struct rtable *rt; /* Route to the other host */ 566 struct rtable *rt; /* Route to the other host */
567 int mtu; 567 int mtu;
568 struct iphdr *iph = ip_hdr(skb); 568 struct iphdr *iph = ip_hdr(skb);
569 int local; 569 int local;
570 570
571 EnterFunction(10); 571 EnterFunction(10);
572 572
573 /* check if it is a connection of no-client-port */ 573 /* check if it is a connection of no-client-port */
574 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { 574 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
575 __be16 _pt, *p; 575 __be16 _pt, *p;
576 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); 576 p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
577 if (p == NULL) 577 if (p == NULL)
578 goto tx_error; 578 goto tx_error;
579 ip_vs_conn_fill_cport(cp, *p); 579 ip_vs_conn_fill_cport(cp, *p);
580 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 580 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
581 } 581 }
582 582
583 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 583 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
584 RT_TOS(iph->tos), 584 RT_TOS(iph->tos),
585 IP_VS_RT_MODE_LOCAL | 585 IP_VS_RT_MODE_LOCAL |
586 IP_VS_RT_MODE_NON_LOCAL | 586 IP_VS_RT_MODE_NON_LOCAL |
587 IP_VS_RT_MODE_RDR, NULL))) 587 IP_VS_RT_MODE_RDR, NULL)))
588 goto tx_error_icmp; 588 goto tx_error_icmp;
589 local = rt->rt_flags & RTCF_LOCAL; 589 local = rt->rt_flags & RTCF_LOCAL;
590 /* 590 /*
591 * Avoid duplicate tuple in reply direction for NAT traffic 591 * Avoid duplicate tuple in reply direction for NAT traffic
592 * to local address when connection is sync-ed 592 * to local address when connection is sync-ed
593 */ 593 */
594 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 594 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
595 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 595 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
596 enum ip_conntrack_info ctinfo; 596 enum ip_conntrack_info ctinfo;
597 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 597 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
598 598
599 if (ct && !nf_ct_is_untracked(ct)) { 599 if (ct && !nf_ct_is_untracked(ct)) {
600 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, 600 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
601 "ip_vs_nat_xmit(): " 601 "ip_vs_nat_xmit(): "
602 "stopping DNAT to local address"); 602 "stopping DNAT to local address");
603 goto tx_error_put; 603 goto tx_error_put;
604 } 604 }
605 } 605 }
606 #endif 606 #endif
607 607
608 /* From world but DNAT to loopback address? */ 608 /* From world but DNAT to loopback address? */
609 if (local && ipv4_is_loopback(cp->daddr.ip) && 609 if (local && ipv4_is_loopback(cp->daddr.ip) &&
610 rt_is_input_route(skb_rtable(skb))) { 610 rt_is_input_route(skb_rtable(skb))) {
611 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 611 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
612 "stopping DNAT to loopback address"); 612 "stopping DNAT to loopback address");
613 goto tx_error_put; 613 goto tx_error_put;
614 } 614 }
615 615
616 /* MTU checking */ 616 /* MTU checking */
617 mtu = dst_mtu(&rt->dst); 617 mtu = dst_mtu(&rt->dst);
618 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 618 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
619 !skb_is_gso(skb)) { 619 !skb_is_gso(skb)) {
620 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 620 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
621 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 621 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
622 "ip_vs_nat_xmit(): frag needed for"); 622 "ip_vs_nat_xmit(): frag needed for");
623 goto tx_error_put; 623 goto tx_error_put;
624 } 624 }
625 625
626 /* copy-on-write the packet before mangling it */ 626 /* copy-on-write the packet before mangling it */
627 if (!skb_make_writable(skb, sizeof(struct iphdr))) 627 if (!skb_make_writable(skb, sizeof(struct iphdr)))
628 goto tx_error_put; 628 goto tx_error_put;
629 629
630 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 630 if (skb_cow(skb, rt->dst.dev->hard_header_len))
631 goto tx_error_put; 631 goto tx_error_put;
632 632
633 /* mangle the packet */ 633 /* mangle the packet */
634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) 634 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
635 goto tx_error_put; 635 goto tx_error_put;
636 ip_hdr(skb)->daddr = cp->daddr.ip; 636 ip_hdr(skb)->daddr = cp->daddr.ip;
637 ip_send_check(ip_hdr(skb)); 637 ip_send_check(ip_hdr(skb));
638 638
639 if (!local) { 639 if (!local) {
640 /* drop old route */ 640 /* drop old route */
641 skb_dst_drop(skb); 641 skb_dst_drop(skb);
642 skb_dst_set(skb, &rt->dst); 642 skb_dst_set(skb, &rt->dst);
643 } else { 643 } else {
644 ip_rt_put(rt); 644 ip_rt_put(rt);
645 /* 645 /*
646 * Some IPv4 replies get local address from routes, 646 * Some IPv4 replies get local address from routes,
647 * not from iph, so while we DNAT after routing 647 * not from iph, so while we DNAT after routing
648 * we need this second input/output route. 648 * we need this second input/output route.
649 */ 649 */
650 if (!__ip_vs_reroute_locally(skb)) 650 if (!__ip_vs_reroute_locally(skb))
651 goto tx_error; 651 goto tx_error;
652 } 652 }
653 653
654 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); 654 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
655 655
656 /* FIXME: when application helper enlarges the packet and the length 656 /* FIXME: when application helper enlarges the packet and the length
657 is larger than the MTU of outgoing device, there will be still 657 is larger than the MTU of outgoing device, there will be still
658 MTU problem. */ 658 MTU problem. */
659 659
660 /* Another hack: avoid icmp_send in ip_fragment */ 660 /* Another hack: avoid icmp_send in ip_fragment */
661 skb->local_df = 1; 661 skb->local_df = 1;
662 662
663 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); 663 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
664 664
665 LeaveFunction(10); 665 LeaveFunction(10);
666 return NF_STOLEN; 666 return NF_STOLEN;
667 667
668 tx_error_icmp: 668 tx_error_icmp:
669 dst_link_failure(skb); 669 dst_link_failure(skb);
670 tx_error: 670 tx_error:
671 kfree_skb(skb); 671 kfree_skb(skb);
672 LeaveFunction(10); 672 LeaveFunction(10);
673 return NF_STOLEN; 673 return NF_STOLEN;
674 tx_error_put: 674 tx_error_put:
675 ip_rt_put(rt); 675 ip_rt_put(rt);
676 goto tx_error; 676 goto tx_error;
677 } 677 }
678 678
679 #ifdef CONFIG_IP_VS_IPV6 679 #ifdef CONFIG_IP_VS_IPV6
680 int 680 int
681 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 681 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 682 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
683 { 683 {
684 struct rt6_info *rt; /* Route to the other host */ 684 struct rt6_info *rt; /* Route to the other host */
685 int mtu; 685 int mtu;
686 int local; 686 int local;
687 687
688 EnterFunction(10); 688 EnterFunction(10);
689 689
690 /* check if it is a connection of no-client-port */ 690 /* check if it is a connection of no-client-port */
691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { 691 if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
692 __be16 _pt, *p; 692 __be16 _pt, *p;
693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); 693 p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
694 if (p == NULL) 694 if (p == NULL)
695 goto tx_error; 695 goto tx_error;
696 ip_vs_conn_fill_cport(cp, *p); 696 ip_vs_conn_fill_cport(cp, *p);
697 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 697 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
698 } 698 }
699 699
700 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 700 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
701 0, (IP_VS_RT_MODE_LOCAL | 701 0, (IP_VS_RT_MODE_LOCAL |
702 IP_VS_RT_MODE_NON_LOCAL | 702 IP_VS_RT_MODE_NON_LOCAL |
703 IP_VS_RT_MODE_RDR)))) 703 IP_VS_RT_MODE_RDR))))
704 goto tx_error_icmp; 704 goto tx_error_icmp;
705 local = __ip_vs_is_local_route6(rt); 705 local = __ip_vs_is_local_route6(rt);
706 /* 706 /*
707 * Avoid duplicate tuple in reply direction for NAT traffic 707 * Avoid duplicate tuple in reply direction for NAT traffic
708 * to local address when connection is sync-ed 708 * to local address when connection is sync-ed
709 */ 709 */
710 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 710 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
711 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 711 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
712 enum ip_conntrack_info ctinfo; 712 enum ip_conntrack_info ctinfo;
713 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 713 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
714 714
715 if (ct && !nf_ct_is_untracked(ct)) { 715 if (ct && !nf_ct_is_untracked(ct)) {
716 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, 716 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
717 "ip_vs_nat_xmit_v6(): " 717 "ip_vs_nat_xmit_v6(): "
718 "stopping DNAT to local address"); 718 "stopping DNAT to local address");
719 goto tx_error_put; 719 goto tx_error_put;
720 } 720 }
721 } 721 }
722 #endif 722 #endif
723 723
724 /* From world but DNAT to loopback address? */ 724 /* From world but DNAT to loopback address? */
725 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 725 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
726 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 726 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
727 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, 727 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
728 "ip_vs_nat_xmit_v6(): " 728 "ip_vs_nat_xmit_v6(): "
729 "stopping DNAT to loopback address"); 729 "stopping DNAT to loopback address");
730 goto tx_error_put; 730 goto tx_error_put;
731 } 731 }
732 732
733 /* MTU checking */ 733 /* MTU checking */
734 mtu = dst_mtu(&rt->dst); 734 mtu = dst_mtu(&rt->dst);
735 if (__mtu_check_toobig_v6(skb, mtu)) { 735 if (__mtu_check_toobig_v6(skb, mtu)) {
736 if (!skb->dev) { 736 if (!skb->dev) {
737 struct net *net = dev_net(skb_dst(skb)->dev); 737 struct net *net = dev_net(skb_dst(skb)->dev);
738 738
739 skb->dev = net->loopback_dev; 739 skb->dev = net->loopback_dev;
740 } 740 }
741 /* only send ICMP too big on first fragment */ 741 /* only send ICMP too big on first fragment */
742 if (!iph->fragoffs) 742 if (!iph->fragoffs)
743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 743 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, 744 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
745 "ip_vs_nat_xmit_v6(): frag needed for"); 745 "ip_vs_nat_xmit_v6(): frag needed for");
746 goto tx_error_put; 746 goto tx_error_put;
747 } 747 }
748 748
749 /* copy-on-write the packet before mangling it */ 749 /* copy-on-write the packet before mangling it */
750 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 750 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
751 goto tx_error_put; 751 goto tx_error_put;
752 752
753 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 753 if (skb_cow(skb, rt->dst.dev->hard_header_len))
754 goto tx_error_put; 754 goto tx_error_put;
755 755
756 /* mangle the packet */ 756 /* mangle the packet */
757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) 757 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
758 goto tx_error; 758 goto tx_error;
759 ipv6_hdr(skb)->daddr = cp->daddr.in6; 759 ipv6_hdr(skb)->daddr = cp->daddr.in6;
760 760
761 if (!local || !skb->dev) { 761 if (!local || !skb->dev) {
762 /* drop the old route when skb is not shared */ 762 /* drop the old route when skb is not shared */
763 skb_dst_drop(skb); 763 skb_dst_drop(skb);
764 skb_dst_set(skb, &rt->dst); 764 skb_dst_set(skb, &rt->dst);
765 } else { 765 } else {
766 /* destined to loopback, do we need to change route? */ 766 /* destined to loopback, do we need to change route? */
767 dst_release(&rt->dst); 767 dst_release(&rt->dst);
768 } 768 }
769 769
770 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); 770 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
771 771
772 /* FIXME: when application helper enlarges the packet and the length 772 /* FIXME: when application helper enlarges the packet and the length
773 is larger than the MTU of outgoing device, there will be still 773 is larger than the MTU of outgoing device, there will be still
774 MTU problem. */ 774 MTU problem. */
775 775
776 /* Another hack: avoid icmp_send in ip_fragment */ 776 /* Another hack: avoid icmp_send in ip_fragment */
777 skb->local_df = 1; 777 skb->local_df = 1;
778 778
779 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); 779 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
780 780
781 LeaveFunction(10); 781 LeaveFunction(10);
782 return NF_STOLEN; 782 return NF_STOLEN;
783 783
784 tx_error_icmp: 784 tx_error_icmp:
785 dst_link_failure(skb); 785 dst_link_failure(skb);
786 tx_error: 786 tx_error:
787 LeaveFunction(10); 787 LeaveFunction(10);
788 kfree_skb(skb); 788 kfree_skb(skb);
789 return NF_STOLEN; 789 return NF_STOLEN;
790 tx_error_put: 790 tx_error_put:
791 dst_release(&rt->dst); 791 dst_release(&rt->dst);
792 goto tx_error; 792 goto tx_error;
793 } 793 }
794 #endif 794 #endif
795 795
796 796
797 /* 797 /*
798 * IP Tunneling transmitter 798 * IP Tunneling transmitter
799 * 799 *
800 * This function encapsulates the packet in a new IP packet, its 800 * This function encapsulates the packet in a new IP packet, its
801 * destination will be set to cp->daddr. Most code of this function 801 * destination will be set to cp->daddr. Most code of this function
802 * is taken from ipip.c. 802 * is taken from ipip.c.
803 * 803 *
804 * It is used in VS/TUN cluster. The load balancer selects a real 804 * It is used in VS/TUN cluster. The load balancer selects a real
805 * server from a cluster based on a scheduling algorithm, 805 * server from a cluster based on a scheduling algorithm,
806 * encapsulates the request packet and forwards it to the selected 806 * encapsulates the request packet and forwards it to the selected
807 * server. For example, all real servers are configured with 807 * server. For example, all real servers are configured with
808 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives 808 * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
809 * the encapsulated packet, it will decapsulate the packet, process 809 * the encapsulated packet, it will decapsulate the packet, process
810 * the request and return the response packets directly to the client 810 * the request and return the response packets directly to the client
811 * without passing the load balancer. This can greatly increase the 811 * without passing the load balancer. This can greatly increase the
812 * scalability of virtual server. 812 * scalability of virtual server.
813 * 813 *
814 * Used for ANY protocol 814 * Used for ANY protocol
815 */ 815 */
/*
 * ip_vs_tunnel_xmit() - send an IPv4 packet wrapped in a new IPv4 header.
 *
 * @skb:   packet to encapsulate
 * @cp:    connection; cp->dest and cp->daddr.ip are used for the route
 *         lookup, cp->daddr.ip becomes the outer destination address
 * @pp:    not referenced in this body
 * @ipvsh: not referenced in this body
 *
 * The outer header is built with protocol IPPROTO_IPIP, the inner packet's
 * TOS and TTL, and saddr (passed by address to __ip_vs_get_out_rt()) as
 * source.  Every return statement written in this body returns NF_STOLEN.
 *
 * NOTE(review): IP_VS_XMIT and IP_VS_XMIT_TUNNEL are macros that are not
 * shown here; whether they contain their own return statements cannot be
 * told from this function alone.
 */
816 int 816 int
817 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 817 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
818 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 818 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
819 { 819 {
820 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 820 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
821 struct rtable *rt; /* Route to the other host */ 821 struct rtable *rt; /* Route to the other host */
822 __be32 saddr; /* Source for tunnel */ 822 __be32 saddr; /* Source for tunnel */
823 struct net_device *tdev; /* Device to other host */ 823 struct net_device *tdev; /* Device to other host */
824 struct iphdr *old_iph = ip_hdr(skb); 824 struct iphdr *old_iph = ip_hdr(skb);
825 u8 tos = old_iph->tos; 825 u8 tos = old_iph->tos;
826 __be16 df; 826 __be16 df;
827 struct iphdr *iph; /* Our new IP header */ 827 struct iphdr *iph; /* Our new IP header */
828 unsigned int max_headroom; /* The extra header space needed */ 828 unsigned int max_headroom; /* The extra header space needed */
829 int mtu; 829 int mtu;
830 int ret; 830 int ret;
831 831
832 EnterFunction(10); 832 EnterFunction(10);
833 833
834 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 834 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
835 RT_TOS(tos), IP_VS_RT_MODE_LOCAL | 835 RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
836 IP_VS_RT_MODE_NON_LOCAL | 836 IP_VS_RT_MODE_NON_LOCAL |
837 IP_VS_RT_MODE_CONNECT, 837 IP_VS_RT_MODE_CONNECT,
838 &saddr))) 838 &saddr)))
839 goto tx_error_icmp; 839 goto tx_error_icmp;
/*
 * NOTE(review): rt is released inside this branch but is still dereferenced
 * further down (rt->dst.dev, dst_mtu(&rt->dst)).  That is only safe if
 * IP_VS_XMIT(..., 1) leaves the function; the macro is not shown here, so
 * confirm against its definition.
 */
840 if (rt->rt_flags & RTCF_LOCAL) { 840 if (rt->rt_flags & RTCF_LOCAL) {
841 ip_rt_put(rt); 841 ip_rt_put(rt);
842 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 842 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
843 } 843 }
844 844
845 tdev = rt->dst.dev; 845 tdev = rt->dst.dev;
846 846
/* Route MTU minus the outer struct iphdr that is about to be added. */
847 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 847 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
848 if (mtu < 68) { 848 if (mtu < 68) {
849 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 849 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
850 goto tx_error_put; 850 goto tx_error_put;
851 } 851 }
/* The reduced MTU is pushed to the packet's current dst only when that dst is an output route. */
852 if (rt_is_output_route(skb_rtable(skb))) 852 if (rt_is_output_route(skb_rtable(skb)))
853 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 853 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
854 854
/* df is the inner header's DF bit when sysctl_pmtu_disc(ipvs) is non-zero, otherwise 0. */
855 /* Copy DF, reset fragment offset and MF */ 855 /* Copy DF, reset fragment offset and MF */
856 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; 856 df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
857 857
858 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { 858 if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
859 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 859 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
860 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 860 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
861 goto tx_error_put; 861 goto tx_error_put;
862 } 862 }
863 863
864 /* 864 /*
865 * Okay, now see if we can stuff it in the buffer as-is. 865 * Okay, now see if we can stuff it in the buffer as-is.
866 */ 866 */
867 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); 867 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
868 868
/*
 * A reallocated skb is used when headroom is too small or the buffer is
 * cloned/shared.  old_iph is re-read afterwards because skb then points at
 * the new buffer.  The allocation-failure path returns directly and does
 * not call LeaveFunction(10).
 */
869 if (skb_headroom(skb) < max_headroom 869 if (skb_headroom(skb) < max_headroom
870 || skb_cloned(skb) || skb_shared(skb)) { 870 || skb_cloned(skb) || skb_shared(skb)) {
871 struct sk_buff *new_skb = 871 struct sk_buff *new_skb =
872 skb_realloc_headroom(skb, max_headroom); 872 skb_realloc_headroom(skb, max_headroom);
873 if (!new_skb) { 873 if (!new_skb) {
874 ip_rt_put(rt); 874 ip_rt_put(rt);
875 kfree_skb(skb); 875 kfree_skb(skb);
876 IP_VS_ERR_RL("%s(): no memory\n", __func__); 876 IP_VS_ERR_RL("%s(): no memory\n", __func__);
877 return NF_STOLEN; 877 return NF_STOLEN;
878 } 878 }
879 consume_skb(skb); 879 consume_skb(skb);
880 skb = new_skb; 880 skb = new_skb;
881 old_iph = ip_hdr(skb); 881 old_iph = ip_hdr(skb);
882 } 882 }
883 883
/* The inner IP header becomes the payload: its offset is recorded as the transport header. */
884 skb->transport_header = skb->network_header; 884 skb->transport_header = skb->network_header;
885 885
886 /* fix old IP header checksum */ 886 /* fix old IP header checksum */
887 ip_send_check(old_iph); 887 ip_send_check(old_iph);
888 888
889 skb_push(skb, sizeof(struct iphdr)); 889 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb); 890 skb_reset_network_header(skb);
891 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 891 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
892 892
/* rt is attached to skb here; no goto to the error labels follows this point. */
893 /* drop old route */ 893 /* drop old route */
894 skb_dst_drop(skb); 894 skb_dst_drop(skb);
895 skb_dst_set(skb, &rt->dst); 895 skb_dst_set(skb, &rt->dst);
896 896
897 /* 897 /*
898 * Push down and install the IPIP header. 898 * Push down and install the IPIP header.
899 */ 899 */
900 iph = ip_hdr(skb); 900 iph = ip_hdr(skb);
901 iph->version = 4; 901 iph->version = 4;
902 iph->ihl = sizeof(struct iphdr)>>2; 902 iph->ihl = sizeof(struct iphdr)>>2;
903 iph->frag_off = df; 903 iph->frag_off = df;
904 iph->protocol = IPPROTO_IPIP; 904 iph->protocol = IPPROTO_IPIP;
905 iph->tos = tos; 905 iph->tos = tos;
906 iph->daddr = cp->daddr.ip; 906 iph->daddr = cp->daddr.ip;
907 iph->saddr = saddr; 907 iph->saddr = saddr;
908 iph->ttl = old_iph->ttl; 908 iph->ttl = old_iph->ttl;
909 ip_select_ident(iph, &rt->dst, NULL); 909 ip_select_ident(iph, &rt->dst, NULL);
910 910
911 /* Another hack: avoid icmp_send in ip_fragment */ 911 /* Another hack: avoid icmp_send in ip_fragment */
912 skb->local_df = 1; 912 skb->local_df = 1;
913 913
/* Only NF_ACCEPT (send) and NF_DROP (free) are acted on; for any other value skb is not touched again here. */
914 ret = IP_VS_XMIT_TUNNEL(skb, cp); 914 ret = IP_VS_XMIT_TUNNEL(skb, cp);
915 if (ret == NF_ACCEPT) 915 if (ret == NF_ACCEPT)
916 ip_local_out(skb); 916 ip_local_out(skb);
917 else if (ret == NF_DROP) 917 else if (ret == NF_DROP)
918 kfree_skb(skb); 918 kfree_skb(skb);
919 919
920 LeaveFunction(10); 920 LeaveFunction(10);
921 921
922 return NF_STOLEN; 922 return NF_STOLEN;
923 923
/*
 * Error exits: tx_error_icmp is reached when the route lookup returned NULL
 * and calls dst_link_failure() before freeing skb; tx_error_put releases rt
 * first and then jumps to the common free path.
 */
924 tx_error_icmp: 924 tx_error_icmp:
925 dst_link_failure(skb); 925 dst_link_failure(skb);
926 tx_error: 926 tx_error:
927 kfree_skb(skb); 927 kfree_skb(skb);
928 LeaveFunction(10); 928 LeaveFunction(10);
929 return NF_STOLEN; 929 return NF_STOLEN;
930 tx_error_put: 930 tx_error_put:
931 ip_rt_put(rt); 931 ip_rt_put(rt);
932 goto tx_error; 932 goto tx_error;
933 } 933 }
934 934
935 #ifdef CONFIG_IP_VS_IPV6 935 #ifdef CONFIG_IP_VS_IPV6
/*
 * ip_vs_tunnel_xmit_v6() - send an IPv6 packet wrapped in a new IPv6 header.
 *
 * @skb:   packet to encapsulate
 * @cp:    connection; cp->dest and cp->daddr.in6 are used for the route
 *         lookup, cp->daddr.in6 becomes the outer destination address
 * @pp:    not referenced in this body
 * @ipvsh: only ipvsh->fragoffs is read, to decide whether to send
 *         ICMPV6_PKT_TOOBIG
 *
 * The outer header uses nexthdr IPPROTO_IPV6, copies priority and hop_limit
 * from the inner header and zeroes the flow label.  Every return statement
 * written in this body returns NF_STOLEN.
 *
 * NOTE(review): IP_VS_XMIT and IP_VS_XMIT_TUNNEL are macros that are not
 * shown here; whether they contain their own return statements cannot be
 * told from this function alone.
 */
936 int 936 int
937 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 937 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
938 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 938 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
939 { 939 {
940 struct rt6_info *rt; /* Route to the other host */ 940 struct rt6_info *rt; /* Route to the other host */
941 struct in6_addr saddr; /* Source for tunnel */ 941 struct in6_addr saddr; /* Source for tunnel */
942 struct net_device *tdev; /* Device to other host */ 942 struct net_device *tdev; /* Device to other host */
943 struct ipv6hdr *old_iph = ipv6_hdr(skb); 943 struct ipv6hdr *old_iph = ipv6_hdr(skb);
944 struct ipv6hdr *iph; /* Our new IP header */ 944 struct ipv6hdr *iph; /* Our new IP header */
945 unsigned int max_headroom; /* The extra header space needed */ 945 unsigned int max_headroom; /* The extra header space needed */
946 int mtu; 946 int mtu;
947 int ret; 947 int ret;
948 948
949 EnterFunction(10); 949 EnterFunction(10);
950 950
951 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, 951 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
952 &saddr, 1, (IP_VS_RT_MODE_LOCAL | 952 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
953 IP_VS_RT_MODE_NON_LOCAL)))) 953 IP_VS_RT_MODE_NON_LOCAL))))
954 goto tx_error_icmp; 954 goto tx_error_icmp;
/*
 * NOTE(review): rt is released inside this branch but is still dereferenced
 * further down (rt->dst.dev, dst_mtu(&rt->dst)).  That is only safe if
 * IP_VS_XMIT(..., 1) leaves the function; the macro is not shown here, so
 * confirm against its definition.
 */
955 if (__ip_vs_is_local_route6(rt)) { 955 if (__ip_vs_is_local_route6(rt)) {
956 dst_release(&rt->dst); 956 dst_release(&rt->dst);
957 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); 957 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
958 } 958 }
959 959
960 tdev = rt->dst.dev; 960 tdev = rt->dst.dev;
961 961
/* Route MTU minus the outer struct ipv6hdr that is about to be added. */
962 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 962 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
963 if (mtu < IPV6_MIN_MTU) { 963 if (mtu < IPV6_MIN_MTU) {
964 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, 964 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
965 IPV6_MIN_MTU); 965 IPV6_MIN_MTU);
966 goto tx_error_put; 966 goto tx_error_put;
967 } 967 }
968 if (skb_dst(skb)) 968 if (skb_dst(skb))
969 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 969 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
970 970
971 /* MTU checking: Notice that 'mtu' have been adjusted before hand */ 971 /* MTU checking: Notice that 'mtu' have been adjusted before hand */
972 if (__mtu_check_toobig_v6(skb, mtu)) { 972 if (__mtu_check_toobig_v6(skb, mtu)) {
/* When skb has no device, the loopback device of the dst's namespace is assigned before icmpv6_send() may be called. */
973 if (!skb->dev) { 973 if (!skb->dev) {
974 struct net *net = dev_net(skb_dst(skb)->dev); 974 struct net *net = dev_net(skb_dst(skb)->dev);
975 975
976 skb->dev = net->loopback_dev; 976 skb->dev = net->loopback_dev;
977 } 977 }
978 /* only send ICMP too big on first fragment */ 978 /* only send ICMP too big on first fragment */
979 if (!ipvsh->fragoffs) 979 if (!ipvsh->fragoffs)
980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 980 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
981 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 981 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
982 goto tx_error_put; 982 goto tx_error_put;
983 } 983 }
984 984
985 /* 985 /*
986 * Okay, now see if we can stuff it in the buffer as-is. 986 * Okay, now see if we can stuff it in the buffer as-is.
987 */ 987 */
988 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); 988 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
989 989
/*
 * A reallocated skb is used when headroom is too small or the buffer is
 * cloned/shared.  old_iph is re-read afterwards because skb then points at
 * the new buffer.  The allocation-failure path returns directly and does
 * not call LeaveFunction(10).
 */
990 if (skb_headroom(skb) < max_headroom 990 if (skb_headroom(skb) < max_headroom
991 || skb_cloned(skb) || skb_shared(skb)) { 991 || skb_cloned(skb) || skb_shared(skb)) {
992 struct sk_buff *new_skb = 992 struct sk_buff *new_skb =
993 skb_realloc_headroom(skb, max_headroom); 993 skb_realloc_headroom(skb, max_headroom);
994 if (!new_skb) { 994 if (!new_skb) {
995 dst_release(&rt->dst); 995 dst_release(&rt->dst);
996 kfree_skb(skb); 996 kfree_skb(skb);
997 IP_VS_ERR_RL("%s(): no memory\n", __func__); 997 IP_VS_ERR_RL("%s(): no memory\n", __func__);
998 return NF_STOLEN; 998 return NF_STOLEN;
999 } 999 }
1000 consume_skb(skb); 1000 consume_skb(skb);
1001 skb = new_skb; 1001 skb = new_skb;
1002 old_iph = ipv6_hdr(skb); 1002 old_iph = ipv6_hdr(skb);
1003 } 1003 }
1004 1004
/* The inner IPv6 header becomes the payload: its offset is recorded as the transport header. */
1005 skb->transport_header = skb->network_header; 1005 skb->transport_header = skb->network_header;
1006 1006
1007 skb_push(skb, sizeof(struct ipv6hdr)); 1007 skb_push(skb, sizeof(struct ipv6hdr));
1008 skb_reset_network_header(skb); 1008 skb_reset_network_header(skb);
/* NOTE(review): this is the same IPCB(skb)->opt clearing as in ip_vs_tunnel_xmit(); confirm it is intended on the IPv6 path. */
1009 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 1009 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1010 1010
/* rt is attached to skb here; no goto to the error labels follows this point. */
1011 /* drop old route */ 1011 /* drop old route */
1012 skb_dst_drop(skb); 1012 skb_dst_drop(skb);
1013 skb_dst_set(skb, &rt->dst); 1013 skb_dst_set(skb, &rt->dst);
1014 1014
1015 /* 1015 /*
1016 * Push down and install the IPIP header. 1016 * Push down and install the IPIP header.
1017 */ 1017 */
1018 iph = ipv6_hdr(skb); 1018 iph = ipv6_hdr(skb);
1019 iph->version = 6; 1019 iph->version = 6;
1020 iph->nexthdr = IPPROTO_IPV6; 1020 iph->nexthdr = IPPROTO_IPV6;
/* Outer payload length = inner payload length + size of the inner IPv6 header. */
1021 iph->payload_len = old_iph->payload_len; 1021 iph->payload_len = old_iph->payload_len;
1022 be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); 1022 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
1023 iph->priority = old_iph->priority; 1023 iph->priority = old_iph->priority;
1024 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 1024 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
1025 iph->daddr = cp->daddr.in6; 1025 iph->daddr = cp->daddr.in6;
1026 iph->saddr = saddr; 1026 iph->saddr = saddr;
1027 iph->hop_limit = old_iph->hop_limit; 1027 iph->hop_limit = old_iph->hop_limit;
1028 1028
1029 /* Another hack: avoid icmp_send in ip_fragment */ 1029 /* Another hack: avoid icmp_send in ip_fragment */
1030 skb->local_df = 1; 1030 skb->local_df = 1;
1031 1031
/* Only NF_ACCEPT (send) and NF_DROP (free) are acted on; for any other value skb is not touched again here. */
1032 ret = IP_VS_XMIT_TUNNEL(skb, cp); 1032 ret = IP_VS_XMIT_TUNNEL(skb, cp);
1033 if (ret == NF_ACCEPT) 1033 if (ret == NF_ACCEPT)
1034 ip6_local_out(skb); 1034 ip6_local_out(skb);
1035 else if (ret == NF_DROP) 1035 else if (ret == NF_DROP)
1036 kfree_skb(skb); 1036 kfree_skb(skb);
1037 1037
1038 LeaveFunction(10); 1038 LeaveFunction(10);
1039 1039
1040 return NF_STOLEN; 1040 return NF_STOLEN;
1041 1041
/*
 * Error exits: tx_error_icmp is reached when the route lookup returned NULL
 * and calls dst_link_failure() before freeing skb; tx_error_put releases rt
 * first and then jumps to the common free path.
 */
1042 tx_error_icmp: 1042 tx_error_icmp:
1043 dst_link_failure(skb); 1043 dst_link_failure(skb);
1044 tx_error: 1044 tx_error:
1045 kfree_skb(skb); 1045 kfree_skb(skb);
1046 LeaveFunction(10); 1046 LeaveFunction(10);
1047 return NF_STOLEN; 1047 return NF_STOLEN;
1048 tx_error_put: 1048 tx_error_put:
1049 dst_release(&rt->dst); 1049 dst_release(&rt->dst);
1050 goto tx_error; 1050 goto tx_error;
1051 } 1051 }
1052 #endif 1052 #endif
1053 1053
1054 1054
1055 /* 1055 /*
1056 * Direct Routing transmitter 1056 * Direct Routing transmitter
1057 * Used for ANY protocol 1057 * Used for ANY protocol
1058 */ 1058 */
/*
 * ip_vs_dr_xmit() - forward an IPv4 packet over the route to cp->daddr.ip.
 *
 * @skb:   packet to forward
 * @cp:    connection; cp->dest and cp->daddr.ip are used for the route lookup
 * @pp:    not referenced in this body
 * @ipvsh: not referenced in this body
 *
 * No address in the packet is written here; the function only refreshes the
 * IP header checksum via ip_send_check(), replaces skb's dst with the looked
 * up route and passes skb to IP_VS_XMIT.  Every return statement written in
 * this body returns NF_STOLEN.
 *
 * NOTE(review): IP_VS_XMIT is a macro that is not shown here; whether it
 * contains its own return statement cannot be told from this function alone.
 */
1059 int 1059 int
1060 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1060 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) 1061 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
1062 { 1062 {
1063 struct rtable *rt; /* Route to the other host */ 1063 struct rtable *rt; /* Route to the other host */
1064 struct iphdr *iph = ip_hdr(skb); 1064 struct iphdr *iph = ip_hdr(skb);
1065 int mtu; 1065 int mtu;
1066 1066
1067 EnterFunction(10); 1067 EnterFunction(10);
1068 1068
1069 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1069 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1070 RT_TOS(iph->tos), 1070 RT_TOS(iph->tos),
1071 IP_VS_RT_MODE_LOCAL | 1071 IP_VS_RT_MODE_LOCAL |
1072 IP_VS_RT_MODE_NON_LOCAL | 1072 IP_VS_RT_MODE_NON_LOCAL |
1073 IP_VS_RT_MODE_KNOWN_NH, NULL))) 1073 IP_VS_RT_MODE_KNOWN_NH, NULL)))
1074 goto tx_error_icmp; 1074 goto tx_error_icmp;
/*
 * NOTE(review): rt is released inside this branch but is still dereferenced
 * below (dst_mtu(&rt->dst)).  That is only safe if IP_VS_XMIT(..., 1) leaves
 * the function; the macro is not shown here, so confirm against its
 * definition.
 */
1075 if (rt->rt_flags & RTCF_LOCAL) { 1075 if (rt->rt_flags & RTCF_LOCAL) {
1076 ip_rt_put(rt); 1076 ip_rt_put(rt);
1077 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); 1077 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1078 } 1078 }
1079 1079
1080 /* MTU checking */ 1080 /* MTU checking */
1081 mtu = dst_mtu(&rt->dst); 1081 mtu = dst_mtu(&rt->dst);
/* This function has no tx_error_put label, so rt is released inline before jumping to tx_error. */
1082 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && 1082 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1083 !skb_is_gso(skb)) { 1083 !skb_is_gso(skb)) {
1084 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 1084 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
1085 ip_rt_put(rt); 1085 ip_rt_put(rt);
1086 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1086 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1087 goto tx_error; 1087 goto tx_error;
1088 } 1088 }
1089 1089
1090 /* 1090 /*
1091 * Call ip_send_check because we are not sure it is called 1091 * Call ip_send_check because we are not sure it is called
1092 * after ip_defrag. Is copy-on-write needed? 1092 * after ip_defrag. Is copy-on-write needed?
1093 */ 1093 */
/*
 * skb is reassigned from skb_share_check(); on NULL only rt is released and
 * the function returns without calling kfree_skb() or LeaveFunction(10)
 * (presumably skb_share_check() has already disposed of the original -
 * confirm).  ip_hdr(skb) is re-read below rather than reusing iph because
 * skb may have changed.
 */
1094 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { 1094 if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1095 ip_rt_put(rt); 1095 ip_rt_put(rt);
1096 return NF_STOLEN; 1096 return NF_STOLEN;
1097 } 1097 }
1098 ip_send_check(ip_hdr(skb)); 1098 ip_send_check(ip_hdr(skb));
1099 1099
1100 /* drop old route */ 1100 /* drop old route */
1101 skb_dst_drop(skb); 1101 skb_dst_drop(skb);
1102 skb_dst_set(skb, &rt->dst); 1102 skb_dst_set(skb, &rt->dst);
1103 1103
1104 /* Another hack: avoid icmp_send in ip_fragment */ 1104 /* Another hack: avoid icmp_send in ip_fragment */
1105 skb->local_df = 1; 1105 skb->local_df = 1;
1106 1106
1107 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); 1107 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1108 1108
1109 LeaveFunction(10); 1109 LeaveFunction(10);
1110 return NF_STOLEN; 1110 return NF_STOLEN;
1111 1111
/* Error exits: tx_error_icmp (route lookup returned NULL) reports link failure, then falls into tx_error, which frees skb. */
1112 tx_error_icmp: 1112 tx_error_icmp:
1113 dst_link_failure(skb); 1113 dst_link_failure(skb);
1114 tx_error: 1114 tx_error:
1115 kfree_skb(skb); 1115 kfree_skb(skb);
1116 LeaveFunction(10); 1116 LeaveFunction(10);
1117 return NF_STOLEN; 1117 return NF_STOLEN;
1118 } 1118 }
1119 1119
1120 #ifdef CONFIG_IP_VS_IPV6 1120 #ifdef CONFIG_IP_VS_IPV6
/*
 * ip_vs_dr_xmit_v6() - forward an IPv6 packet over the route to cp->daddr.in6.
 *
 * @skb: packet to forward
 * @cp:  connection; cp->dest and cp->daddr.in6 are used for the route lookup
 * @pp:  not referenced in this body
 * @iph: only iph->fragoffs is read, to decide whether to send
 *       ICMPV6_PKT_TOOBIG
 *
 * No header field of the packet is written here; the function replaces
 * skb's dst with the looked up route and passes skb to IP_VS_XMIT.  Every
 * return statement written in this body returns NF_STOLEN.
 *
 * NOTE(review): IP_VS_XMIT is a macro that is not shown here; whether it
 * contains its own return statement cannot be told from this function alone.
 */
1121 int 1121 int
1122 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1122 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) 1123 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
1124 { 1124 {
1125 struct rt6_info *rt; /* Route to the other host */ 1125 struct rt6_info *rt; /* Route to the other host */
1126 int mtu; 1126 int mtu;
1127 1127
1128 EnterFunction(10); 1128 EnterFunction(10);
1129 1129
1130 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1130 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1131 0, (IP_VS_RT_MODE_LOCAL | 1131 0, (IP_VS_RT_MODE_LOCAL |
1132 IP_VS_RT_MODE_NON_LOCAL)))) 1132 IP_VS_RT_MODE_NON_LOCAL))))
1133 goto tx_error_icmp; 1133 goto tx_error_icmp;
/*
 * NOTE(review): rt is released inside this branch but is still dereferenced
 * below (dst_mtu(&rt->dst)).  That is only safe if IP_VS_XMIT(..., 1) leaves
 * the function; the macro is not shown here, so confirm against its
 * definition.
 */
1134 if (__ip_vs_is_local_route6(rt)) { 1134 if (__ip_vs_is_local_route6(rt)) {
1135 dst_release(&rt->dst); 1135 dst_release(&rt->dst);
1136 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); 1136 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1137 } 1137 }
1138 1138
1139 /* MTU checking */ 1139 /* MTU checking */
1140 mtu = dst_mtu(&rt->dst); 1140 mtu = dst_mtu(&rt->dst);
1141 if (__mtu_check_toobig_v6(skb, mtu)) { 1141 if (__mtu_check_toobig_v6(skb, mtu)) {
/* When skb has no device, the loopback device of the dst's namespace is assigned before icmpv6_send() may be called. */
1142 if (!skb->dev) { 1142 if (!skb->dev) {
1143 struct net *net = dev_net(skb_dst(skb)->dev); 1143 struct net *net = dev_net(skb_dst(skb)->dev);
1144 1144
1145 skb->dev = net->loopback_dev; 1145 skb->dev = net->loopback_dev;
1146 } 1146 }
1147 /* only send ICMP too big on first fragment */ 1147 /* only send ICMP too big on first fragment */
1148 if (!iph->fragoffs) 1148 if (!iph->fragoffs)
1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1149 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/* This function has no tx_error_put label, so rt is released inline before jumping to tx_error. */
1150 dst_release(&rt->dst); 1150 dst_release(&rt->dst);
1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1151 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1152 goto tx_error; 1152 goto tx_error;
1153 } 1153 }
1154 1154
1155 /* 1155 /*
1156 * Call ip_send_check because we are not sure it is called 1156 * Call ip_send_check because we are not sure it is called
1157 * after ip_defrag. Is copy-on-write needed? 1157 * after ip_defrag. Is copy-on-write needed?
1158 */ 1158 */
/*
 * NOTE(review): the comment above mentions ip_send_check, but no such call
 * appears in this IPv6 function.  skb is reassigned from skb_share_check();
 * on NULL only rt is released and the function returns without calling
 * kfree_skb() or LeaveFunction(10) (presumably skb_share_check() has already
 * disposed of the original - confirm).
 */
1159 skb = skb_share_check(skb, GFP_ATOMIC); 1159 skb = skb_share_check(skb, GFP_ATOMIC);
1160 if (unlikely(skb == NULL)) { 1160 if (unlikely(skb == NULL)) {
1161 dst_release(&rt->dst); 1161 dst_release(&rt->dst);
1162 return NF_STOLEN; 1162 return NF_STOLEN;
1163 } 1163 }
1164 1164
1165 /* drop old route */ 1165 /* drop old route */
1166 skb_dst_drop(skb); 1166 skb_dst_drop(skb);
1167 skb_dst_set(skb, &rt->dst); 1167 skb_dst_set(skb, &rt->dst);
1168 1168
1169 /* Another hack: avoid icmp_send in ip_fragment */ 1169 /* Another hack: avoid icmp_send in ip_fragment */
1170 skb->local_df = 1; 1170 skb->local_df = 1;
1171 1171
1172 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); 1172 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
1173 1173
1174 LeaveFunction(10); 1174 LeaveFunction(10);
1175 return NF_STOLEN; 1175 return NF_STOLEN;
1176 1176
/* Error exits: tx_error_icmp (route lookup returned NULL) reports link failure, then falls into tx_error, which frees skb. */
1177 tx_error_icmp: 1177 tx_error_icmp:
1178 dst_link_failure(skb); 1178 dst_link_failure(skb);
1179 tx_error: 1179 tx_error:
1180 kfree_skb(skb); 1180 kfree_skb(skb);
1181 LeaveFunction(10); 1181 LeaveFunction(10);
1182 return NF_STOLEN; 1182 return NF_STOLEN;
1183 } 1183 }
1184 #endif 1184 #endif
1185 1185
1186 1186
1187 /* 1187 /*
1188 * ICMP packet transmitter 1188 * ICMP packet transmitter
1189 * called by the ip_vs_in_icmp 1189 * called by the ip_vs_in_icmp
1190 */ 1190 */
/*
 * ip_vs_icmp_xmit() - transmit an IPv4 ICMP packet belonging to connection cp.
 *
 * @skb:     ICMP packet
 * @cp:      connection; its forwarding method selects the path taken below
 * @pp:      passed on to cp->packet_xmit() and to ip_vs_nat_icmp()
 * @offset:  number of leading bytes made writable with skb_make_writable()
 *           before the packet is mangled
 * @hooknum: netfilter hook; NF_INET_FORWARD restricts the route lookup to
 *           IP_VS_RT_MODE_NON_LOCAL
 * @iph:     passed on to cp->packet_xmit() only
 *
 * Return: for a forwarding method other than IP_VS_CONN_F_MASQ, the result
 * of cp->packet_xmit(), or NF_ACCEPT when cp->packet_xmit is NULL.  On the
 * VS/NAT path every return written in this body yields NF_STOLEN.
 *
 * NOTE(review): IP_VS_XMIT_NAT is a macro that is not shown here; whether it
 * contains its own return statement cannot be told from this function alone.
 */
1191 int 1191 int
1192 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, 1192 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1193 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1193 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1194 struct ip_vs_iphdr *iph) 1194 struct ip_vs_iphdr *iph)
1195 { 1195 {
1196 struct rtable *rt; /* Route to the other host */ 1196 struct rtable *rt; /* Route to the other host */
1197 int mtu; 1197 int mtu;
1198 int rc; 1198 int rc;
1199 int local; 1199 int local;
1200 int rt_mode; 1200 int rt_mode;
1201 1201
1202 EnterFunction(10); 1202 EnterFunction(10);
1203 1203
1204 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1204 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1205 forwarded directly here, because there is no need to 1205 forwarded directly here, because there is no need to
1206 translate address/port back */ 1206 translate address/port back */
1207 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1207 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1208 if (cp->packet_xmit) 1208 if (cp->packet_xmit)
1209 rc = cp->packet_xmit(skb, cp, pp, iph); 1209 rc = cp->packet_xmit(skb, cp, pp, iph);
1210 else 1210 else
1211 rc = NF_ACCEPT; 1211 rc = NF_ACCEPT;
1212 /* do not touch skb anymore */ 1212 /* do not touch skb anymore */
1213 atomic_inc(&cp->in_pkts); 1213 atomic_inc(&cp->in_pkts);
1214 goto out; 1214 goto out;
1215 } 1215 }
1216 1216
1217 /* 1217 /*
1218 * mangle and send the packet here (only for VS/NAT) 1218 * mangle and send the packet here (only for VS/NAT)
1219 */ 1219 */
1220 1220
/* From the FORWARD hook only IP_VS_RT_MODE_NON_LOCAL is requested; from any other hook LOCAL, NON_LOCAL and RDR are all allowed. */
1221 /* LOCALNODE from FORWARD hook is not supported */ 1221 /* LOCALNODE from FORWARD hook is not supported */
1222 rt_mode = (hooknum != NF_INET_FORWARD) ? 1222 rt_mode = (hooknum != NF_INET_FORWARD) ?
1223 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1223 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1224 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1224 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1225 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1225 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1226 RT_TOS(ip_hdr(skb)->tos), 1226 RT_TOS(ip_hdr(skb)->tos),
1227 rt_mode, NULL))) 1227 rt_mode, NULL)))
1228 goto tx_error_icmp; 1228 goto tx_error_icmp;
/* local is non-zero when the looked up route carries RTCF_LOCAL. */
1229 local = rt->rt_flags & RTCF_LOCAL; 1229 local = rt->rt_flags & RTCF_LOCAL;
1230 1230
1231 /* 1231 /*
1232 * Avoid duplicate tuple in reply direction for NAT traffic 1232 * Avoid duplicate tuple in reply direction for NAT traffic
1233 * to local address when connection is sync-ed 1233 * to local address when connection is sync-ed
1234 */ 1234 */
1235 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1235 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1236 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1236 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1237 enum ip_conntrack_info ctinfo; 1237 enum ip_conntrack_info ctinfo;
/* In the next line the left (old) column initialises ct with a redundant self-assignment "ct = ct = ..."; the right (new) column assigns once. */
1238 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 1238 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1239 1239
1240 if (ct && !nf_ct_is_untracked(ct)) { 1240 if (ct && !nf_ct_is_untracked(ct)) {
1241 IP_VS_DBG(10, "%s(): " 1241 IP_VS_DBG(10, "%s(): "
1242 "stopping DNAT to local address %pI4\n", 1242 "stopping DNAT to local address %pI4\n",
1243 __func__, &cp->daddr.ip); 1243 __func__, &cp->daddr.ip);
1244 goto tx_error_put; 1244 goto tx_error_put;
1245 } 1245 }
1246 } 1246 }
1247 #endif 1247 #endif
1248 1248
1249 /* From world but DNAT to loopback address? */ 1249 /* From world but DNAT to loopback address? */
1250 if (local && ipv4_is_loopback(cp->daddr.ip) && 1250 if (local && ipv4_is_loopback(cp->daddr.ip) &&
1251 rt_is_input_route(skb_rtable(skb))) { 1251 rt_is_input_route(skb_rtable(skb))) {
1252 IP_VS_DBG(1, "%s(): " 1252 IP_VS_DBG(1, "%s(): "
1253 "stopping DNAT to loopback %pI4\n", 1253 "stopping DNAT to loopback %pI4\n",
1254 __func__, &cp->daddr.ip); 1254 __func__, &cp->daddr.ip);
1255 goto tx_error_put; 1255 goto tx_error_put;
1256 } 1256 }
1257 1257
1258 /* MTU checking */ 1258 /* MTU checking */
1259 mtu = dst_mtu(&rt->dst); 1259 mtu = dst_mtu(&rt->dst);
1260 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && 1260 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1261 !skb_is_gso(skb)) { 1261 !skb_is_gso(skb)) {
1262 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1262 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1263 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1263 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1264 goto tx_error_put; 1264 goto tx_error_put;
1265 } 1265 }
1266 1266
1267 /* copy-on-write the packet before mangling it */ 1267 /* copy-on-write the packet before mangling it */
1268 if (!skb_make_writable(skb, offset)) 1268 if (!skb_make_writable(skb, offset))
1269 goto tx_error_put; 1269 goto tx_error_put;
1270 1270
1271 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1271 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1272 goto tx_error_put; 1272 goto tx_error_put;
1273 1273
1274 ip_vs_nat_icmp(skb, pp, cp, 0); 1274 ip_vs_nat_icmp(skb, pp, cp, 0);
1275 1275
/*
 * Non-local: rt replaces skb's dst.  Local: rt is released first, so a
 * failed __ip_vs_reroute_locally() jumps to tx_error rather than
 * tx_error_put.
 */
1276 if (!local) { 1276 if (!local) {
1277 /* drop the old route when skb is not shared */ 1277 /* drop the old route when skb is not shared */
1278 skb_dst_drop(skb); 1278 skb_dst_drop(skb);
1279 skb_dst_set(skb, &rt->dst); 1279 skb_dst_set(skb, &rt->dst);
1280 } else { 1280 } else {
1281 ip_rt_put(rt); 1281 ip_rt_put(rt);
1282 /* 1282 /*
1283 * Some IPv4 replies get local address from routes, 1283 * Some IPv4 replies get local address from routes,
1284 * not from iph, so while we DNAT after routing 1284 * not from iph, so while we DNAT after routing
1285 * we need this second input/output route. 1285 * we need this second input/output route.
1286 */ 1286 */
1287 if (!__ip_vs_reroute_locally(skb)) 1287 if (!__ip_vs_reroute_locally(skb))
1288 goto tx_error; 1288 goto tx_error;
1289 } 1289 }
1290 1290
1291 /* Another hack: avoid icmp_send in ip_fragment */ 1291 /* Another hack: avoid icmp_send in ip_fragment */
1292 skb->local_df = 1; 1292 skb->local_df = 1;
1293 1293
1294 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); 1294 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1295 1295
1296 rc = NF_STOLEN; 1296 rc = NF_STOLEN;
1297 goto out; 1297 goto out;
1298 1298
/*
 * Error exits: tx_error_icmp (route lookup returned NULL) reports link
 * failure; tx_error frees skb and sets rc to NF_STOLEN; out is the single
 * return point; tx_error_put sits after the return, releases rt and jumps
 * back to tx_error.
 */
1299 tx_error_icmp: 1299 tx_error_icmp:
1300 dst_link_failure(skb); 1300 dst_link_failure(skb);
1301 tx_error: 1301 tx_error:
1302 dev_kfree_skb(skb); 1302 dev_kfree_skb(skb);
1303 rc = NF_STOLEN; 1303 rc = NF_STOLEN;
1304 out: 1304 out:
1305 LeaveFunction(10); 1305 LeaveFunction(10);
1306 return rc; 1306 return rc;
1307 tx_error_put: 1307 tx_error_put:
1308 ip_rt_put(rt); 1308 ip_rt_put(rt);
1309 goto tx_error; 1309 goto tx_error;
1310 } 1310 }
1311 1311
1312 #ifdef CONFIG_IP_VS_IPV6 1312 #ifdef CONFIG_IP_VS_IPV6
/*
 * ip_vs_icmp_xmit_v6 - transmit an ICMPv6 packet that belongs to connection cp.
 *
 * @skb:     packet to send; consumed on every path that returns NF_STOLEN
 * @cp:      IPVS connection; supplies the forwarding method, flags,
 *           destination (cp->dest, cp->daddr.in6) and cp->packet_xmit
 * @pp:      protocol handler, passed through to cp->packet_xmit() and
 *           ip_vs_nat_icmp_v6()
 * @offset:  length passed to skb_make_writable() before the packet is mangled
 * @hooknum: netfilter hook; only compared against NF_INET_FORWARD to pick
 *           the allowed route modes
 * @iph:     parsed header info; passed to cp->packet_xmit(), and
 *           iph->fragoffs decides whether ICMPV6_PKT_TOOBIG is sent
 *
 * When the forwarding method is not IP_VS_CONN_F_MASQ the packet is handed
 * to cp->packet_xmit() (or NF_ACCEPT is used when no transmitter is set),
 * cp->in_pkts is incremented and that verdict is returned.
 *
 * For IP_VS_CONN_F_MASQ the function looks up an output route to cp->daddr,
 * rejects the two local-delivery cases checked below, checks the route MTU,
 * makes the skb writable, rewrites it with ip_vs_nat_icmp_v6() and passes it
 * to IP_VS_XMIT_NAT().
 *
 * Returns the packet_xmit()/NF_ACCEPT verdict on the non-MASQ path and
 * NF_STOLEN otherwise, both after a successful hand-off and after the skb
 * was freed on an error path.
 */
1313 int 1313 int
1314 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1314 ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph) 1316 struct ip_vs_iphdr *iph)
1317 { 1317 {
1318 struct rt6_info *rt; /* Route to the other host */ 1318 struct rt6_info *rt; /* Route to the other host */
1319 int mtu; 1319 int mtu;
1320 int rc; 1320 int rc;
1321 int local; 1321 int local;
1322 int rt_mode; 1322 int rt_mode;
1323 1323
1324 EnterFunction(10); 1324 EnterFunction(10);
1325 1325
1326 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be 1326 /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1327 forwarded directly here, because there is no need to 1327 forwarded directly here, because there is no need to
1328 translate address/port back */ 1328 translate address/port back */
1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1330 if (cp->packet_xmit) 1330 if (cp->packet_xmit)
1331 rc = cp->packet_xmit(skb, cp, pp, iph); 1331 rc = cp->packet_xmit(skb, cp, pp, iph);
1332 else 1332 else
1333 rc = NF_ACCEPT; 1333 rc = NF_ACCEPT;
1334 /* do not touch skb anymore */ 1334 /* do not touch skb anymore */
1335 atomic_inc(&cp->in_pkts); 1335 atomic_inc(&cp->in_pkts);
1336 goto out; 1336 goto out;
1337 } 1337 }
1338 1338
1339 /* 1339 /*
1340 * mangle and send the packet here (only for VS/NAT) 1340 * mangle and send the packet here (only for VS/NAT)
1341 */ 1341 */
1342 1342
1343 /* LOCALNODE from FORWARD hook is not supported */ 1343 /* LOCALNODE from FORWARD hook is not supported */
1344 rt_mode = (hooknum != NF_INET_FORWARD) ? 1344 rt_mode = (hooknum != NF_INET_FORWARD) ?
1345 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1345 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1346 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1346 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1347 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1347 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1348 0, rt_mode))) 1348 0, rt_mode)))
1349 goto tx_error_icmp; 1349 goto tx_error_icmp;
1350 1350
1351 local = __ip_vs_is_local_route6(rt); 1351 local = __ip_vs_is_local_route6(rt);
1352 /* 1352 /*
1353 * Avoid duplicate tuple in reply direction for NAT traffic 1353 * Avoid duplicate tuple in reply direction for NAT traffic
1354 * to local address when connection is sync-ed 1354 * to local address when connection is sync-ed
1355 */ 1355 */
1356 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 1356 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1357 if (cp->flags & IP_VS_CONN_F_SYNC && local) { 1357 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1358 enum ip_conntrack_info ctinfo; 1358 enum ip_conntrack_info ctinfo;
1359 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); 1359 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1360 1360
1361 if (ct && !nf_ct_is_untracked(ct)) { 1361 if (ct && !nf_ct_is_untracked(ct)) {
1362 IP_VS_DBG(10, "%s(): " 1362 IP_VS_DBG(10, "%s(): "
1363 "stopping DNAT to local address %pI6\n", 1363 "stopping DNAT to local address %pI6\n",
1364 __func__, &cp->daddr.in6); 1364 __func__, &cp->daddr.in6);
1365 goto tx_error_put; 1365 goto tx_error_put;
1366 } 1366 }
1367 } 1367 }
1368 #endif 1368 #endif
1369 1369
1370 /* From world but DNAT to loopback address? */ 1370 /* From world but DNAT to loopback address? */
1371 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && 1371 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1372 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { 1372 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1373 IP_VS_DBG(1, "%s(): " 1373 IP_VS_DBG(1, "%s(): "
1374 "stopping DNAT to loopback %pI6\n", 1374 "stopping DNAT to loopback %pI6\n",
1375 __func__, &cp->daddr.in6); 1375 __func__, &cp->daddr.in6);
1376 goto tx_error_put; 1376 goto tx_error_put;
1377 } 1377 }
1378 1378
1379 /* MTU checking */ 1379 /* MTU checking */
1380 mtu = dst_mtu(&rt->dst); 1380 mtu = dst_mtu(&rt->dst);
1381 if (__mtu_check_toobig_v6(skb, mtu)) { 1381 if (__mtu_check_toobig_v6(skb, mtu)) {
/*
 * The skb has no device attached: point it at the loopback device of the
 * namespace owning the skb's current dst device before icmpv6_send() may
 * be called.
 * NOTE(review): presumably icmpv6_send() needs skb->dev to be set - confirm.
 */
1382 if (!skb->dev) { 1382 if (!skb->dev) {
1383 struct net *net = dev_net(skb_dst(skb)->dev); 1383 struct net *net = dev_net(skb_dst(skb)->dev);
1384 1384
1385 skb->dev = net->loopback_dev; 1385 skb->dev = net->loopback_dev;
1386 } 1386 }
1387 /* only send ICMP too big on first fragment */ 1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs) 1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1391 goto tx_error_put; 1391 goto tx_error_put;
1392 } 1392 }
1393 1393
1394 /* copy-on-write the packet before mangling it */ 1394 /* copy-on-write the packet before mangling it */
1395 if (!skb_make_writable(skb, offset)) 1395 if (!skb_make_writable(skb, offset))
1396 goto tx_error_put; 1396 goto tx_error_put;
1397 1397
1398 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1398 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1399 goto tx_error_put; 1399 goto tx_error_put;
1400 1400
1401 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1401 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1402 1402
/*
 * Either way the reference on rt taken by the route lookup is disposed of
 * here: it is attached to the skb as its new dst, or, when the route is
 * local and the skb has a device, released while the skb keeps its
 * existing dst.
 */
1403 if (!local || !skb->dev) { 1403 if (!local || !skb->dev) {
1404 /* drop the old route when skb is not shared */ 1404 /* drop the old route when skb is not shared */
1405 skb_dst_drop(skb); 1405 skb_dst_drop(skb);
1406 skb_dst_set(skb, &rt->dst); 1406 skb_dst_set(skb, &rt->dst);
1407 } else { 1407 } else {
1408 /* destined to loopback, do we need to change route? */ 1408 /* destined to loopback, do we need to change route? */
1409 dst_release(&rt->dst); 1409 dst_release(&rt->dst);
1410 } 1410 }
1411 1411
1412 /* Another hack: avoid icmp_send in ip_fragment */ 1412 /* Another hack: avoid icmp_send in ip_fragment */
1413 skb->local_df = 1; 1413 skb->local_df = 1;
1414 1414
1415 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); 1415 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1416 1416
1417 rc = NF_STOLEN; 1417 rc = NF_STOLEN;
1418 goto out; 1418 goto out;
1419 1419
/*
 * Error exits. tx_error_icmp is taken when no route was found: it calls
 * dst_link_failure() and falls through to tx_error, which frees the skb
 * and reports NF_STOLEN because the packet has been consumed.
 */
1420 tx_error_icmp: 1420 tx_error_icmp:
1421 dst_link_failure(skb); 1421 dst_link_failure(skb);
1422 tx_error: 1422 tx_error:
1423 dev_kfree_skb(skb); 1423 dev_kfree_skb(skb);
1424 rc = NF_STOLEN; 1424 rc = NF_STOLEN;
1425 out: 1425 out:
1426 LeaveFunction(10); 1426 LeaveFunction(10);
1427 return rc; 1427 return rc;
/*
 * Errors after the route lookup succeeded: release the reference on rt
 * first, then share the skb-freeing path above.
 */
1428 tx_error_put: 1428 tx_error_put:
1429 dst_release(&rt->dst); 1429 dst_release(&rt->dst);
1430 goto tx_error; 1430 goto tx_error;
1431 } 1431 }
1432 #endif 1432 #endif
1433 1433