Commit 545b29019c8959c805abfe8194d47e989f1a6e5f
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
Merge branch 'master' of git://1984.lsi.us.es/nf-next
Conflicts: net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c Minor conflict due to some IS_ENABLED conversions done in net-next. Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 7 changed files Inline Diff
include/uapi/linux/in6.h
1 | /* | 1 | /* |
2 | * Types and definitions for AF_INET6 | 2 | * Types and definitions for AF_INET6 |
3 | * Linux INET6 implementation | 3 | * Linux INET6 implementation |
4 | * | 4 | * |
5 | * Authors: | 5 | * Authors: |
6 | * Pedro Roque <roque@di.fc.ul.pt> | 6 | * Pedro Roque <roque@di.fc.ul.pt> |
7 | * | 7 | * |
8 | * Sources: | 8 | * Sources: |
9 | * IPv6 Program Interfaces for BSD Systems | 9 | * IPv6 Program Interfaces for BSD Systems |
10 | * <draft-ietf-ipngwg-bsd-api-05.txt> | 10 | * <draft-ietf-ipngwg-bsd-api-05.txt> |
11 | * | 11 | * |
12 | * Advanced Sockets API for IPv6 | 12 | * Advanced Sockets API for IPv6 |
13 | * <draft-stevens-advanced-api-00.txt> | 13 | * <draft-stevens-advanced-api-00.txt> |
14 | * | 14 | * |
15 | * This program is free software; you can redistribute it and/or | 15 | * This program is free software; you can redistribute it and/or |
16 | * modify it under the terms of the GNU General Public License | 16 | * modify it under the terms of the GNU General Public License |
17 | * as published by the Free Software Foundation; either version | 17 | * as published by the Free Software Foundation; either version |
18 | * 2 of the License, or (at your option) any later version. | 18 | * 2 of the License, or (at your option) any later version. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #ifndef _UAPI_LINUX_IN6_H | 21 | #ifndef _UAPI_LINUX_IN6_H |
22 | #define _UAPI_LINUX_IN6_H | 22 | #define _UAPI_LINUX_IN6_H |
23 | 23 | ||
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * IPv6 address structure | 27 | * IPv6 address structure |
28 | */ | 28 | */ |
29 | 29 | ||
30 | struct in6_addr { | 30 | struct in6_addr { |
31 | union { | 31 | union { |
32 | __u8 u6_addr8[16]; | 32 | __u8 u6_addr8[16]; |
33 | __be16 u6_addr16[8]; | 33 | __be16 u6_addr16[8]; |
34 | __be32 u6_addr32[4]; | 34 | __be32 u6_addr32[4]; |
35 | } in6_u; | 35 | } in6_u; |
36 | #define s6_addr in6_u.u6_addr8 | 36 | #define s6_addr in6_u.u6_addr8 |
37 | #define s6_addr16 in6_u.u6_addr16 | 37 | #define s6_addr16 in6_u.u6_addr16 |
38 | #define s6_addr32 in6_u.u6_addr32 | 38 | #define s6_addr32 in6_u.u6_addr32 |
39 | }; | 39 | }; |
40 | 40 | ||
41 | /* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 | 41 | /* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553 |
42 | * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined | 42 | * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined |
43 | * in network byte order, not in host byte order as are the IPv4 equivalents | 43 | * in network byte order, not in host byte order as are the IPv4 equivalents |
44 | */ | 44 | */ |
45 | 45 | ||
46 | struct sockaddr_in6 { | 46 | struct sockaddr_in6 { |
47 | unsigned short int sin6_family; /* AF_INET6 */ | 47 | unsigned short int sin6_family; /* AF_INET6 */ |
48 | __be16 sin6_port; /* Transport layer port # */ | 48 | __be16 sin6_port; /* Transport layer port # */ |
49 | __be32 sin6_flowinfo; /* IPv6 flow information */ | 49 | __be32 sin6_flowinfo; /* IPv6 flow information */ |
50 | struct in6_addr sin6_addr; /* IPv6 address */ | 50 | struct in6_addr sin6_addr; /* IPv6 address */ |
51 | __u32 sin6_scope_id; /* scope id (new in RFC2553) */ | 51 | __u32 sin6_scope_id; /* scope id (new in RFC2553) */ |
52 | }; | 52 | }; |
53 | 53 | ||
54 | struct ipv6_mreq { | 54 | struct ipv6_mreq { |
55 | /* IPv6 multicast address of group */ | 55 | /* IPv6 multicast address of group */ |
56 | struct in6_addr ipv6mr_multiaddr; | 56 | struct in6_addr ipv6mr_multiaddr; |
57 | 57 | ||
58 | /* local IPv6 address of interface */ | 58 | /* local IPv6 address of interface */ |
59 | int ipv6mr_ifindex; | 59 | int ipv6mr_ifindex; |
60 | }; | 60 | }; |
61 | 61 | ||
62 | #define ipv6mr_acaddr ipv6mr_multiaddr | 62 | #define ipv6mr_acaddr ipv6mr_multiaddr |
63 | 63 | ||
64 | struct in6_flowlabel_req { | 64 | struct in6_flowlabel_req { |
65 | struct in6_addr flr_dst; | 65 | struct in6_addr flr_dst; |
66 | __be32 flr_label; | 66 | __be32 flr_label; |
67 | __u8 flr_action; | 67 | __u8 flr_action; |
68 | __u8 flr_share; | 68 | __u8 flr_share; |
69 | __u16 flr_flags; | 69 | __u16 flr_flags; |
70 | __u16 flr_expires; | 70 | __u16 flr_expires; |
71 | __u16 flr_linger; | 71 | __u16 flr_linger; |
72 | __u32 __flr_pad; | 72 | __u32 __flr_pad; |
73 | /* Options in format of IPV6_PKTOPTIONS */ | 73 | /* Options in format of IPV6_PKTOPTIONS */ |
74 | }; | 74 | }; |
75 | 75 | ||
76 | #define IPV6_FL_A_GET 0 | 76 | #define IPV6_FL_A_GET 0 |
77 | #define IPV6_FL_A_PUT 1 | 77 | #define IPV6_FL_A_PUT 1 |
78 | #define IPV6_FL_A_RENEW 2 | 78 | #define IPV6_FL_A_RENEW 2 |
79 | 79 | ||
80 | #define IPV6_FL_F_CREATE 1 | 80 | #define IPV6_FL_F_CREATE 1 |
81 | #define IPV6_FL_F_EXCL 2 | 81 | #define IPV6_FL_F_EXCL 2 |
82 | 82 | ||
83 | #define IPV6_FL_S_NONE 0 | 83 | #define IPV6_FL_S_NONE 0 |
84 | #define IPV6_FL_S_EXCL 1 | 84 | #define IPV6_FL_S_EXCL 1 |
85 | #define IPV6_FL_S_PROCESS 2 | 85 | #define IPV6_FL_S_PROCESS 2 |
86 | #define IPV6_FL_S_USER 3 | 86 | #define IPV6_FL_S_USER 3 |
87 | #define IPV6_FL_S_ANY 255 | 87 | #define IPV6_FL_S_ANY 255 |
88 | 88 | ||
89 | 89 | ||
90 | /* | 90 | /* |
91 | * Bitmask constant declarations to help applications select out the | 91 | * Bitmask constant declarations to help applications select out the |
92 | * flow label and priority fields. | 92 | * flow label and priority fields. |
93 | * | 93 | * |
94 | * Note that these are in host byte order while the flowinfo field of | 94 | * Note that these are in host byte order while the flowinfo field of |
95 | * sockaddr_in6 is in network byte order. | 95 | * sockaddr_in6 is in network byte order. |
96 | */ | 96 | */ |
97 | 97 | ||
98 | #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff | 98 | #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff |
99 | #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 | 99 | #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 |
100 | 100 | ||
101 | /* These definitions are obsolete */ | 101 | /* These definitions are obsolete */ |
102 | #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 | 102 | #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 |
103 | #define IPV6_PRIORITY_FILLER 0x0100 | 103 | #define IPV6_PRIORITY_FILLER 0x0100 |
104 | #define IPV6_PRIORITY_UNATTENDED 0x0200 | 104 | #define IPV6_PRIORITY_UNATTENDED 0x0200 |
105 | #define IPV6_PRIORITY_RESERVED1 0x0300 | 105 | #define IPV6_PRIORITY_RESERVED1 0x0300 |
106 | #define IPV6_PRIORITY_BULK 0x0400 | 106 | #define IPV6_PRIORITY_BULK 0x0400 |
107 | #define IPV6_PRIORITY_RESERVED2 0x0500 | 107 | #define IPV6_PRIORITY_RESERVED2 0x0500 |
108 | #define IPV6_PRIORITY_INTERACTIVE 0x0600 | 108 | #define IPV6_PRIORITY_INTERACTIVE 0x0600 |
109 | #define IPV6_PRIORITY_CONTROL 0x0700 | 109 | #define IPV6_PRIORITY_CONTROL 0x0700 |
110 | #define IPV6_PRIORITY_8 0x0800 | 110 | #define IPV6_PRIORITY_8 0x0800 |
111 | #define IPV6_PRIORITY_9 0x0900 | 111 | #define IPV6_PRIORITY_9 0x0900 |
112 | #define IPV6_PRIORITY_10 0x0a00 | 112 | #define IPV6_PRIORITY_10 0x0a00 |
113 | #define IPV6_PRIORITY_11 0x0b00 | 113 | #define IPV6_PRIORITY_11 0x0b00 |
114 | #define IPV6_PRIORITY_12 0x0c00 | 114 | #define IPV6_PRIORITY_12 0x0c00 |
115 | #define IPV6_PRIORITY_13 0x0d00 | 115 | #define IPV6_PRIORITY_13 0x0d00 |
116 | #define IPV6_PRIORITY_14 0x0e00 | 116 | #define IPV6_PRIORITY_14 0x0e00 |
117 | #define IPV6_PRIORITY_15 0x0f00 | 117 | #define IPV6_PRIORITY_15 0x0f00 |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * IPV6 extension headers | 120 | * IPV6 extension headers |
121 | */ | 121 | */ |
122 | #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */ | 122 | #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */ |
123 | #define IPPROTO_ROUTING 43 /* IPv6 routing header */ | 123 | #define IPPROTO_ROUTING 43 /* IPv6 routing header */ |
124 | #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */ | 124 | #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */ |
125 | #define IPPROTO_ICMPV6 58 /* ICMPv6 */ | 125 | #define IPPROTO_ICMPV6 58 /* ICMPv6 */ |
126 | #define IPPROTO_NONE 59 /* IPv6 no next header */ | 126 | #define IPPROTO_NONE 59 /* IPv6 no next header */ |
127 | #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ | 127 | #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ |
128 | #define IPPROTO_MH 135 /* IPv6 mobility header */ | 128 | #define IPPROTO_MH 135 /* IPv6 mobility header */ |
129 | 129 | ||
130 | /* | 130 | /* |
131 | * IPv6 TLV options. | 131 | * IPv6 TLV options. |
132 | */ | 132 | */ |
133 | #define IPV6_TLV_PAD1 0 | 133 | #define IPV6_TLV_PAD1 0 |
134 | #define IPV6_TLV_PADN 1 | 134 | #define IPV6_TLV_PADN 1 |
135 | #define IPV6_TLV_ROUTERALERT 5 | 135 | #define IPV6_TLV_ROUTERALERT 5 |
136 | #define IPV6_TLV_JUMBO 194 | 136 | #define IPV6_TLV_JUMBO 194 |
137 | #define IPV6_TLV_HAO 201 /* home address option */ | 137 | #define IPV6_TLV_HAO 201 /* home address option */ |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * IPV6 socket options | 140 | * IPV6 socket options |
141 | */ | 141 | */ |
142 | 142 | ||
143 | #define IPV6_ADDRFORM 1 | 143 | #define IPV6_ADDRFORM 1 |
144 | #define IPV6_2292PKTINFO 2 | 144 | #define IPV6_2292PKTINFO 2 |
145 | #define IPV6_2292HOPOPTS 3 | 145 | #define IPV6_2292HOPOPTS 3 |
146 | #define IPV6_2292DSTOPTS 4 | 146 | #define IPV6_2292DSTOPTS 4 |
147 | #define IPV6_2292RTHDR 5 | 147 | #define IPV6_2292RTHDR 5 |
148 | #define IPV6_2292PKTOPTIONS 6 | 148 | #define IPV6_2292PKTOPTIONS 6 |
149 | #define IPV6_CHECKSUM 7 | 149 | #define IPV6_CHECKSUM 7 |
150 | #define IPV6_2292HOPLIMIT 8 | 150 | #define IPV6_2292HOPLIMIT 8 |
151 | #define IPV6_NEXTHOP 9 | 151 | #define IPV6_NEXTHOP 9 |
152 | #define IPV6_AUTHHDR 10 /* obsolete */ | 152 | #define IPV6_AUTHHDR 10 /* obsolete */ |
153 | #define IPV6_FLOWINFO 11 | 153 | #define IPV6_FLOWINFO 11 |
154 | 154 | ||
155 | #define IPV6_UNICAST_HOPS 16 | 155 | #define IPV6_UNICAST_HOPS 16 |
156 | #define IPV6_MULTICAST_IF 17 | 156 | #define IPV6_MULTICAST_IF 17 |
157 | #define IPV6_MULTICAST_HOPS 18 | 157 | #define IPV6_MULTICAST_HOPS 18 |
158 | #define IPV6_MULTICAST_LOOP 19 | 158 | #define IPV6_MULTICAST_LOOP 19 |
159 | #define IPV6_ADD_MEMBERSHIP 20 | 159 | #define IPV6_ADD_MEMBERSHIP 20 |
160 | #define IPV6_DROP_MEMBERSHIP 21 | 160 | #define IPV6_DROP_MEMBERSHIP 21 |
161 | #define IPV6_ROUTER_ALERT 22 | 161 | #define IPV6_ROUTER_ALERT 22 |
162 | #define IPV6_MTU_DISCOVER 23 | 162 | #define IPV6_MTU_DISCOVER 23 |
163 | #define IPV6_MTU 24 | 163 | #define IPV6_MTU 24 |
164 | #define IPV6_RECVERR 25 | 164 | #define IPV6_RECVERR 25 |
165 | #define IPV6_V6ONLY 26 | 165 | #define IPV6_V6ONLY 26 |
166 | #define IPV6_JOIN_ANYCAST 27 | 166 | #define IPV6_JOIN_ANYCAST 27 |
167 | #define IPV6_LEAVE_ANYCAST 28 | 167 | #define IPV6_LEAVE_ANYCAST 28 |
168 | 168 | ||
169 | /* IPV6_MTU_DISCOVER values */ | 169 | /* IPV6_MTU_DISCOVER values */ |
170 | #define IPV6_PMTUDISC_DONT 0 | 170 | #define IPV6_PMTUDISC_DONT 0 |
171 | #define IPV6_PMTUDISC_WANT 1 | 171 | #define IPV6_PMTUDISC_WANT 1 |
172 | #define IPV6_PMTUDISC_DO 2 | 172 | #define IPV6_PMTUDISC_DO 2 |
173 | #define IPV6_PMTUDISC_PROBE 3 | 173 | #define IPV6_PMTUDISC_PROBE 3 |
174 | 174 | ||
175 | /* Flowlabel */ | 175 | /* Flowlabel */ |
176 | #define IPV6_FLOWLABEL_MGR 32 | 176 | #define IPV6_FLOWLABEL_MGR 32 |
177 | #define IPV6_FLOWINFO_SEND 33 | 177 | #define IPV6_FLOWINFO_SEND 33 |
178 | 178 | ||
179 | #define IPV6_IPSEC_POLICY 34 | 179 | #define IPV6_IPSEC_POLICY 34 |
180 | #define IPV6_XFRM_POLICY 35 | 180 | #define IPV6_XFRM_POLICY 35 |
181 | 181 | ||
182 | /* | 182 | /* |
183 | * Multicast: | 183 | * Multicast: |
184 | * Following socket options are shared between IPv4 and IPv6. | 184 | * Following socket options are shared between IPv4 and IPv6. |
185 | * | 185 | * |
186 | * MCAST_JOIN_GROUP 42 | 186 | * MCAST_JOIN_GROUP 42 |
187 | * MCAST_BLOCK_SOURCE 43 | 187 | * MCAST_BLOCK_SOURCE 43 |
188 | * MCAST_UNBLOCK_SOURCE 44 | 188 | * MCAST_UNBLOCK_SOURCE 44 |
189 | * MCAST_LEAVE_GROUP 45 | 189 | * MCAST_LEAVE_GROUP 45 |
190 | * MCAST_JOIN_SOURCE_GROUP 46 | 190 | * MCAST_JOIN_SOURCE_GROUP 46 |
191 | * MCAST_LEAVE_SOURCE_GROUP 47 | 191 | * MCAST_LEAVE_SOURCE_GROUP 47 |
192 | * MCAST_MSFILTER 48 | 192 | * MCAST_MSFILTER 48 |
193 | */ | 193 | */ |
194 | 194 | ||
195 | /* | 195 | /* |
196 | * Advanced API (RFC3542) (1) | 196 | * Advanced API (RFC3542) (1) |
197 | * | 197 | * |
198 | * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c. | 198 | * Note: IPV6_RECVRTHDRDSTOPTS does not exist. see net/ipv6/datagram.c. |
199 | */ | 199 | */ |
200 | 200 | ||
201 | #define IPV6_RECVPKTINFO 49 | 201 | #define IPV6_RECVPKTINFO 49 |
202 | #define IPV6_PKTINFO 50 | 202 | #define IPV6_PKTINFO 50 |
203 | #define IPV6_RECVHOPLIMIT 51 | 203 | #define IPV6_RECVHOPLIMIT 51 |
204 | #define IPV6_HOPLIMIT 52 | 204 | #define IPV6_HOPLIMIT 52 |
205 | #define IPV6_RECVHOPOPTS 53 | 205 | #define IPV6_RECVHOPOPTS 53 |
206 | #define IPV6_HOPOPTS 54 | 206 | #define IPV6_HOPOPTS 54 |
207 | #define IPV6_RTHDRDSTOPTS 55 | 207 | #define IPV6_RTHDRDSTOPTS 55 |
208 | #define IPV6_RECVRTHDR 56 | 208 | #define IPV6_RECVRTHDR 56 |
209 | #define IPV6_RTHDR 57 | 209 | #define IPV6_RTHDR 57 |
210 | #define IPV6_RECVDSTOPTS 58 | 210 | #define IPV6_RECVDSTOPTS 58 |
211 | #define IPV6_DSTOPTS 59 | 211 | #define IPV6_DSTOPTS 59 |
212 | #define IPV6_RECVPATHMTU 60 | 212 | #define IPV6_RECVPATHMTU 60 |
213 | #define IPV6_PATHMTU 61 | 213 | #define IPV6_PATHMTU 61 |
214 | #define IPV6_DONTFRAG 62 | 214 | #define IPV6_DONTFRAG 62 |
215 | #if 0 /* not yet */ | 215 | #if 0 /* not yet */ |
216 | #define IPV6_USE_MIN_MTU 63 | 216 | #define IPV6_USE_MIN_MTU 63 |
217 | #endif | 217 | #endif |
218 | 218 | ||
219 | /* | 219 | /* |
220 | * Netfilter (1) | 220 | * Netfilter (1) |
221 | * | 221 | * |
222 | * Following socket options are used in ip6_tables; | 222 | * Following socket options are used in ip6_tables; |
223 | * see include/linux/netfilter_ipv6/ip6_tables.h. | 223 | * see include/linux/netfilter_ipv6/ip6_tables.h. |
224 | * | 224 | * |
225 | * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 | 225 | * IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 |
226 | * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 | 226 | * IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 |
227 | */ | 227 | */ |
228 | 228 | ||
229 | /* | 229 | /* |
230 | * Advanced API (RFC3542) (2) | 230 | * Advanced API (RFC3542) (2) |
231 | */ | 231 | */ |
232 | #define IPV6_RECVTCLASS 66 | 232 | #define IPV6_RECVTCLASS 66 |
233 | #define IPV6_TCLASS 67 | 233 | #define IPV6_TCLASS 67 |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Netfilter (2) | 236 | * Netfilter (2) |
237 | * | 237 | * |
238 | * Following socket options are used in ip6_tables; | 238 | * Following socket options are used in ip6_tables; |
239 | * see include/linux/netfilter_ipv6/ip6_tables.h. | 239 | * see include/linux/netfilter_ipv6/ip6_tables.h. |
240 | * | 240 | * |
241 | * IP6T_SO_GET_REVISION_MATCH 68 | 241 | * IP6T_SO_GET_REVISION_MATCH 68 |
242 | * IP6T_SO_GET_REVISION_TARGET 69 | 242 | * IP6T_SO_GET_REVISION_TARGET 69 |
243 | * IP6T_SO_ORIGINAL_DST 80 | ||
243 | */ | 244 | */ |
244 | 245 | ||
245 | /* RFC5014: Source address selection */ | 246 | /* RFC5014: Source address selection */ |
246 | #define IPV6_ADDR_PREFERENCES 72 | 247 | #define IPV6_ADDR_PREFERENCES 72 |
247 | 248 | ||
248 | #define IPV6_PREFER_SRC_TMP 0x0001 | 249 | #define IPV6_PREFER_SRC_TMP 0x0001 |
249 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | 250 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 |
250 | #define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100 | 251 | #define IPV6_PREFER_SRC_PUBTMP_DEFAULT 0x0100 |
251 | #define IPV6_PREFER_SRC_COA 0x0004 | 252 | #define IPV6_PREFER_SRC_COA 0x0004 |
252 | #define IPV6_PREFER_SRC_HOME 0x0400 | 253 | #define IPV6_PREFER_SRC_HOME 0x0400 |
253 | #define IPV6_PREFER_SRC_CGA 0x0008 | 254 | #define IPV6_PREFER_SRC_CGA 0x0008 |
254 | #define IPV6_PREFER_SRC_NONCGA 0x0800 | 255 | #define IPV6_PREFER_SRC_NONCGA 0x0800 |
255 | 256 | ||
256 | /* RFC5082: Generalized Ttl Security Mechanism */ | 257 | /* RFC5082: Generalized Ttl Security Mechanism */ |
257 | #define IPV6_MINHOPCOUNT 73 | 258 | #define IPV6_MINHOPCOUNT 73 |
258 | 259 | ||
259 | #define IPV6_ORIGDSTADDR 74 | 260 | #define IPV6_ORIGDSTADDR 74 |
260 | #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR | 261 | #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR |
261 | #define IPV6_TRANSPARENT 75 | 262 | #define IPV6_TRANSPARENT 75 |
262 | #define IPV6_UNICAST_IF 76 | 263 | #define IPV6_UNICAST_IF 76 |
263 | 264 | ||
264 | /* | 265 | /* |
265 | * Multicast Routing: | 266 | * Multicast Routing: |
266 | * see include/linux/mroute6.h. | 267 | * see include/linux/mroute6.h. |
267 | * | 268 | * |
268 | * MRT6_INIT 200 | 269 | * MRT6_INIT 200 |
269 | * MRT6_DONE 201 | 270 | * MRT6_DONE 201 |
270 | * MRT6_ADD_MIF 202 | 271 | * MRT6_ADD_MIF 202 |
271 | * MRT6_DEL_MIF 203 | 272 | * MRT6_DEL_MIF 203 |
272 | * MRT6_ADD_MFC 204 | 273 | * MRT6_ADD_MFC 204 |
273 | * MRT6_DEL_MFC 205 | 274 | * MRT6_DEL_MFC 205 |
274 | * MRT6_VERSION 206 | 275 | * MRT6_VERSION 206 |
275 | * MRT6_ASSERT 207 | 276 | * MRT6_ASSERT 207 |
276 | * MRT6_PIM 208 | 277 | * MRT6_PIM 208 |
277 | * (reserved) 209 | 278 | * (reserved) 209 |
278 | */ | 279 | */ |
279 | #endif /* _UAPI_LINUX_IN6_H */ | 280 | #endif /* _UAPI_LINUX_IN6_H */ |
280 | 281 |
include/uapi/linux/netfilter_ipv6/ip6_tables.h
1 | /* | 1 | /* |
2 | * 25-Jul-1998 Major changes to allow for ip chain table | 2 | * 25-Jul-1998 Major changes to allow for ip chain table |
3 | * | 3 | * |
4 | * 3-Jan-2000 Named tables to allow packet selection for different uses. | 4 | * 3-Jan-2000 Named tables to allow packet selection for different uses. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | /* | 7 | /* |
8 | * Format of an IP6 firewall descriptor | 8 | * Format of an IP6 firewall descriptor |
9 | * | 9 | * |
10 | * src, dst, src_mask, dst_mask are always stored in network byte order. | 10 | * src, dst, src_mask, dst_mask are always stored in network byte order. |
11 | * flags are stored in host byte order (of course). | 11 | * flags are stored in host byte order (of course). |
12 | * Port numbers are stored in HOST byte order. | 12 | * Port numbers are stored in HOST byte order. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #ifndef _UAPI_IP6_TABLES_H | 15 | #ifndef _UAPI_IP6_TABLES_H |
16 | #define _UAPI_IP6_TABLES_H | 16 | #define _UAPI_IP6_TABLES_H |
17 | 17 | ||
18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <linux/compiler.h> | 19 | #include <linux/compiler.h> |
20 | #include <linux/netfilter_ipv6.h> | 20 | #include <linux/netfilter_ipv6.h> |
21 | 21 | ||
22 | #include <linux/netfilter/x_tables.h> | 22 | #include <linux/netfilter/x_tables.h> |
23 | 23 | ||
24 | #ifndef __KERNEL__ | 24 | #ifndef __KERNEL__ |
25 | #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN | 25 | #define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN |
26 | #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN | 26 | #define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN |
27 | #define ip6t_match xt_match | 27 | #define ip6t_match xt_match |
28 | #define ip6t_target xt_target | 28 | #define ip6t_target xt_target |
29 | #define ip6t_table xt_table | 29 | #define ip6t_table xt_table |
30 | #define ip6t_get_revision xt_get_revision | 30 | #define ip6t_get_revision xt_get_revision |
31 | #define ip6t_entry_match xt_entry_match | 31 | #define ip6t_entry_match xt_entry_match |
32 | #define ip6t_entry_target xt_entry_target | 32 | #define ip6t_entry_target xt_entry_target |
33 | #define ip6t_standard_target xt_standard_target | 33 | #define ip6t_standard_target xt_standard_target |
34 | #define ip6t_error_target xt_error_target | 34 | #define ip6t_error_target xt_error_target |
35 | #define ip6t_counters xt_counters | 35 | #define ip6t_counters xt_counters |
36 | #define IP6T_CONTINUE XT_CONTINUE | 36 | #define IP6T_CONTINUE XT_CONTINUE |
37 | #define IP6T_RETURN XT_RETURN | 37 | #define IP6T_RETURN XT_RETURN |
38 | 38 | ||
39 | /* Pre-iptables-1.4.0 */ | 39 | /* Pre-iptables-1.4.0 */ |
40 | #include <linux/netfilter/xt_tcpudp.h> | 40 | #include <linux/netfilter/xt_tcpudp.h> |
41 | #define ip6t_tcp xt_tcp | 41 | #define ip6t_tcp xt_tcp |
42 | #define ip6t_udp xt_udp | 42 | #define ip6t_udp xt_udp |
43 | #define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT | 43 | #define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT |
44 | #define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT | 44 | #define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT |
45 | #define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS | 45 | #define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS |
46 | #define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION | 46 | #define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION |
47 | #define IP6T_TCP_INV_MASK XT_TCP_INV_MASK | 47 | #define IP6T_TCP_INV_MASK XT_TCP_INV_MASK |
48 | #define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT | 48 | #define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT |
49 | #define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT | 49 | #define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT |
50 | #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK | 50 | #define IP6T_UDP_INV_MASK XT_UDP_INV_MASK |
51 | 51 | ||
52 | #define ip6t_counters_info xt_counters_info | 52 | #define ip6t_counters_info xt_counters_info |
53 | #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET | 53 | #define IP6T_STANDARD_TARGET XT_STANDARD_TARGET |
54 | #define IP6T_ERROR_TARGET XT_ERROR_TARGET | 54 | #define IP6T_ERROR_TARGET XT_ERROR_TARGET |
55 | #define IP6T_MATCH_ITERATE(e, fn, args...) \ | 55 | #define IP6T_MATCH_ITERATE(e, fn, args...) \ |
56 | XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) | 56 | XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) |
57 | #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ | 57 | #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ |
58 | XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) | 58 | XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | /* Yes, Virginia, you have to zero the padding. */ | 61 | /* Yes, Virginia, you have to zero the padding. */ |
62 | struct ip6t_ip6 { | 62 | struct ip6t_ip6 { |
63 | /* Source and destination IP6 addr */ | 63 | /* Source and destination IP6 addr */ |
64 | struct in6_addr src, dst; | 64 | struct in6_addr src, dst; |
65 | /* Mask for src and dest IP6 addr */ | 65 | /* Mask for src and dest IP6 addr */ |
66 | struct in6_addr smsk, dmsk; | 66 | struct in6_addr smsk, dmsk; |
67 | char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; | 67 | char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; |
68 | unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; | 68 | unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; |
69 | 69 | ||
70 | /* Upper protocol number | 70 | /* Upper protocol number |
71 | * - The allowed value is 0 (any) or protocol number of last parsable | 71 | * - The allowed value is 0 (any) or protocol number of last parsable |
72 | * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or | 72 | * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or |
73 | * the non IPv6 extension headers. | 73 | * the non IPv6 extension headers. |
74 | * - The protocol numbers of IPv6 extension headers except of ESP and | 74 | * - The protocol numbers of IPv6 extension headers except of ESP and |
75 | * MH do not match any packets. | 75 | * MH do not match any packets. |
76 | * - You also need to set IP6T_F_PROTO in "flags" to check protocol. | 76 | * - You also need to set IP6T_F_PROTO in "flags" to check protocol. |
77 | */ | 77 | */ |
78 | __u16 proto; | 78 | __u16 proto; |
79 | /* TOS to match iff flags & IP6T_F_TOS */ | 79 | /* TOS to match iff flags & IP6T_F_TOS */ |
80 | __u8 tos; | 80 | __u8 tos; |
81 | 81 | ||
82 | /* Flags word */ | 82 | /* Flags word */ |
83 | __u8 flags; | 83 | __u8 flags; |
84 | /* Inverse flags */ | 84 | /* Inverse flags */ |
85 | __u8 invflags; | 85 | __u8 invflags; |
86 | }; | 86 | }; |
87 | 87 | ||
88 | /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ | 88 | /* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ |
89 | #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper | 89 | #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper |
90 | protocols */ | 90 | protocols */ |
91 | #define IP6T_F_TOS 0x02 /* Match the TOS. */ | 91 | #define IP6T_F_TOS 0x02 /* Match the TOS. */ |
92 | #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ | 92 | #define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ |
93 | #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ | 93 | #define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ |
94 | 94 | ||
95 | /* Values for "inv" field in struct ip6t_ip6. */ | 95 | /* Values for "inv" field in struct ip6t_ip6. */ |
96 | #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ | 96 | #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ |
97 | #define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ | 97 | #define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ |
98 | #define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ | 98 | #define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ |
99 | #define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ | 99 | #define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ |
100 | #define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST IP. */ | 100 | #define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST IP. */ |
101 | #define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ | 101 | #define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ |
102 | #define IP6T_INV_PROTO XT_INV_PROTO | 102 | #define IP6T_INV_PROTO XT_INV_PROTO |
103 | #define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ | 103 | #define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ |
104 | 104 | ||
105 | /* This structure defines each of the firewall rules. Consists of 3 | 105 | /* This structure defines each of the firewall rules. Consists of 3 |
106 | parts which are 1) general IP header stuff 2) match specific | 106 | parts which are 1) general IP header stuff 2) match specific |
107 | stuff 3) the target to perform if the rule matches */ | 107 | stuff 3) the target to perform if the rule matches */ |
108 | struct ip6t_entry { | 108 | struct ip6t_entry { |
109 | struct ip6t_ip6 ipv6; | 109 | struct ip6t_ip6 ipv6; |
110 | 110 | ||
111 | /* Mark with fields that we care about. */ | 111 | /* Mark with fields that we care about. */ |
112 | unsigned int nfcache; | 112 | unsigned int nfcache; |
113 | 113 | ||
114 | /* Size of ip6t_entry + matches */ | 114 | /* Size of ip6t_entry + matches */ |
115 | __u16 target_offset; | 115 | __u16 target_offset; |
116 | /* Size of ip6t_entry + matches + target */ | 116 | /* Size of ip6t_entry + matches + target */ |
117 | __u16 next_offset; | 117 | __u16 next_offset; |
118 | 118 | ||
119 | /* Back pointer */ | 119 | /* Back pointer */ |
120 | unsigned int comefrom; | 120 | unsigned int comefrom; |
121 | 121 | ||
122 | /* Packet and byte counters. */ | 122 | /* Packet and byte counters. */ |
123 | struct xt_counters counters; | 123 | struct xt_counters counters; |
124 | 124 | ||
125 | /* The matches (if any), then the target. */ | 125 | /* The matches (if any), then the target. */ |
126 | unsigned char elems[0]; | 126 | unsigned char elems[0]; |
127 | }; | 127 | }; |
128 | 128 | ||
129 | /* Standard entry */ | 129 | /* Standard entry */ |
130 | struct ip6t_standard { | 130 | struct ip6t_standard { |
131 | struct ip6t_entry entry; | 131 | struct ip6t_entry entry; |
132 | struct xt_standard_target target; | 132 | struct xt_standard_target target; |
133 | }; | 133 | }; |
134 | 134 | ||
135 | struct ip6t_error { | 135 | struct ip6t_error { |
136 | struct ip6t_entry entry; | 136 | struct ip6t_entry entry; |
137 | struct xt_error_target target; | 137 | struct xt_error_target target; |
138 | }; | 138 | }; |
139 | 139 | ||
140 | #define IP6T_ENTRY_INIT(__size) \ | 140 | #define IP6T_ENTRY_INIT(__size) \ |
141 | { \ | 141 | { \ |
142 | .target_offset = sizeof(struct ip6t_entry), \ | 142 | .target_offset = sizeof(struct ip6t_entry), \ |
143 | .next_offset = (__size), \ | 143 | .next_offset = (__size), \ |
144 | } | 144 | } |
145 | 145 | ||
146 | #define IP6T_STANDARD_INIT(__verdict) \ | 146 | #define IP6T_STANDARD_INIT(__verdict) \ |
147 | { \ | 147 | { \ |
148 | .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ | 148 | .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ |
149 | .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ | 149 | .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ |
150 | sizeof(struct xt_standard_target)), \ | 150 | sizeof(struct xt_standard_target)), \ |
151 | .target.verdict = -(__verdict) - 1, \ | 151 | .target.verdict = -(__verdict) - 1, \ |
152 | } | 152 | } |
153 | 153 | ||
154 | #define IP6T_ERROR_INIT \ | 154 | #define IP6T_ERROR_INIT \ |
155 | { \ | 155 | { \ |
156 | .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ | 156 | .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ |
157 | .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ | 157 | .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ |
158 | sizeof(struct xt_error_target)), \ | 158 | sizeof(struct xt_error_target)), \ |
159 | .target.errorname = "ERROR", \ | 159 | .target.errorname = "ERROR", \ |
160 | } | 160 | } |
161 | 161 | ||
162 | /* | 162 | /* |
163 | * New IP firewall options for [gs]etsockopt at the RAW IP level. | 163 | * New IP firewall options for [gs]etsockopt at the RAW IP level. |
164 | * Unlike BSD Linux inherits IP options so you don't have to use | 164 | * Unlike BSD Linux inherits IP options so you don't have to use |
165 | * a raw socket for this. Instead we check rights in the calls. | 165 | * a raw socket for this. Instead we check rights in the calls. |
166 | * | 166 | * |
167 | * ATTENTION: check linux/in6.h before adding new number here. | 167 | * ATTENTION: check linux/in6.h before adding new number here. |
168 | */ | 168 | */ |
169 | #define IP6T_BASE_CTL 64 | 169 | #define IP6T_BASE_CTL 64 |
170 | 170 | ||
171 | #define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) | 171 | #define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) |
172 | #define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) | 172 | #define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) |
173 | #define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS | 173 | #define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS |
174 | 174 | ||
175 | #define IP6T_SO_GET_INFO (IP6T_BASE_CTL) | 175 | #define IP6T_SO_GET_INFO (IP6T_BASE_CTL) |
176 | #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) | 176 | #define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) |
177 | #define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) | 177 | #define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) |
178 | #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) | 178 | #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) |
179 | #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET | 179 | #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET |
180 | 180 | ||
181 | /* obtain original address if REDIRECT'd connection */ | ||
182 | #define IP6T_SO_ORIGINAL_DST 80 | ||
183 | |||
181 | /* ICMP matching stuff */ | 184 | /* ICMP matching stuff */ |
182 | struct ip6t_icmp { | 185 | struct ip6t_icmp { |
183 | __u8 type; /* type to match */ | 186 | __u8 type; /* type to match */ |
184 | __u8 code[2]; /* range of code */ | 187 | __u8 code[2]; /* range of code */ |
185 | __u8 invflags; /* Inverse flags */ | 188 | __u8 invflags; /* Inverse flags */ |
186 | }; | 189 | }; |
187 | 190 | ||
188 | /* Values for "invflags" field of struct ip6t_icmp. */ | 191 | /* Values for "invflags" field of struct ip6t_icmp. */ |
189 | #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ | 192 | #define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ |
190 | 193 | ||
191 | /* The argument to IP6T_SO_GET_INFO */ | 194 | /* The argument to IP6T_SO_GET_INFO */ |
192 | struct ip6t_getinfo { | 195 | struct ip6t_getinfo { |
193 | /* Which table: caller fills this in. */ | 196 | /* Which table: caller fills this in. */ |
194 | char name[XT_TABLE_MAXNAMELEN]; | 197 | char name[XT_TABLE_MAXNAMELEN]; |
195 | 198 | ||
196 | /* Kernel fills these in. */ | 199 | /* Kernel fills these in. */ |
197 | /* Which hook entry points are valid: bitmask */ | 200 | /* Which hook entry points are valid: bitmask */ |
198 | unsigned int valid_hooks; | 201 | unsigned int valid_hooks; |
199 | 202 | ||
200 | /* Hook entry points: one per netfilter hook. */ | 203 | /* Hook entry points: one per netfilter hook. */ |
201 | unsigned int hook_entry[NF_INET_NUMHOOKS]; | 204 | unsigned int hook_entry[NF_INET_NUMHOOKS]; |
202 | 205 | ||
203 | /* Underflow points. */ | 206 | /* Underflow points. */ |
204 | unsigned int underflow[NF_INET_NUMHOOKS]; | 207 | unsigned int underflow[NF_INET_NUMHOOKS]; |
205 | 208 | ||
206 | /* Number of entries */ | 209 | /* Number of entries */ |
207 | unsigned int num_entries; | 210 | unsigned int num_entries; |
208 | 211 | ||
209 | /* Size of entries. */ | 212 | /* Size of entries. */ |
210 | unsigned int size; | 213 | unsigned int size; |
211 | }; | 214 | }; |
212 | 215 | ||
213 | /* The argument to IP6T_SO_SET_REPLACE. */ | 216 | /* The argument to IP6T_SO_SET_REPLACE. */ |
214 | struct ip6t_replace { | 217 | struct ip6t_replace { |
215 | /* Which table. */ | 218 | /* Which table. */ |
216 | char name[XT_TABLE_MAXNAMELEN]; | 219 | char name[XT_TABLE_MAXNAMELEN]; |
217 | 220 | ||
218 | /* Which hook entry points are valid: bitmask. You can't | 221 | /* Which hook entry points are valid: bitmask. You can't |
219 | change this. */ | 222 | change this. */ |
220 | unsigned int valid_hooks; | 223 | unsigned int valid_hooks; |
221 | 224 | ||
222 | /* Number of entries */ | 225 | /* Number of entries */ |
223 | unsigned int num_entries; | 226 | unsigned int num_entries; |
224 | 227 | ||
225 | /* Total size of new entries */ | 228 | /* Total size of new entries */ |
226 | unsigned int size; | 229 | unsigned int size; |
227 | 230 | ||
228 | /* Hook entry points. */ | 231 | /* Hook entry points. */ |
229 | unsigned int hook_entry[NF_INET_NUMHOOKS]; | 232 | unsigned int hook_entry[NF_INET_NUMHOOKS]; |
230 | 233 | ||
231 | /* Underflow points. */ | 234 | /* Underflow points. */ |
232 | unsigned int underflow[NF_INET_NUMHOOKS]; | 235 | unsigned int underflow[NF_INET_NUMHOOKS]; |
233 | 236 | ||
234 | /* Information about old entries: */ | 237 | /* Information about old entries: */ |
235 | /* Number of counters (must be equal to current number of entries). */ | 238 | /* Number of counters (must be equal to current number of entries). */ |
236 | unsigned int num_counters; | 239 | unsigned int num_counters; |
237 | /* The old entries' counters. */ | 240 | /* The old entries' counters. */ |
238 | struct xt_counters __user *counters; | 241 | struct xt_counters __user *counters; |
239 | 242 | ||
240 | /* The entries (hang off end: not really an array). */ | 243 | /* The entries (hang off end: not really an array). */ |
241 | struct ip6t_entry entries[0]; | 244 | struct ip6t_entry entries[0]; |
242 | }; | 245 | }; |
243 | 246 | ||
244 | /* The argument to IP6T_SO_GET_ENTRIES. */ | 247 | /* The argument to IP6T_SO_GET_ENTRIES. */ |
245 | struct ip6t_get_entries { | 248 | struct ip6t_get_entries { |
246 | /* Which table: user fills this in. */ | 249 | /* Which table: user fills this in. */ |
247 | char name[XT_TABLE_MAXNAMELEN]; | 250 | char name[XT_TABLE_MAXNAMELEN]; |
248 | 251 | ||
249 | /* User fills this in: total entry size. */ | 252 | /* User fills this in: total entry size. */ |
250 | unsigned int size; | 253 | unsigned int size; |
251 | 254 | ||
252 | /* The entries. */ | 255 | /* The entries. */ |
253 | struct ip6t_entry entrytable[0]; | 256 | struct ip6t_entry entrytable[0]; |
254 | }; | 257 | }; |
255 | 258 | ||
256 | /* Helper functions */ | 259 | /* Helper functions */ |
257 | static __inline__ struct xt_entry_target * | 260 | static __inline__ struct xt_entry_target * |
258 | ip6t_get_target(struct ip6t_entry *e) | 261 | ip6t_get_target(struct ip6t_entry *e) |
259 | { | 262 | { |
260 | return (void *)e + e->target_offset; | 263 | return (void *)e + e->target_offset; |
261 | } | 264 | } |
262 | 265 | ||
263 | /* | 266 | /* |
264 | * Main firewall chains definitions and global var's definitions. | 267 | * Main firewall chains definitions and global var's definitions. |
265 | */ | 268 | */ |
266 | 269 | ||
267 | #endif /* _UAPI_IP6_TABLES_H */ | 270 | #endif /* _UAPI_IP6_TABLES_H */ |
268 | 271 |
net/ipv4/netfilter/iptable_nat.c
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> |
3 | * (C) 2011 Patrick McHardy <kaber@trash.net> | 3 | * (C) 2011 Patrick McHardy <kaber@trash.net> |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/netfilter.h> | 11 | #include <linux/netfilter.h> |
12 | #include <linux/netfilter_ipv4.h> | 12 | #include <linux/netfilter_ipv4.h> |
13 | #include <linux/netfilter_ipv4/ip_tables.h> | 13 | #include <linux/netfilter_ipv4/ip_tables.h> |
14 | #include <linux/ip.h> | 14 | #include <linux/ip.h> |
15 | #include <net/ip.h> | 15 | #include <net/ip.h> |
16 | 16 | ||
17 | #include <net/netfilter/nf_nat.h> | 17 | #include <net/netfilter/nf_nat.h> |
18 | #include <net/netfilter/nf_nat_core.h> | 18 | #include <net/netfilter/nf_nat_core.h> |
19 | #include <net/netfilter/nf_nat_l3proto.h> | 19 | #include <net/netfilter/nf_nat_l3proto.h> |
20 | 20 | ||
21 | static const struct xt_table nf_nat_ipv4_table = { | 21 | static const struct xt_table nf_nat_ipv4_table = { |
22 | .name = "nat", | 22 | .name = "nat", |
23 | .valid_hooks = (1 << NF_INET_PRE_ROUTING) | | 23 | .valid_hooks = (1 << NF_INET_PRE_ROUTING) | |
24 | (1 << NF_INET_POST_ROUTING) | | 24 | (1 << NF_INET_POST_ROUTING) | |
25 | (1 << NF_INET_LOCAL_OUT) | | 25 | (1 << NF_INET_LOCAL_OUT) | |
26 | (1 << NF_INET_LOCAL_IN), | 26 | (1 << NF_INET_LOCAL_IN), |
27 | .me = THIS_MODULE, | 27 | .me = THIS_MODULE, |
28 | .af = NFPROTO_IPV4, | 28 | .af = NFPROTO_IPV4, |
29 | }; | 29 | }; |
30 | 30 | ||
31 | static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | 31 | static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
32 | { | 32 | { |
33 | /* Force range to this IP; let proto decide mapping for | 33 | /* Force range to this IP; let proto decide mapping for |
34 | * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). | 34 | * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). |
35 | */ | 35 | */ |
36 | struct nf_nat_range range; | 36 | struct nf_nat_range range; |
37 | 37 | ||
38 | range.flags = 0; | 38 | range.flags = 0; |
39 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, | 39 | pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, |
40 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? | 40 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? |
41 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : | 41 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : |
42 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); | 42 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); |
43 | 43 | ||
44 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | 44 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); |
45 | } | 45 | } |
46 | 46 | ||
47 | static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, | 47 | static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, |
48 | const struct net_device *in, | 48 | const struct net_device *in, |
49 | const struct net_device *out, | 49 | const struct net_device *out, |
50 | struct nf_conn *ct) | 50 | struct nf_conn *ct) |
51 | { | 51 | { |
52 | struct net *net = nf_ct_net(ct); | 52 | struct net *net = nf_ct_net(ct); |
53 | unsigned int ret; | 53 | unsigned int ret; |
54 | 54 | ||
55 | ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); | 55 | ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); |
56 | if (ret == NF_ACCEPT) { | 56 | if (ret == NF_ACCEPT) { |
57 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) | 57 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) |
58 | ret = alloc_null_binding(ct, hooknum); | 58 | ret = alloc_null_binding(ct, hooknum); |
59 | } | 59 | } |
60 | return ret; | 60 | return ret; |
61 | } | 61 | } |
62 | 62 | ||
63 | static unsigned int | 63 | static unsigned int |
64 | nf_nat_ipv4_fn(unsigned int hooknum, | 64 | nf_nat_ipv4_fn(unsigned int hooknum, |
65 | struct sk_buff *skb, | 65 | struct sk_buff *skb, |
66 | const struct net_device *in, | 66 | const struct net_device *in, |
67 | const struct net_device *out, | 67 | const struct net_device *out, |
68 | int (*okfn)(struct sk_buff *)) | 68 | int (*okfn)(struct sk_buff *)) |
69 | { | 69 | { |
70 | struct nf_conn *ct; | 70 | struct nf_conn *ct; |
71 | enum ip_conntrack_info ctinfo; | 71 | enum ip_conntrack_info ctinfo; |
72 | struct nf_conn_nat *nat; | 72 | struct nf_conn_nat *nat; |
73 | /* maniptype == SRC for postrouting. */ | 73 | /* maniptype == SRC for postrouting. */ |
74 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); | 74 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); |
75 | 75 | ||
76 | /* We never see fragments: conntrack defrags on pre-routing | 76 | /* We never see fragments: conntrack defrags on pre-routing |
77 | * and local-out, and nf_nat_out protects post-routing. | 77 | * and local-out, and nf_nat_out protects post-routing. |
78 | */ | 78 | */ |
79 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); | 79 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); |
80 | 80 | ||
81 | ct = nf_ct_get(skb, &ctinfo); | 81 | ct = nf_ct_get(skb, &ctinfo); |
82 | /* Can't track? It's not due to stress, or conntrack would | 82 | /* Can't track? It's not due to stress, or conntrack would |
83 | * have dropped it. Hence it's the user's responsibility to | 83 | * have dropped it. Hence it's the user's responsibility to |
84 | * packet filter it out, or implement conntrack/NAT for that | 84 | * packet filter it out, or implement conntrack/NAT for that |
85 | * protocol. 8) --RR | 85 | * protocol. 8) --RR |
86 | */ | 86 | */ |
87 | if (!ct) | 87 | if (!ct) |
88 | return NF_ACCEPT; | 88 | return NF_ACCEPT; |
89 | 89 | ||
90 | /* Don't try to NAT if this packet is not conntracked */ | 90 | /* Don't try to NAT if this packet is not conntracked */ |
91 | if (nf_ct_is_untracked(ct)) | 91 | if (nf_ct_is_untracked(ct)) |
92 | return NF_ACCEPT; | 92 | return NF_ACCEPT; |
93 | 93 | ||
94 | nat = nfct_nat(ct); | 94 | nat = nfct_nat(ct); |
95 | if (!nat) { | 95 | if (!nat) { |
96 | /* NAT module was loaded late. */ | 96 | /* NAT module was loaded late. */ |
97 | if (nf_ct_is_confirmed(ct)) | 97 | if (nf_ct_is_confirmed(ct)) |
98 | return NF_ACCEPT; | 98 | return NF_ACCEPT; |
99 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | 99 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); |
100 | if (nat == NULL) { | 100 | if (nat == NULL) { |
101 | pr_debug("failed to add NAT extension\n"); | 101 | pr_debug("failed to add NAT extension\n"); |
102 | return NF_ACCEPT; | 102 | return NF_ACCEPT; |
103 | } | 103 | } |
104 | } | 104 | } |
105 | 105 | ||
106 | switch (ctinfo) { | 106 | switch (ctinfo) { |
107 | case IP_CT_RELATED: | 107 | case IP_CT_RELATED: |
108 | case IP_CT_RELATED_REPLY: | 108 | case IP_CT_RELATED_REPLY: |
109 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { | 109 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { |
110 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, | 110 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, |
111 | hooknum)) | 111 | hooknum)) |
112 | return NF_DROP; | 112 | return NF_DROP; |
113 | else | 113 | else |
114 | return NF_ACCEPT; | 114 | return NF_ACCEPT; |
115 | } | 115 | } |
116 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ | 116 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ |
117 | case IP_CT_NEW: | 117 | case IP_CT_NEW: |
118 | /* Seen it before? This can happen for loopback, retrans, | 118 | /* Seen it before? This can happen for loopback, retrans, |
119 | * or local packets. | 119 | * or local packets. |
120 | */ | 120 | */ |
121 | if (!nf_nat_initialized(ct, maniptype)) { | 121 | if (!nf_nat_initialized(ct, maniptype)) { |
122 | unsigned int ret; | 122 | unsigned int ret; |
123 | 123 | ||
124 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); | 124 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
125 | if (ret != NF_ACCEPT) | 125 | if (ret != NF_ACCEPT) |
126 | return ret; | 126 | return ret; |
127 | } else | 127 | } else |
128 | pr_debug("Already setup manip %s for ct %p\n", | 128 | pr_debug("Already setup manip %s for ct %p\n", |
129 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", | 129 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", |
130 | ct); | 130 | ct); |
131 | break; | 131 | break; |
132 | 132 | ||
133 | default: | 133 | default: |
134 | /* ESTABLISHED */ | 134 | /* ESTABLISHED */ |
135 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || | 135 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || |
136 | ctinfo == IP_CT_ESTABLISHED_REPLY); | 136 | ctinfo == IP_CT_ESTABLISHED_REPLY); |
137 | } | 137 | } |
138 | 138 | ||
139 | return nf_nat_packet(ct, ctinfo, hooknum, skb); | 139 | return nf_nat_packet(ct, ctinfo, hooknum, skb); |
140 | } | 140 | } |
141 | 141 | ||
142 | static unsigned int | 142 | static unsigned int |
143 | nf_nat_ipv4_in(unsigned int hooknum, | 143 | nf_nat_ipv4_in(unsigned int hooknum, |
144 | struct sk_buff *skb, | 144 | struct sk_buff *skb, |
145 | const struct net_device *in, | 145 | const struct net_device *in, |
146 | const struct net_device *out, | 146 | const struct net_device *out, |
147 | int (*okfn)(struct sk_buff *)) | 147 | int (*okfn)(struct sk_buff *)) |
148 | { | 148 | { |
149 | unsigned int ret; | 149 | unsigned int ret; |
150 | __be32 daddr = ip_hdr(skb)->daddr; | 150 | __be32 daddr = ip_hdr(skb)->daddr; |
151 | 151 | ||
152 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); | 152 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
153 | if (ret != NF_DROP && ret != NF_STOLEN && | 153 | if (ret != NF_DROP && ret != NF_STOLEN && |
154 | daddr != ip_hdr(skb)->daddr) | 154 | daddr != ip_hdr(skb)->daddr) |
155 | skb_dst_drop(skb); | 155 | skb_dst_drop(skb); |
156 | 156 | ||
157 | return ret; | 157 | return ret; |
158 | } | 158 | } |
159 | 159 | ||
160 | static unsigned int | 160 | static unsigned int |
161 | nf_nat_ipv4_out(unsigned int hooknum, | 161 | nf_nat_ipv4_out(unsigned int hooknum, |
162 | struct sk_buff *skb, | 162 | struct sk_buff *skb, |
163 | const struct net_device *in, | 163 | const struct net_device *in, |
164 | const struct net_device *out, | 164 | const struct net_device *out, |
165 | int (*okfn)(struct sk_buff *)) | 165 | int (*okfn)(struct sk_buff *)) |
166 | { | 166 | { |
167 | #ifdef CONFIG_XFRM | 167 | #ifdef CONFIG_XFRM |
168 | const struct nf_conn *ct; | 168 | const struct nf_conn *ct; |
169 | enum ip_conntrack_info ctinfo; | 169 | enum ip_conntrack_info ctinfo; |
170 | #endif | 170 | #endif |
171 | unsigned int ret; | 171 | unsigned int ret; |
172 | 172 | ||
173 | /* root is playing with raw sockets. */ | 173 | /* root is playing with raw sockets. */ |
174 | if (skb->len < sizeof(struct iphdr) || | 174 | if (skb->len < sizeof(struct iphdr) || |
175 | ip_hdrlen(skb) < sizeof(struct iphdr)) | 175 | ip_hdrlen(skb) < sizeof(struct iphdr)) |
176 | return NF_ACCEPT; | 176 | return NF_ACCEPT; |
177 | 177 | ||
178 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); | 178 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
179 | #ifdef CONFIG_XFRM | 179 | #ifdef CONFIG_XFRM |
180 | if (ret != NF_DROP && ret != NF_STOLEN && | 180 | if (ret != NF_DROP && ret != NF_STOLEN && |
181 | !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | 181 | !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
182 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 182 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
183 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 183 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
184 | 184 | ||
185 | if ((ct->tuplehash[dir].tuple.src.u3.ip != | 185 | if ((ct->tuplehash[dir].tuple.src.u3.ip != |
186 | ct->tuplehash[!dir].tuple.dst.u3.ip) || | 186 | ct->tuplehash[!dir].tuple.dst.u3.ip) || |
187 | (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && | 187 | (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && |
188 | ct->tuplehash[dir].tuple.src.u.all != | 188 | ct->tuplehash[dir].tuple.src.u.all != |
189 | ct->tuplehash[!dir].tuple.dst.u.all)) | 189 | ct->tuplehash[!dir].tuple.dst.u.all)) |
190 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) | 190 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) |
191 | ret = NF_DROP; | 191 | ret = NF_DROP; |
192 | } | 192 | } |
193 | #endif | 193 | #endif |
194 | return ret; | 194 | return ret; |
195 | } | 195 | } |
196 | 196 | ||
197 | static unsigned int | 197 | static unsigned int |
198 | nf_nat_ipv4_local_fn(unsigned int hooknum, | 198 | nf_nat_ipv4_local_fn(unsigned int hooknum, |
199 | struct sk_buff *skb, | 199 | struct sk_buff *skb, |
200 | const struct net_device *in, | 200 | const struct net_device *in, |
201 | const struct net_device *out, | 201 | const struct net_device *out, |
202 | int (*okfn)(struct sk_buff *)) | 202 | int (*okfn)(struct sk_buff *)) |
203 | { | 203 | { |
204 | const struct nf_conn *ct; | 204 | const struct nf_conn *ct; |
205 | enum ip_conntrack_info ctinfo; | 205 | enum ip_conntrack_info ctinfo; |
206 | unsigned int ret; | 206 | unsigned int ret; |
207 | 207 | ||
208 | /* root is playing with raw sockets. */ | 208 | /* root is playing with raw sockets. */ |
209 | if (skb->len < sizeof(struct iphdr) || | 209 | if (skb->len < sizeof(struct iphdr) || |
210 | ip_hdrlen(skb) < sizeof(struct iphdr)) | 210 | ip_hdrlen(skb) < sizeof(struct iphdr)) |
211 | return NF_ACCEPT; | 211 | return NF_ACCEPT; |
212 | 212 | ||
213 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); | 213 | ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); |
214 | if (ret != NF_DROP && ret != NF_STOLEN && | 214 | if (ret != NF_DROP && ret != NF_STOLEN && |
215 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 215 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
216 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 216 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
217 | 217 | ||
218 | if (ct->tuplehash[dir].tuple.dst.u3.ip != | 218 | if (ct->tuplehash[dir].tuple.dst.u3.ip != |
219 | ct->tuplehash[!dir].tuple.src.u3.ip) { | 219 | ct->tuplehash[!dir].tuple.src.u3.ip) { |
220 | if (ip_route_me_harder(skb, RTN_UNSPEC)) | 220 | if (ip_route_me_harder(skb, RTN_UNSPEC)) |
221 | ret = NF_DROP; | 221 | ret = NF_DROP; |
222 | } | 222 | } |
223 | #ifdef CONFIG_XFRM | 223 | #ifdef CONFIG_XFRM |
224 | else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && | 224 | else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && |
225 | ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && | 225 | ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && |
226 | ct->tuplehash[dir].tuple.dst.u.all != | 226 | ct->tuplehash[dir].tuple.dst.u.all != |
227 | ct->tuplehash[!dir].tuple.src.u.all) | 227 | ct->tuplehash[!dir].tuple.src.u.all) |
228 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) | 228 | if (nf_xfrm_me_harder(skb, AF_INET) < 0) |
229 | ret = NF_DROP; | 229 | ret = NF_DROP; |
230 | #endif | 230 | #endif |
231 | } | 231 | } |
232 | return ret; | 232 | return ret; |
233 | } | 233 | } |
234 | 234 | ||
235 | static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { | 235 | static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { |
236 | /* Before packet filtering, change destination */ | 236 | /* Before packet filtering, change destination */ |
237 | { | 237 | { |
238 | .hook = nf_nat_ipv4_in, | 238 | .hook = nf_nat_ipv4_in, |
239 | .owner = THIS_MODULE, | 239 | .owner = THIS_MODULE, |
240 | .pf = NFPROTO_IPV4, | 240 | .pf = NFPROTO_IPV4, |
241 | .hooknum = NF_INET_PRE_ROUTING, | 241 | .hooknum = NF_INET_PRE_ROUTING, |
242 | .priority = NF_IP_PRI_NAT_DST, | 242 | .priority = NF_IP_PRI_NAT_DST, |
243 | }, | 243 | }, |
244 | /* After packet filtering, change source */ | 244 | /* After packet filtering, change source */ |
245 | { | 245 | { |
246 | .hook = nf_nat_ipv4_out, | 246 | .hook = nf_nat_ipv4_out, |
247 | .owner = THIS_MODULE, | 247 | .owner = THIS_MODULE, |
248 | .pf = NFPROTO_IPV4, | 248 | .pf = NFPROTO_IPV4, |
249 | .hooknum = NF_INET_POST_ROUTING, | 249 | .hooknum = NF_INET_POST_ROUTING, |
250 | .priority = NF_IP_PRI_NAT_SRC, | 250 | .priority = NF_IP_PRI_NAT_SRC, |
251 | }, | 251 | }, |
252 | /* Before packet filtering, change destination */ | 252 | /* Before packet filtering, change destination */ |
253 | { | 253 | { |
254 | .hook = nf_nat_ipv4_local_fn, | 254 | .hook = nf_nat_ipv4_local_fn, |
255 | .owner = THIS_MODULE, | 255 | .owner = THIS_MODULE, |
256 | .pf = NFPROTO_IPV4, | 256 | .pf = NFPROTO_IPV4, |
257 | .hooknum = NF_INET_LOCAL_OUT, | 257 | .hooknum = NF_INET_LOCAL_OUT, |
258 | .priority = NF_IP_PRI_NAT_DST, | 258 | .priority = NF_IP_PRI_NAT_DST, |
259 | }, | 259 | }, |
260 | /* After packet filtering, change source */ | 260 | /* After packet filtering, change source */ |
261 | { | 261 | { |
262 | .hook = nf_nat_ipv4_fn, | 262 | .hook = nf_nat_ipv4_fn, |
263 | .owner = THIS_MODULE, | 263 | .owner = THIS_MODULE, |
264 | .pf = NFPROTO_IPV4, | 264 | .pf = NFPROTO_IPV4, |
265 | .hooknum = NF_INET_LOCAL_IN, | 265 | .hooknum = NF_INET_LOCAL_IN, |
266 | .priority = NF_IP_PRI_NAT_SRC, | 266 | .priority = NF_IP_PRI_NAT_SRC, |
267 | }, | 267 | }, |
268 | }; | 268 | }; |
269 | 269 | ||
270 | static int __net_init iptable_nat_net_init(struct net *net) | 270 | static int __net_init iptable_nat_net_init(struct net *net) |
271 | { | 271 | { |
272 | struct ipt_replace *repl; | 272 | struct ipt_replace *repl; |
273 | 273 | ||
274 | repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); | 274 | repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); |
275 | if (repl == NULL) | 275 | if (repl == NULL) |
276 | return -ENOMEM; | 276 | return -ENOMEM; |
277 | net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); | 277 | net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); |
278 | kfree(repl); | 278 | kfree(repl); |
279 | if (IS_ERR(net->ipv4.nat_table)) | 279 | return PTR_RET(net->ipv4.nat_table); |
280 | return PTR_ERR(net->ipv4.nat_table); | ||
281 | return 0; | ||
282 | } | 280 | } |
283 | 281 | ||
284 | static void __net_exit iptable_nat_net_exit(struct net *net) | 282 | static void __net_exit iptable_nat_net_exit(struct net *net) |
285 | { | 283 | { |
286 | ipt_unregister_table(net, net->ipv4.nat_table); | 284 | ipt_unregister_table(net, net->ipv4.nat_table); |
287 | } | 285 | } |
288 | 286 | ||
289 | static struct pernet_operations iptable_nat_net_ops = { | 287 | static struct pernet_operations iptable_nat_net_ops = { |
290 | .init = iptable_nat_net_init, | 288 | .init = iptable_nat_net_init, |
291 | .exit = iptable_nat_net_exit, | 289 | .exit = iptable_nat_net_exit, |
292 | }; | 290 | }; |
293 | 291 | ||
294 | static int __init iptable_nat_init(void) | 292 | static int __init iptable_nat_init(void) |
295 | { | 293 | { |
296 | int err; | 294 | int err; |
297 | 295 | ||
298 | err = register_pernet_subsys(&iptable_nat_net_ops); | 296 | err = register_pernet_subsys(&iptable_nat_net_ops); |
299 | if (err < 0) | 297 | if (err < 0) |
300 | goto err1; | 298 | goto err1; |
301 | 299 | ||
302 | err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); | 300 | err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); |
303 | if (err < 0) | 301 | if (err < 0) |
304 | goto err2; | 302 | goto err2; |
305 | return 0; | 303 | return 0; |
306 | 304 | ||
307 | err2: | 305 | err2: |
308 | unregister_pernet_subsys(&iptable_nat_net_ops); | 306 | unregister_pernet_subsys(&iptable_nat_net_ops); |
309 | err1: | 307 | err1: |
310 | return err; | 308 | return err; |
311 | } | 309 | } |
312 | 310 | ||
313 | static void __exit iptable_nat_exit(void) | 311 | static void __exit iptable_nat_exit(void) |
314 | { | 312 | { |
315 | nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); | 313 | nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); |
316 | unregister_pernet_subsys(&iptable_nat_net_ops); | 314 | unregister_pernet_subsys(&iptable_nat_net_ops); |
317 | } | 315 | } |
318 | 316 | ||
319 | module_init(iptable_nat_init); | 317 | module_init(iptable_nat_init); |
320 | module_exit(iptable_nat_exit); | 318 | module_exit(iptable_nat_exit); |
321 | 319 | ||
322 | MODULE_LICENSE("GPL"); | 320 | MODULE_LICENSE("GPL"); |
323 | 321 |
net/ipv6/netfilter/ip6table_nat.c
1 | /* | 1 | /* |
2 | * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> | 2 | * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT | 8 | * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT |
9 | * funded by Astaro. | 9 | * funded by Astaro. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/netfilter.h> | 13 | #include <linux/netfilter.h> |
14 | #include <linux/netfilter_ipv6.h> | 14 | #include <linux/netfilter_ipv6.h> |
15 | #include <linux/netfilter_ipv6/ip6_tables.h> | 15 | #include <linux/netfilter_ipv6/ip6_tables.h> |
16 | #include <linux/ipv6.h> | 16 | #include <linux/ipv6.h> |
17 | #include <net/ipv6.h> | 17 | #include <net/ipv6.h> |
18 | 18 | ||
19 | #include <net/netfilter/nf_nat.h> | 19 | #include <net/netfilter/nf_nat.h> |
20 | #include <net/netfilter/nf_nat_core.h> | 20 | #include <net/netfilter/nf_nat_core.h> |
21 | #include <net/netfilter/nf_nat_l3proto.h> | 21 | #include <net/netfilter/nf_nat_l3proto.h> |
22 | 22 | ||
23 | static const struct xt_table nf_nat_ipv6_table = { | 23 | static const struct xt_table nf_nat_ipv6_table = { |
24 | .name = "nat", | 24 | .name = "nat", |
25 | .valid_hooks = (1 << NF_INET_PRE_ROUTING) | | 25 | .valid_hooks = (1 << NF_INET_PRE_ROUTING) | |
26 | (1 << NF_INET_POST_ROUTING) | | 26 | (1 << NF_INET_POST_ROUTING) | |
27 | (1 << NF_INET_LOCAL_OUT) | | 27 | (1 << NF_INET_LOCAL_OUT) | |
28 | (1 << NF_INET_LOCAL_IN), | 28 | (1 << NF_INET_LOCAL_IN), |
29 | .me = THIS_MODULE, | 29 | .me = THIS_MODULE, |
30 | .af = NFPROTO_IPV6, | 30 | .af = NFPROTO_IPV6, |
31 | }; | 31 | }; |
32 | 32 | ||
33 | static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | 33 | static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
34 | { | 34 | { |
35 | /* Force range to this IP; let proto decide mapping for | 35 | /* Force range to this IP; let proto decide mapping for |
36 | * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). | 36 | * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). |
37 | */ | 37 | */ |
38 | struct nf_nat_range range; | 38 | struct nf_nat_range range; |
39 | 39 | ||
40 | range.flags = 0; | 40 | range.flags = 0; |
41 | pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, | 41 | pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, |
42 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? | 42 | HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? |
43 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : | 43 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : |
44 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); | 44 | &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); |
45 | 45 | ||
46 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | 46 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); |
47 | } | 47 | } |
48 | 48 | ||
49 | static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, | 49 | static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, |
50 | const struct net_device *in, | 50 | const struct net_device *in, |
51 | const struct net_device *out, | 51 | const struct net_device *out, |
52 | struct nf_conn *ct) | 52 | struct nf_conn *ct) |
53 | { | 53 | { |
54 | struct net *net = nf_ct_net(ct); | 54 | struct net *net = nf_ct_net(ct); |
55 | unsigned int ret; | 55 | unsigned int ret; |
56 | 56 | ||
57 | ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); | 57 | ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); |
58 | if (ret == NF_ACCEPT) { | 58 | if (ret == NF_ACCEPT) { |
59 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) | 59 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) |
60 | ret = alloc_null_binding(ct, hooknum); | 60 | ret = alloc_null_binding(ct, hooknum); |
61 | } | 61 | } |
62 | return ret; | 62 | return ret; |
63 | } | 63 | } |
64 | 64 | ||
65 | static unsigned int | 65 | static unsigned int |
66 | nf_nat_ipv6_fn(unsigned int hooknum, | 66 | nf_nat_ipv6_fn(unsigned int hooknum, |
67 | struct sk_buff *skb, | 67 | struct sk_buff *skb, |
68 | const struct net_device *in, | 68 | const struct net_device *in, |
69 | const struct net_device *out, | 69 | const struct net_device *out, |
70 | int (*okfn)(struct sk_buff *)) | 70 | int (*okfn)(struct sk_buff *)) |
71 | { | 71 | { |
72 | struct nf_conn *ct; | 72 | struct nf_conn *ct; |
73 | enum ip_conntrack_info ctinfo; | 73 | enum ip_conntrack_info ctinfo; |
74 | struct nf_conn_nat *nat; | 74 | struct nf_conn_nat *nat; |
75 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); | 75 | enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); |
76 | __be16 frag_off; | 76 | __be16 frag_off; |
77 | int hdrlen; | 77 | int hdrlen; |
78 | u8 nexthdr; | 78 | u8 nexthdr; |
79 | 79 | ||
80 | ct = nf_ct_get(skb, &ctinfo); | 80 | ct = nf_ct_get(skb, &ctinfo); |
81 | /* Can't track? It's not due to stress, or conntrack would | 81 | /* Can't track? It's not due to stress, or conntrack would |
82 | * have dropped it. Hence it's the user's responsibility to | 82 | * have dropped it. Hence it's the user's responsibility to |
83 | * packet filter it out, or implement conntrack/NAT for that | 83 | * packet filter it out, or implement conntrack/NAT for that |
84 | * protocol. 8) --RR | 84 | * protocol. 8) --RR |
85 | */ | 85 | */ |
86 | if (!ct) | 86 | if (!ct) |
87 | return NF_ACCEPT; | 87 | return NF_ACCEPT; |
88 | 88 | ||
89 | /* Don't try to NAT if this packet is not conntracked */ | 89 | /* Don't try to NAT if this packet is not conntracked */ |
90 | if (nf_ct_is_untracked(ct)) | 90 | if (nf_ct_is_untracked(ct)) |
91 | return NF_ACCEPT; | 91 | return NF_ACCEPT; |
92 | 92 | ||
93 | nat = nfct_nat(ct); | 93 | nat = nfct_nat(ct); |
94 | if (!nat) { | 94 | if (!nat) { |
95 | /* NAT module was loaded late. */ | 95 | /* NAT module was loaded late. */ |
96 | if (nf_ct_is_confirmed(ct)) | 96 | if (nf_ct_is_confirmed(ct)) |
97 | return NF_ACCEPT; | 97 | return NF_ACCEPT; |
98 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | 98 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); |
99 | if (nat == NULL) { | 99 | if (nat == NULL) { |
100 | pr_debug("failed to add NAT extension\n"); | 100 | pr_debug("failed to add NAT extension\n"); |
101 | return NF_ACCEPT; | 101 | return NF_ACCEPT; |
102 | } | 102 | } |
103 | } | 103 | } |
104 | 104 | ||
105 | switch (ctinfo) { | 105 | switch (ctinfo) { |
106 | case IP_CT_RELATED: | 106 | case IP_CT_RELATED: |
107 | case IP_CT_RELATED_REPLY: | 107 | case IP_CT_RELATED_REPLY: |
108 | nexthdr = ipv6_hdr(skb)->nexthdr; | 108 | nexthdr = ipv6_hdr(skb)->nexthdr; |
109 | hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), | 109 | hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), |
110 | &nexthdr, &frag_off); | 110 | &nexthdr, &frag_off); |
111 | 111 | ||
112 | if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { | 112 | if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { |
113 | if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, | 113 | if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, |
114 | hooknum, hdrlen)) | 114 | hooknum, hdrlen)) |
115 | return NF_DROP; | 115 | return NF_DROP; |
116 | else | 116 | else |
117 | return NF_ACCEPT; | 117 | return NF_ACCEPT; |
118 | } | 118 | } |
119 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ | 119 | /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ |
120 | case IP_CT_NEW: | 120 | case IP_CT_NEW: |
121 | /* Seen it before? This can happen for loopback, retrans, | 121 | /* Seen it before? This can happen for loopback, retrans, |
122 | * or local packets. | 122 | * or local packets. |
123 | */ | 123 | */ |
124 | if (!nf_nat_initialized(ct, maniptype)) { | 124 | if (!nf_nat_initialized(ct, maniptype)) { |
125 | unsigned int ret; | 125 | unsigned int ret; |
126 | 126 | ||
127 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); | 127 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
128 | if (ret != NF_ACCEPT) | 128 | if (ret != NF_ACCEPT) |
129 | return ret; | 129 | return ret; |
130 | } else | 130 | } else |
131 | pr_debug("Already setup manip %s for ct %p\n", | 131 | pr_debug("Already setup manip %s for ct %p\n", |
132 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", | 132 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", |
133 | ct); | 133 | ct); |
134 | break; | 134 | break; |
135 | 135 | ||
136 | default: | 136 | default: |
137 | /* ESTABLISHED */ | 137 | /* ESTABLISHED */ |
138 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || | 138 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || |
139 | ctinfo == IP_CT_ESTABLISHED_REPLY); | 139 | ctinfo == IP_CT_ESTABLISHED_REPLY); |
140 | } | 140 | } |
141 | 141 | ||
142 | return nf_nat_packet(ct, ctinfo, hooknum, skb); | 142 | return nf_nat_packet(ct, ctinfo, hooknum, skb); |
143 | } | 143 | } |
144 | 144 | ||
145 | static unsigned int | 145 | static unsigned int |
146 | nf_nat_ipv6_in(unsigned int hooknum, | 146 | nf_nat_ipv6_in(unsigned int hooknum, |
147 | struct sk_buff *skb, | 147 | struct sk_buff *skb, |
148 | const struct net_device *in, | 148 | const struct net_device *in, |
149 | const struct net_device *out, | 149 | const struct net_device *out, |
150 | int (*okfn)(struct sk_buff *)) | 150 | int (*okfn)(struct sk_buff *)) |
151 | { | 151 | { |
152 | unsigned int ret; | 152 | unsigned int ret; |
153 | struct in6_addr daddr = ipv6_hdr(skb)->daddr; | 153 | struct in6_addr daddr = ipv6_hdr(skb)->daddr; |
154 | 154 | ||
155 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); | 155 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); |
156 | if (ret != NF_DROP && ret != NF_STOLEN && | 156 | if (ret != NF_DROP && ret != NF_STOLEN && |
157 | ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) | 157 | ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) |
158 | skb_dst_drop(skb); | 158 | skb_dst_drop(skb); |
159 | 159 | ||
160 | return ret; | 160 | return ret; |
161 | } | 161 | } |
162 | 162 | ||
163 | static unsigned int | 163 | static unsigned int |
164 | nf_nat_ipv6_out(unsigned int hooknum, | 164 | nf_nat_ipv6_out(unsigned int hooknum, |
165 | struct sk_buff *skb, | 165 | struct sk_buff *skb, |
166 | const struct net_device *in, | 166 | const struct net_device *in, |
167 | const struct net_device *out, | 167 | const struct net_device *out, |
168 | int (*okfn)(struct sk_buff *)) | 168 | int (*okfn)(struct sk_buff *)) |
169 | { | 169 | { |
170 | #ifdef CONFIG_XFRM | 170 | #ifdef CONFIG_XFRM |
171 | const struct nf_conn *ct; | 171 | const struct nf_conn *ct; |
172 | enum ip_conntrack_info ctinfo; | 172 | enum ip_conntrack_info ctinfo; |
173 | #endif | 173 | #endif |
174 | unsigned int ret; | 174 | unsigned int ret; |
175 | 175 | ||
176 | /* root is playing with raw sockets. */ | 176 | /* root is playing with raw sockets. */ |
177 | if (skb->len < sizeof(struct ipv6hdr)) | 177 | if (skb->len < sizeof(struct ipv6hdr)) |
178 | return NF_ACCEPT; | 178 | return NF_ACCEPT; |
179 | 179 | ||
180 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); | 180 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); |
181 | #ifdef CONFIG_XFRM | 181 | #ifdef CONFIG_XFRM |
182 | if (ret != NF_DROP && ret != NF_STOLEN && | 182 | if (ret != NF_DROP && ret != NF_STOLEN && |
183 | !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && | 183 | !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && |
184 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 184 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
185 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 185 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
186 | 186 | ||
187 | if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, | 187 | if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, |
188 | &ct->tuplehash[!dir].tuple.dst.u3) || | 188 | &ct->tuplehash[!dir].tuple.dst.u3) || |
189 | (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && | 189 | (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && |
190 | ct->tuplehash[dir].tuple.src.u.all != | 190 | ct->tuplehash[dir].tuple.src.u.all != |
191 | ct->tuplehash[!dir].tuple.dst.u.all)) | 191 | ct->tuplehash[!dir].tuple.dst.u.all)) |
192 | if (nf_xfrm_me_harder(skb, AF_INET6) < 0) | 192 | if (nf_xfrm_me_harder(skb, AF_INET6) < 0) |
193 | ret = NF_DROP; | 193 | ret = NF_DROP; |
194 | } | 194 | } |
195 | #endif | 195 | #endif |
196 | return ret; | 196 | return ret; |
197 | } | 197 | } |
198 | 198 | ||
199 | static unsigned int | 199 | static unsigned int |
200 | nf_nat_ipv6_local_fn(unsigned int hooknum, | 200 | nf_nat_ipv6_local_fn(unsigned int hooknum, |
201 | struct sk_buff *skb, | 201 | struct sk_buff *skb, |
202 | const struct net_device *in, | 202 | const struct net_device *in, |
203 | const struct net_device *out, | 203 | const struct net_device *out, |
204 | int (*okfn)(struct sk_buff *)) | 204 | int (*okfn)(struct sk_buff *)) |
205 | { | 205 | { |
206 | const struct nf_conn *ct; | 206 | const struct nf_conn *ct; |
207 | enum ip_conntrack_info ctinfo; | 207 | enum ip_conntrack_info ctinfo; |
208 | unsigned int ret; | 208 | unsigned int ret; |
209 | 209 | ||
210 | /* root is playing with raw sockets. */ | 210 | /* root is playing with raw sockets. */ |
211 | if (skb->len < sizeof(struct ipv6hdr)) | 211 | if (skb->len < sizeof(struct ipv6hdr)) |
212 | return NF_ACCEPT; | 212 | return NF_ACCEPT; |
213 | 213 | ||
214 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); | 214 | ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); |
215 | if (ret != NF_DROP && ret != NF_STOLEN && | 215 | if (ret != NF_DROP && ret != NF_STOLEN && |
216 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { | 216 | (ct = nf_ct_get(skb, &ctinfo)) != NULL) { |
217 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 217 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
218 | 218 | ||
219 | if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, | 219 | if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, |
220 | &ct->tuplehash[!dir].tuple.src.u3)) { | 220 | &ct->tuplehash[!dir].tuple.src.u3)) { |
221 | if (ip6_route_me_harder(skb)) | 221 | if (ip6_route_me_harder(skb)) |
222 | ret = NF_DROP; | 222 | ret = NF_DROP; |
223 | } | 223 | } |
224 | #ifdef CONFIG_XFRM | 224 | #ifdef CONFIG_XFRM |
225 | else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && | 225 | else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && |
226 | ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && | 226 | ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && |
227 | ct->tuplehash[dir].tuple.dst.u.all != | 227 | ct->tuplehash[dir].tuple.dst.u.all != |
228 | ct->tuplehash[!dir].tuple.src.u.all) | 228 | ct->tuplehash[!dir].tuple.src.u.all) |
229 | if (nf_xfrm_me_harder(skb, AF_INET6)) | 229 | if (nf_xfrm_me_harder(skb, AF_INET6)) |
230 | ret = NF_DROP; | 230 | ret = NF_DROP; |
231 | #endif | 231 | #endif |
232 | } | 232 | } |
233 | return ret; | 233 | return ret; |
234 | } | 234 | } |
235 | 235 | ||
236 | static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { | 236 | static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { |
237 | /* Before packet filtering, change destination */ | 237 | /* Before packet filtering, change destination */ |
238 | { | 238 | { |
239 | .hook = nf_nat_ipv6_in, | 239 | .hook = nf_nat_ipv6_in, |
240 | .owner = THIS_MODULE, | 240 | .owner = THIS_MODULE, |
241 | .pf = NFPROTO_IPV6, | 241 | .pf = NFPROTO_IPV6, |
242 | .hooknum = NF_INET_PRE_ROUTING, | 242 | .hooknum = NF_INET_PRE_ROUTING, |
243 | .priority = NF_IP6_PRI_NAT_DST, | 243 | .priority = NF_IP6_PRI_NAT_DST, |
244 | }, | 244 | }, |
245 | /* After packet filtering, change source */ | 245 | /* After packet filtering, change source */ |
246 | { | 246 | { |
247 | .hook = nf_nat_ipv6_out, | 247 | .hook = nf_nat_ipv6_out, |
248 | .owner = THIS_MODULE, | 248 | .owner = THIS_MODULE, |
249 | .pf = NFPROTO_IPV6, | 249 | .pf = NFPROTO_IPV6, |
250 | .hooknum = NF_INET_POST_ROUTING, | 250 | .hooknum = NF_INET_POST_ROUTING, |
251 | .priority = NF_IP6_PRI_NAT_SRC, | 251 | .priority = NF_IP6_PRI_NAT_SRC, |
252 | }, | 252 | }, |
253 | /* Before packet filtering, change destination */ | 253 | /* Before packet filtering, change destination */ |
254 | { | 254 | { |
255 | .hook = nf_nat_ipv6_local_fn, | 255 | .hook = nf_nat_ipv6_local_fn, |
256 | .owner = THIS_MODULE, | 256 | .owner = THIS_MODULE, |
257 | .pf = NFPROTO_IPV6, | 257 | .pf = NFPROTO_IPV6, |
258 | .hooknum = NF_INET_LOCAL_OUT, | 258 | .hooknum = NF_INET_LOCAL_OUT, |
259 | .priority = NF_IP6_PRI_NAT_DST, | 259 | .priority = NF_IP6_PRI_NAT_DST, |
260 | }, | 260 | }, |
261 | /* After packet filtering, change source */ | 261 | /* After packet filtering, change source */ |
262 | { | 262 | { |
263 | .hook = nf_nat_ipv6_fn, | 263 | .hook = nf_nat_ipv6_fn, |
264 | .owner = THIS_MODULE, | 264 | .owner = THIS_MODULE, |
265 | .pf = NFPROTO_IPV6, | 265 | .pf = NFPROTO_IPV6, |
266 | .hooknum = NF_INET_LOCAL_IN, | 266 | .hooknum = NF_INET_LOCAL_IN, |
267 | .priority = NF_IP6_PRI_NAT_SRC, | 267 | .priority = NF_IP6_PRI_NAT_SRC, |
268 | }, | 268 | }, |
269 | }; | 269 | }; |
270 | 270 | ||
271 | static int __net_init ip6table_nat_net_init(struct net *net) | 271 | static int __net_init ip6table_nat_net_init(struct net *net) |
272 | { | 272 | { |
273 | struct ip6t_replace *repl; | 273 | struct ip6t_replace *repl; |
274 | 274 | ||
275 | repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); | 275 | repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); |
276 | if (repl == NULL) | 276 | if (repl == NULL) |
277 | return -ENOMEM; | 277 | return -ENOMEM; |
278 | net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); | 278 | net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); |
279 | kfree(repl); | 279 | kfree(repl); |
280 | if (IS_ERR(net->ipv6.ip6table_nat)) | 280 | return PTR_RET(net->ipv6.ip6table_nat); |
281 | return PTR_ERR(net->ipv6.ip6table_nat); | ||
282 | return 0; | ||
283 | } | 281 | } |
284 | 282 | ||
285 | static void __net_exit ip6table_nat_net_exit(struct net *net) | 283 | static void __net_exit ip6table_nat_net_exit(struct net *net) |
286 | { | 284 | { |
287 | ip6t_unregister_table(net, net->ipv6.ip6table_nat); | 285 | ip6t_unregister_table(net, net->ipv6.ip6table_nat); |
288 | } | 286 | } |
289 | 287 | ||
290 | static struct pernet_operations ip6table_nat_net_ops = { | 288 | static struct pernet_operations ip6table_nat_net_ops = { |
291 | .init = ip6table_nat_net_init, | 289 | .init = ip6table_nat_net_init, |
292 | .exit = ip6table_nat_net_exit, | 290 | .exit = ip6table_nat_net_exit, |
293 | }; | 291 | }; |
294 | 292 | ||
295 | static int __init ip6table_nat_init(void) | 293 | static int __init ip6table_nat_init(void) |
296 | { | 294 | { |
297 | int err; | 295 | int err; |
298 | 296 | ||
299 | err = register_pernet_subsys(&ip6table_nat_net_ops); | 297 | err = register_pernet_subsys(&ip6table_nat_net_ops); |
300 | if (err < 0) | 298 | if (err < 0) |
301 | goto err1; | 299 | goto err1; |
302 | 300 | ||
303 | err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); | 301 | err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); |
304 | if (err < 0) | 302 | if (err < 0) |
305 | goto err2; | 303 | goto err2; |
306 | return 0; | 304 | return 0; |
307 | 305 | ||
308 | err2: | 306 | err2: |
309 | unregister_pernet_subsys(&ip6table_nat_net_ops); | 307 | unregister_pernet_subsys(&ip6table_nat_net_ops); |
310 | err1: | 308 | err1: |
311 | return err; | 309 | return err; |
312 | } | 310 | } |
313 | 311 | ||
314 | static void __exit ip6table_nat_exit(void) | 312 | static void __exit ip6table_nat_exit(void) |
315 | { | 313 | { |
316 | nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); | 314 | nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); |
317 | unregister_pernet_subsys(&ip6table_nat_net_ops); | 315 | unregister_pernet_subsys(&ip6table_nat_net_ops); |
318 | } | 316 | } |
319 | 317 | ||
320 | module_init(ip6table_nat_init); | 318 | module_init(ip6table_nat_init); |
321 | module_exit(ip6table_nat_exit); | 319 | module_exit(ip6table_nat_exit); |
322 | 320 | ||
323 | MODULE_LICENSE("GPL"); | 321 | MODULE_LICENSE("GPL"); |
324 | 322 |
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
1 | /* | 1 | /* |
2 | * Copyright (C)2004 USAGI/WIDE Project | 2 | * Copyright (C)2004 USAGI/WIDE Project |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * Author: | 8 | * Author: |
9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | 9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/ipv6.h> | 13 | #include <linux/ipv6.h> |
14 | #include <linux/in6.h> | 14 | #include <linux/in6.h> |
15 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
18 | #include <linux/icmp.h> | 18 | #include <linux/icmp.h> |
19 | #include <net/ipv6.h> | 19 | #include <net/ipv6.h> |
20 | #include <net/inet_frag.h> | 20 | #include <net/inet_frag.h> |
21 | 21 | ||
22 | #include <linux/netfilter_bridge.h> | 22 | #include <linux/netfilter_bridge.h> |
23 | #include <linux/netfilter_ipv6.h> | 23 | #include <linux/netfilter_ipv6.h> |
24 | #include <linux/netfilter_ipv6/ip6_tables.h> | ||
24 | #include <net/netfilter/nf_conntrack.h> | 25 | #include <net/netfilter/nf_conntrack.h> |
25 | #include <net/netfilter/nf_conntrack_helper.h> | 26 | #include <net/netfilter/nf_conntrack_helper.h> |
26 | #include <net/netfilter/nf_conntrack_l4proto.h> | 27 | #include <net/netfilter/nf_conntrack_l4proto.h> |
27 | #include <net/netfilter/nf_conntrack_l3proto.h> | 28 | #include <net/netfilter/nf_conntrack_l3proto.h> |
28 | #include <net/netfilter/nf_conntrack_core.h> | 29 | #include <net/netfilter/nf_conntrack_core.h> |
29 | #include <net/netfilter/nf_conntrack_zones.h> | 30 | #include <net/netfilter/nf_conntrack_zones.h> |
30 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> | 31 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> |
31 | #include <net/netfilter/nf_nat_helper.h> | 32 | #include <net/netfilter/nf_nat_helper.h> |
32 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | 33 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> |
33 | #include <net/netfilter/nf_log.h> | 34 | #include <net/netfilter/nf_log.h> |
34 | 35 | ||
35 | static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | 36 | static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, |
36 | struct nf_conntrack_tuple *tuple) | 37 | struct nf_conntrack_tuple *tuple) |
37 | { | 38 | { |
38 | const u_int32_t *ap; | 39 | const u_int32_t *ap; |
39 | u_int32_t _addrs[8]; | 40 | u_int32_t _addrs[8]; |
40 | 41 | ||
41 | ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), | 42 | ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), |
42 | sizeof(_addrs), _addrs); | 43 | sizeof(_addrs), _addrs); |
43 | if (ap == NULL) | 44 | if (ap == NULL) |
44 | return false; | 45 | return false; |
45 | 46 | ||
46 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); | 47 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); |
47 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); | 48 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); |
48 | 49 | ||
49 | return true; | 50 | return true; |
50 | } | 51 | } |
51 | 52 | ||
52 | static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, | 53 | static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, |
53 | const struct nf_conntrack_tuple *orig) | 54 | const struct nf_conntrack_tuple *orig) |
54 | { | 55 | { |
55 | memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); | 56 | memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); |
56 | memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); | 57 | memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); |
57 | 58 | ||
58 | return true; | 59 | return true; |
59 | } | 60 | } |
60 | 61 | ||
61 | static int ipv6_print_tuple(struct seq_file *s, | 62 | static int ipv6_print_tuple(struct seq_file *s, |
62 | const struct nf_conntrack_tuple *tuple) | 63 | const struct nf_conntrack_tuple *tuple) |
63 | { | 64 | { |
64 | return seq_printf(s, "src=%pI6 dst=%pI6 ", | 65 | return seq_printf(s, "src=%pI6 dst=%pI6 ", |
65 | tuple->src.u3.ip6, tuple->dst.u3.ip6); | 66 | tuple->src.u3.ip6, tuple->dst.u3.ip6); |
66 | } | 67 | } |
67 | 68 | ||
68 | static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 69 | static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
69 | unsigned int *dataoff, u_int8_t *protonum) | 70 | unsigned int *dataoff, u_int8_t *protonum) |
70 | { | 71 | { |
71 | unsigned int extoff = nhoff + sizeof(struct ipv6hdr); | 72 | unsigned int extoff = nhoff + sizeof(struct ipv6hdr); |
72 | __be16 frag_off; | 73 | __be16 frag_off; |
73 | int protoff; | 74 | int protoff; |
74 | u8 nexthdr; | 75 | u8 nexthdr; |
75 | 76 | ||
76 | if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), | 77 | if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), |
77 | &nexthdr, sizeof(nexthdr)) != 0) { | 78 | &nexthdr, sizeof(nexthdr)) != 0) { |
78 | pr_debug("ip6_conntrack_core: can't get nexthdr\n"); | 79 | pr_debug("ip6_conntrack_core: can't get nexthdr\n"); |
79 | return -NF_ACCEPT; | 80 | return -NF_ACCEPT; |
80 | } | 81 | } |
81 | protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); | 82 | protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); |
82 | /* | 83 | /* |
83 | * (protoff == skb->len) means that the packet has no data | 84 | * (protoff == skb->len) means that the packet has no data |
84 | * except for IPv6 & ext headers, but it's tracked anyway. - YK | 85 | * except for IPv6 & ext headers, but it's tracked anyway. - YK |
85 | */ | 86 | */ |
86 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { | 87 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { |
87 | pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); | 88 | pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); |
88 | return -NF_ACCEPT; | 89 | return -NF_ACCEPT; |
89 | } | 90 | } |
90 | 91 | ||
91 | *dataoff = protoff; | 92 | *dataoff = protoff; |
92 | *protonum = nexthdr; | 93 | *protonum = nexthdr; |
93 | return NF_ACCEPT; | 94 | return NF_ACCEPT; |
94 | } | 95 | } |
95 | 96 | ||
96 | static unsigned int ipv6_helper(unsigned int hooknum, | 97 | static unsigned int ipv6_helper(unsigned int hooknum, |
97 | struct sk_buff *skb, | 98 | struct sk_buff *skb, |
98 | const struct net_device *in, | 99 | const struct net_device *in, |
99 | const struct net_device *out, | 100 | const struct net_device *out, |
100 | int (*okfn)(struct sk_buff *)) | 101 | int (*okfn)(struct sk_buff *)) |
101 | { | 102 | { |
102 | struct nf_conn *ct; | 103 | struct nf_conn *ct; |
103 | const struct nf_conn_help *help; | 104 | const struct nf_conn_help *help; |
104 | const struct nf_conntrack_helper *helper; | 105 | const struct nf_conntrack_helper *helper; |
105 | enum ip_conntrack_info ctinfo; | 106 | enum ip_conntrack_info ctinfo; |
106 | unsigned int ret; | 107 | unsigned int ret; |
107 | __be16 frag_off; | 108 | __be16 frag_off; |
108 | int protoff; | 109 | int protoff; |
109 | u8 nexthdr; | 110 | u8 nexthdr; |
110 | 111 | ||
111 | /* This is where we call the helper: as the packet goes out. */ | 112 | /* This is where we call the helper: as the packet goes out. */ |
112 | ct = nf_ct_get(skb, &ctinfo); | 113 | ct = nf_ct_get(skb, &ctinfo); |
113 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | 114 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) |
114 | return NF_ACCEPT; | 115 | return NF_ACCEPT; |
115 | 116 | ||
116 | help = nfct_help(ct); | 117 | help = nfct_help(ct); |
117 | if (!help) | 118 | if (!help) |
118 | return NF_ACCEPT; | 119 | return NF_ACCEPT; |
119 | /* rcu_read_lock()ed by nf_hook_slow */ | 120 | /* rcu_read_lock()ed by nf_hook_slow */ |
120 | helper = rcu_dereference(help->helper); | 121 | helper = rcu_dereference(help->helper); |
121 | if (!helper) | 122 | if (!helper) |
122 | return NF_ACCEPT; | 123 | return NF_ACCEPT; |
123 | 124 | ||
124 | nexthdr = ipv6_hdr(skb)->nexthdr; | 125 | nexthdr = ipv6_hdr(skb)->nexthdr; |
125 | protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, | 126 | protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, |
126 | &frag_off); | 127 | &frag_off); |
127 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { | 128 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { |
128 | pr_debug("proto header not found\n"); | 129 | pr_debug("proto header not found\n"); |
129 | return NF_ACCEPT; | 130 | return NF_ACCEPT; |
130 | } | 131 | } |
131 | 132 | ||
132 | ret = helper->help(skb, protoff, ct, ctinfo); | 133 | ret = helper->help(skb, protoff, ct, ctinfo); |
133 | if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { | 134 | if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { |
134 | nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, | 135 | nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, |
135 | "nf_ct_%s: dropping packet", helper->name); | 136 | "nf_ct_%s: dropping packet", helper->name); |
136 | } | 137 | } |
137 | return ret; | 138 | return ret; |
138 | } | 139 | } |
139 | 140 | ||
140 | static unsigned int ipv6_confirm(unsigned int hooknum, | 141 | static unsigned int ipv6_confirm(unsigned int hooknum, |
141 | struct sk_buff *skb, | 142 | struct sk_buff *skb, |
142 | const struct net_device *in, | 143 | const struct net_device *in, |
143 | const struct net_device *out, | 144 | const struct net_device *out, |
144 | int (*okfn)(struct sk_buff *)) | 145 | int (*okfn)(struct sk_buff *)) |
145 | { | 146 | { |
146 | struct nf_conn *ct; | 147 | struct nf_conn *ct; |
147 | enum ip_conntrack_info ctinfo; | 148 | enum ip_conntrack_info ctinfo; |
148 | unsigned char pnum = ipv6_hdr(skb)->nexthdr; | 149 | unsigned char pnum = ipv6_hdr(skb)->nexthdr; |
149 | int protoff; | 150 | int protoff; |
150 | __be16 frag_off; | 151 | __be16 frag_off; |
151 | 152 | ||
152 | ct = nf_ct_get(skb, &ctinfo); | 153 | ct = nf_ct_get(skb, &ctinfo); |
153 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | 154 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) |
154 | goto out; | 155 | goto out; |
155 | 156 | ||
156 | protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, | 157 | protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, |
157 | &frag_off); | 158 | &frag_off); |
158 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { | 159 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { |
159 | pr_debug("proto header not found\n"); | 160 | pr_debug("proto header not found\n"); |
160 | goto out; | 161 | goto out; |
161 | } | 162 | } |
162 | 163 | ||
163 | /* adjust seqs for loopback traffic only in outgoing direction */ | 164 | /* adjust seqs for loopback traffic only in outgoing direction */ |
164 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && | 165 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && |
165 | !nf_is_loopback_packet(skb)) { | 166 | !nf_is_loopback_packet(skb)) { |
166 | typeof(nf_nat_seq_adjust_hook) seq_adjust; | 167 | typeof(nf_nat_seq_adjust_hook) seq_adjust; |
167 | 168 | ||
168 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); | 169 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); |
169 | if (!seq_adjust || | 170 | if (!seq_adjust || |
170 | !seq_adjust(skb, ct, ctinfo, protoff)) { | 171 | !seq_adjust(skb, ct, ctinfo, protoff)) { |
171 | NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); | 172 | NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); |
172 | return NF_DROP; | 173 | return NF_DROP; |
173 | } | 174 | } |
174 | } | 175 | } |
175 | out: | 176 | out: |
176 | /* We've seen it coming out the other side: confirm it */ | 177 | /* We've seen it coming out the other side: confirm it */ |
177 | return nf_conntrack_confirm(skb); | 178 | return nf_conntrack_confirm(skb); |
178 | } | 179 | } |
179 | 180 | ||
180 | static unsigned int __ipv6_conntrack_in(struct net *net, | 181 | static unsigned int __ipv6_conntrack_in(struct net *net, |
181 | unsigned int hooknum, | 182 | unsigned int hooknum, |
182 | struct sk_buff *skb, | 183 | struct sk_buff *skb, |
183 | const struct net_device *in, | 184 | const struct net_device *in, |
184 | const struct net_device *out, | 185 | const struct net_device *out, |
185 | int (*okfn)(struct sk_buff *)) | 186 | int (*okfn)(struct sk_buff *)) |
186 | { | 187 | { |
187 | struct sk_buff *reasm = skb->nfct_reasm; | 188 | struct sk_buff *reasm = skb->nfct_reasm; |
188 | const struct nf_conn_help *help; | 189 | const struct nf_conn_help *help; |
189 | struct nf_conn *ct; | 190 | struct nf_conn *ct; |
190 | enum ip_conntrack_info ctinfo; | 191 | enum ip_conntrack_info ctinfo; |
191 | 192 | ||
192 | /* This packet is fragmented and has reassembled packet. */ | 193 | /* This packet is fragmented and has reassembled packet. */ |
193 | if (reasm) { | 194 | if (reasm) { |
194 | /* Reassembled packet isn't parsed yet ? */ | 195 | /* Reassembled packet isn't parsed yet ? */ |
195 | if (!reasm->nfct) { | 196 | if (!reasm->nfct) { |
196 | unsigned int ret; | 197 | unsigned int ret; |
197 | 198 | ||
198 | ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); | 199 | ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); |
199 | if (ret != NF_ACCEPT) | 200 | if (ret != NF_ACCEPT) |
200 | return ret; | 201 | return ret; |
201 | } | 202 | } |
202 | 203 | ||
203 | /* Conntrack helpers need the entire reassembled packet in the | 204 | /* Conntrack helpers need the entire reassembled packet in the |
204 | * POST_ROUTING hook. In case of unconfirmed connections NAT | 205 | * POST_ROUTING hook. In case of unconfirmed connections NAT |
205 | * might reassign a helper, so the entire packet is also | 206 | * might reassign a helper, so the entire packet is also |
206 | * required. | 207 | * required. |
207 | */ | 208 | */ |
208 | ct = nf_ct_get(reasm, &ctinfo); | 209 | ct = nf_ct_get(reasm, &ctinfo); |
209 | if (ct != NULL && !nf_ct_is_untracked(ct)) { | 210 | if (ct != NULL && !nf_ct_is_untracked(ct)) { |
210 | help = nfct_help(ct); | 211 | help = nfct_help(ct); |
211 | if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { | 212 | if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { |
212 | nf_conntrack_get_reasm(skb); | 213 | nf_conntrack_get_reasm(skb); |
213 | NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, | 214 | NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, |
214 | (struct net_device *)in, | 215 | (struct net_device *)in, |
215 | (struct net_device *)out, | 216 | (struct net_device *)out, |
216 | okfn, NF_IP6_PRI_CONNTRACK + 1); | 217 | okfn, NF_IP6_PRI_CONNTRACK + 1); |
217 | return NF_DROP_ERR(-ECANCELED); | 218 | return NF_DROP_ERR(-ECANCELED); |
218 | } | 219 | } |
219 | } | 220 | } |
220 | 221 | ||
221 | nf_conntrack_get(reasm->nfct); | 222 | nf_conntrack_get(reasm->nfct); |
222 | skb->nfct = reasm->nfct; | 223 | skb->nfct = reasm->nfct; |
223 | skb->nfctinfo = reasm->nfctinfo; | 224 | skb->nfctinfo = reasm->nfctinfo; |
224 | return NF_ACCEPT; | 225 | return NF_ACCEPT; |
225 | } | 226 | } |
226 | 227 | ||
227 | return nf_conntrack_in(net, PF_INET6, hooknum, skb); | 228 | return nf_conntrack_in(net, PF_INET6, hooknum, skb); |
228 | } | 229 | } |
229 | 230 | ||
230 | static unsigned int ipv6_conntrack_in(unsigned int hooknum, | 231 | static unsigned int ipv6_conntrack_in(unsigned int hooknum, |
231 | struct sk_buff *skb, | 232 | struct sk_buff *skb, |
232 | const struct net_device *in, | 233 | const struct net_device *in, |
233 | const struct net_device *out, | 234 | const struct net_device *out, |
234 | int (*okfn)(struct sk_buff *)) | 235 | int (*okfn)(struct sk_buff *)) |
235 | { | 236 | { |
236 | return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); | 237 | return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); |
237 | } | 238 | } |
238 | 239 | ||
239 | static unsigned int ipv6_conntrack_local(unsigned int hooknum, | 240 | static unsigned int ipv6_conntrack_local(unsigned int hooknum, |
240 | struct sk_buff *skb, | 241 | struct sk_buff *skb, |
241 | const struct net_device *in, | 242 | const struct net_device *in, |
242 | const struct net_device *out, | 243 | const struct net_device *out, |
243 | int (*okfn)(struct sk_buff *)) | 244 | int (*okfn)(struct sk_buff *)) |
244 | { | 245 | { |
245 | /* root is playing with raw sockets. */ | 246 | /* root is playing with raw sockets. */ |
246 | if (skb->len < sizeof(struct ipv6hdr)) { | 247 | if (skb->len < sizeof(struct ipv6hdr)) { |
247 | net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); | 248 | net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); |
248 | return NF_ACCEPT; | 249 | return NF_ACCEPT; |
249 | } | 250 | } |
250 | return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); | 251 | return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); |
251 | } | 252 | } |
252 | 253 | ||
253 | static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { | 254 | static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { |
254 | { | 255 | { |
255 | .hook = ipv6_conntrack_in, | 256 | .hook = ipv6_conntrack_in, |
256 | .owner = THIS_MODULE, | 257 | .owner = THIS_MODULE, |
257 | .pf = NFPROTO_IPV6, | 258 | .pf = NFPROTO_IPV6, |
258 | .hooknum = NF_INET_PRE_ROUTING, | 259 | .hooknum = NF_INET_PRE_ROUTING, |
259 | .priority = NF_IP6_PRI_CONNTRACK, | 260 | .priority = NF_IP6_PRI_CONNTRACK, |
260 | }, | 261 | }, |
261 | { | 262 | { |
262 | .hook = ipv6_conntrack_local, | 263 | .hook = ipv6_conntrack_local, |
263 | .owner = THIS_MODULE, | 264 | .owner = THIS_MODULE, |
264 | .pf = NFPROTO_IPV6, | 265 | .pf = NFPROTO_IPV6, |
265 | .hooknum = NF_INET_LOCAL_OUT, | 266 | .hooknum = NF_INET_LOCAL_OUT, |
266 | .priority = NF_IP6_PRI_CONNTRACK, | 267 | .priority = NF_IP6_PRI_CONNTRACK, |
267 | }, | 268 | }, |
268 | { | 269 | { |
269 | .hook = ipv6_helper, | 270 | .hook = ipv6_helper, |
270 | .owner = THIS_MODULE, | 271 | .owner = THIS_MODULE, |
271 | .pf = NFPROTO_IPV6, | 272 | .pf = NFPROTO_IPV6, |
272 | .hooknum = NF_INET_POST_ROUTING, | 273 | .hooknum = NF_INET_POST_ROUTING, |
273 | .priority = NF_IP6_PRI_CONNTRACK_HELPER, | 274 | .priority = NF_IP6_PRI_CONNTRACK_HELPER, |
274 | }, | 275 | }, |
275 | { | 276 | { |
276 | .hook = ipv6_confirm, | 277 | .hook = ipv6_confirm, |
277 | .owner = THIS_MODULE, | 278 | .owner = THIS_MODULE, |
278 | .pf = NFPROTO_IPV6, | 279 | .pf = NFPROTO_IPV6, |
279 | .hooknum = NF_INET_POST_ROUTING, | 280 | .hooknum = NF_INET_POST_ROUTING, |
280 | .priority = NF_IP6_PRI_LAST, | 281 | .priority = NF_IP6_PRI_LAST, |
281 | }, | 282 | }, |
282 | { | 283 | { |
283 | .hook = ipv6_helper, | 284 | .hook = ipv6_helper, |
284 | .owner = THIS_MODULE, | 285 | .owner = THIS_MODULE, |
285 | .pf = NFPROTO_IPV6, | 286 | .pf = NFPROTO_IPV6, |
286 | .hooknum = NF_INET_LOCAL_IN, | 287 | .hooknum = NF_INET_LOCAL_IN, |
287 | .priority = NF_IP6_PRI_CONNTRACK_HELPER, | 288 | .priority = NF_IP6_PRI_CONNTRACK_HELPER, |
288 | }, | 289 | }, |
289 | { | 290 | { |
290 | .hook = ipv6_confirm, | 291 | .hook = ipv6_confirm, |
291 | .owner = THIS_MODULE, | 292 | .owner = THIS_MODULE, |
292 | .pf = NFPROTO_IPV6, | 293 | .pf = NFPROTO_IPV6, |
293 | .hooknum = NF_INET_LOCAL_IN, | 294 | .hooknum = NF_INET_LOCAL_IN, |
294 | .priority = NF_IP6_PRI_LAST-1, | 295 | .priority = NF_IP6_PRI_LAST-1, |
295 | }, | 296 | }, |
296 | }; | 297 | }; |
297 | 298 | ||
299 | static int | ||
300 | ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) | ||
301 | { | ||
302 | const struct inet_sock *inet = inet_sk(sk); | ||
303 | const struct ipv6_pinfo *inet6 = inet6_sk(sk); | ||
304 | const struct nf_conntrack_tuple_hash *h; | ||
305 | struct sockaddr_in6 sin6; | ||
306 | struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; | ||
307 | struct nf_conn *ct; | ||
308 | |||
309 | tuple.src.u3.in6 = inet6->rcv_saddr; | ||
310 | tuple.src.u.tcp.port = inet->inet_sport; | ||
311 | tuple.dst.u3.in6 = inet6->daddr; | ||
312 | tuple.dst.u.tcp.port = inet->inet_dport; | ||
313 | tuple.dst.protonum = sk->sk_protocol; | ||
314 | |||
315 | if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) | ||
316 | return -ENOPROTOOPT; | ||
317 | |||
318 | if (*len < 0 || (unsigned int) *len < sizeof(sin6)) | ||
319 | return -EINVAL; | ||
320 | |||
321 | h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); | ||
322 | if (!h) { | ||
323 | pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", | ||
324 | &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), | ||
325 | &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); | ||
326 | return -ENOENT; | ||
327 | } | ||
328 | |||
329 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
330 | |||
331 | sin6.sin6_family = AF_INET6; | ||
332 | sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; | ||
333 | sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; | ||
334 | memcpy(&sin6.sin6_addr, | ||
335 | &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, | ||
336 | sizeof(sin6.sin6_addr)); | ||
337 | |||
338 | nf_ct_put(ct); | ||
339 | |||
340 | if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) | ||
341 | sin6.sin6_scope_id = sk->sk_bound_dev_if; | ||
342 | else | ||
343 | sin6.sin6_scope_id = 0; | ||
344 | |||
345 | return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; | ||
346 | } | ||
347 | |||
298 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 348 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
299 | 349 | ||
300 | #include <linux/netfilter/nfnetlink.h> | 350 | #include <linux/netfilter/nfnetlink.h> |
301 | #include <linux/netfilter/nfnetlink_conntrack.h> | 351 | #include <linux/netfilter/nfnetlink_conntrack.h> |
302 | 352 | ||
303 | static int ipv6_tuple_to_nlattr(struct sk_buff *skb, | 353 | static int ipv6_tuple_to_nlattr(struct sk_buff *skb, |
304 | const struct nf_conntrack_tuple *tuple) | 354 | const struct nf_conntrack_tuple *tuple) |
305 | { | 355 | { |
306 | if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, | 356 | if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, |
307 | &tuple->src.u3.ip6) || | 357 | &tuple->src.u3.ip6) || |
308 | nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, | 358 | nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, |
309 | &tuple->dst.u3.ip6)) | 359 | &tuple->dst.u3.ip6)) |
310 | goto nla_put_failure; | 360 | goto nla_put_failure; |
311 | return 0; | 361 | return 0; |
312 | 362 | ||
313 | nla_put_failure: | 363 | nla_put_failure: |
314 | return -1; | 364 | return -1; |
315 | } | 365 | } |
316 | 366 | ||
317 | static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { | 367 | static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { |
318 | [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, | 368 | [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, |
319 | [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, | 369 | [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, |
320 | }; | 370 | }; |
321 | 371 | ||
322 | static int ipv6_nlattr_to_tuple(struct nlattr *tb[], | 372 | static int ipv6_nlattr_to_tuple(struct nlattr *tb[], |
323 | struct nf_conntrack_tuple *t) | 373 | struct nf_conntrack_tuple *t) |
324 | { | 374 | { |
325 | if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) | 375 | if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) |
326 | return -EINVAL; | 376 | return -EINVAL; |
327 | 377 | ||
328 | memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), | 378 | memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), |
329 | sizeof(u_int32_t) * 4); | 379 | sizeof(u_int32_t) * 4); |
330 | memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), | 380 | memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), |
331 | sizeof(u_int32_t) * 4); | 381 | sizeof(u_int32_t) * 4); |
332 | 382 | ||
333 | return 0; | 383 | return 0; |
334 | } | 384 | } |
335 | 385 | ||
336 | static int ipv6_nlattr_tuple_size(void) | 386 | static int ipv6_nlattr_tuple_size(void) |
337 | { | 387 | { |
338 | return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); | 388 | return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); |
339 | } | 389 | } |
340 | #endif | 390 | #endif |
341 | 391 | ||
342 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { | 392 | struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { |
343 | .l3proto = PF_INET6, | 393 | .l3proto = PF_INET6, |
344 | .name = "ipv6", | 394 | .name = "ipv6", |
345 | .pkt_to_tuple = ipv6_pkt_to_tuple, | 395 | .pkt_to_tuple = ipv6_pkt_to_tuple, |
346 | .invert_tuple = ipv6_invert_tuple, | 396 | .invert_tuple = ipv6_invert_tuple, |
347 | .print_tuple = ipv6_print_tuple, | 397 | .print_tuple = ipv6_print_tuple, |
348 | .get_l4proto = ipv6_get_l4proto, | 398 | .get_l4proto = ipv6_get_l4proto, |
349 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 399 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
350 | .tuple_to_nlattr = ipv6_tuple_to_nlattr, | 400 | .tuple_to_nlattr = ipv6_tuple_to_nlattr, |
351 | .nlattr_tuple_size = ipv6_nlattr_tuple_size, | 401 | .nlattr_tuple_size = ipv6_nlattr_tuple_size, |
352 | .nlattr_to_tuple = ipv6_nlattr_to_tuple, | 402 | .nlattr_to_tuple = ipv6_nlattr_to_tuple, |
353 | .nla_policy = ipv6_nla_policy, | 403 | .nla_policy = ipv6_nla_policy, |
354 | #endif | 404 | #endif |
355 | .me = THIS_MODULE, | 405 | .me = THIS_MODULE, |
356 | }; | 406 | }; |
357 | 407 | ||
358 | MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); | 408 | MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); |
359 | MODULE_LICENSE("GPL"); | 409 | MODULE_LICENSE("GPL"); |
360 | MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); | 410 | MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); |
361 | 411 | ||
412 | static struct nf_sockopt_ops so_getorigdst6 = { | ||
413 | .pf = NFPROTO_IPV6, | ||
414 | .get_optmin = IP6T_SO_ORIGINAL_DST, | ||
415 | .get_optmax = IP6T_SO_ORIGINAL_DST + 1, | ||
416 | .get = ipv6_getorigdst, | ||
417 | .owner = THIS_MODULE, | ||
418 | }; | ||
419 | |||
362 | static int ipv6_net_init(struct net *net) | 420 | static int ipv6_net_init(struct net *net) |
363 | { | 421 | { |
364 | int ret = 0; | 422 | int ret = 0; |
365 | 423 | ||
366 | ret = nf_conntrack_l4proto_register(net, | 424 | ret = nf_conntrack_l4proto_register(net, |
367 | &nf_conntrack_l4proto_tcp6); | 425 | &nf_conntrack_l4proto_tcp6); |
368 | if (ret < 0) { | 426 | if (ret < 0) { |
369 | printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); | 427 | printk(KERN_ERR "nf_conntrack_l4proto_tcp6: protocol register failed\n"); |
370 | goto out; | 428 | goto out; |
371 | } | 429 | } |
372 | ret = nf_conntrack_l4proto_register(net, | 430 | ret = nf_conntrack_l4proto_register(net, |
373 | &nf_conntrack_l4proto_udp6); | 431 | &nf_conntrack_l4proto_udp6); |
374 | if (ret < 0) { | 432 | if (ret < 0) { |
375 | printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); | 433 | printk(KERN_ERR "nf_conntrack_l4proto_udp6: protocol register failed\n"); |
376 | goto cleanup_tcp6; | 434 | goto cleanup_tcp6; |
377 | } | 435 | } |
378 | ret = nf_conntrack_l4proto_register(net, | 436 | ret = nf_conntrack_l4proto_register(net, |
379 | &nf_conntrack_l4proto_icmpv6); | 437 | &nf_conntrack_l4proto_icmpv6); |
380 | if (ret < 0) { | 438 | if (ret < 0) { |
381 | printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); | 439 | printk(KERN_ERR "nf_conntrack_l4proto_icmp6: protocol register failed\n"); |
382 | goto cleanup_udp6; | 440 | goto cleanup_udp6; |
383 | } | 441 | } |
384 | ret = nf_conntrack_l3proto_register(net, | 442 | ret = nf_conntrack_l3proto_register(net, |
385 | &nf_conntrack_l3proto_ipv6); | 443 | &nf_conntrack_l3proto_ipv6); |
386 | if (ret < 0) { | 444 | if (ret < 0) { |
387 | printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); | 445 | printk(KERN_ERR "nf_conntrack_l3proto_ipv6: protocol register failed\n"); |
388 | goto cleanup_icmpv6; | 446 | goto cleanup_icmpv6; |
389 | } | 447 | } |
390 | return 0; | 448 | return 0; |
391 | cleanup_icmpv6: | 449 | cleanup_icmpv6: |
392 | nf_conntrack_l4proto_unregister(net, | 450 | nf_conntrack_l4proto_unregister(net, |
393 | &nf_conntrack_l4proto_icmpv6); | 451 | &nf_conntrack_l4proto_icmpv6); |
394 | cleanup_udp6: | 452 | cleanup_udp6: |
395 | nf_conntrack_l4proto_unregister(net, | 453 | nf_conntrack_l4proto_unregister(net, |
396 | &nf_conntrack_l4proto_udp6); | 454 | &nf_conntrack_l4proto_udp6); |
397 | cleanup_tcp6: | 455 | cleanup_tcp6: |
398 | nf_conntrack_l4proto_unregister(net, | 456 | nf_conntrack_l4proto_unregister(net, |
399 | &nf_conntrack_l4proto_tcp6); | 457 | &nf_conntrack_l4proto_tcp6); |
400 | out: | 458 | out: |
401 | return ret; | 459 | return ret; |
402 | } | 460 | } |
403 | 461 | ||
404 | static void ipv6_net_exit(struct net *net) | 462 | static void ipv6_net_exit(struct net *net) |
405 | { | 463 | { |
406 | nf_conntrack_l3proto_unregister(net, | 464 | nf_conntrack_l3proto_unregister(net, |
407 | &nf_conntrack_l3proto_ipv6); | 465 | &nf_conntrack_l3proto_ipv6); |
408 | nf_conntrack_l4proto_unregister(net, | 466 | nf_conntrack_l4proto_unregister(net, |
409 | &nf_conntrack_l4proto_icmpv6); | 467 | &nf_conntrack_l4proto_icmpv6); |
410 | nf_conntrack_l4proto_unregister(net, | 468 | nf_conntrack_l4proto_unregister(net, |
411 | &nf_conntrack_l4proto_udp6); | 469 | &nf_conntrack_l4proto_udp6); |
412 | nf_conntrack_l4proto_unregister(net, | 470 | nf_conntrack_l4proto_unregister(net, |
413 | &nf_conntrack_l4proto_tcp6); | 471 | &nf_conntrack_l4proto_tcp6); |
414 | } | 472 | } |
415 | 473 | ||
416 | static struct pernet_operations ipv6_net_ops = { | 474 | static struct pernet_operations ipv6_net_ops = { |
417 | .init = ipv6_net_init, | 475 | .init = ipv6_net_init, |
418 | .exit = ipv6_net_exit, | 476 | .exit = ipv6_net_exit, |
419 | }; | 477 | }; |
420 | 478 | ||
421 | static int __init nf_conntrack_l3proto_ipv6_init(void) | 479 | static int __init nf_conntrack_l3proto_ipv6_init(void) |
422 | { | 480 | { |
423 | int ret = 0; | 481 | int ret = 0; |
424 | 482 | ||
425 | need_conntrack(); | 483 | need_conntrack(); |
426 | nf_defrag_ipv6_enable(); | 484 | nf_defrag_ipv6_enable(); |
427 | 485 | ||
486 | ret = nf_register_sockopt(&so_getorigdst6); | ||
487 | if (ret < 0) { | ||
488 | pr_err("Unable to register netfilter socket option\n"); | ||
489 | return ret; | ||
490 | } | ||
491 | |||
428 | ret = register_pernet_subsys(&ipv6_net_ops); | 492 | ret = register_pernet_subsys(&ipv6_net_ops); |
429 | if (ret < 0) | 493 | if (ret < 0) |
430 | goto cleanup_pernet; | 494 | goto cleanup_pernet; |
431 | ret = nf_register_hooks(ipv6_conntrack_ops, | 495 | ret = nf_register_hooks(ipv6_conntrack_ops, |
432 | ARRAY_SIZE(ipv6_conntrack_ops)); | 496 | ARRAY_SIZE(ipv6_conntrack_ops)); |
433 | if (ret < 0) { | 497 | if (ret < 0) { |
434 | pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " | 498 | pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " |
435 | "hook.\n"); | 499 | "hook.\n"); |
436 | goto cleanup_ipv6; | 500 | goto cleanup_ipv6; |
437 | } | 501 | } |
438 | return ret; | 502 | return ret; |
439 | 503 | ||
440 | cleanup_ipv6: | 504 | cleanup_ipv6: |
441 | unregister_pernet_subsys(&ipv6_net_ops); | 505 | unregister_pernet_subsys(&ipv6_net_ops); |
442 | cleanup_pernet: | 506 | cleanup_pernet: |
507 | nf_unregister_sockopt(&so_getorigdst6); | ||
443 | return ret; | 508 | return ret; |
444 | } | 509 | } |
445 | 510 | ||
446 | static void __exit nf_conntrack_l3proto_ipv6_fini(void) | 511 | static void __exit nf_conntrack_l3proto_ipv6_fini(void) |
447 | { | 512 | { |
448 | synchronize_net(); | 513 | synchronize_net(); |
449 | nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); | 514 | nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); |
450 | unregister_pernet_subsys(&ipv6_net_ops); | 515 | unregister_pernet_subsys(&ipv6_net_ops); |
516 | nf_unregister_sockopt(&so_getorigdst6); | ||
451 | } | 517 | } |
452 | 518 | ||
453 | module_init(nf_conntrack_l3proto_ipv6_init); | 519 | module_init(nf_conntrack_l3proto_ipv6_init); |
454 | module_exit(nf_conntrack_l3proto_ipv6_fini); | 520 | module_exit(nf_conntrack_l3proto_ipv6_fini); |
455 | 521 |
net/netfilter/ipvs/ip_vs_nfct.c
1 | /* | 1 | /* |
2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | 2 | * ip_vs_nfct.c: Netfilter connection tracking support for IPVS |
3 | * | 3 | * |
4 | * Portions Copyright (C) 2001-2002 | 4 | * Portions Copyright (C) 2001-2002 |
5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | 5 | * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. |
6 | * | 6 | * |
7 | * Portions Copyright (C) 2003-2010 | 7 | * Portions Copyright (C) 2003-2010 |
8 | * Julian Anastasov | 8 | * Julian Anastasov |
9 | * | 9 | * |
10 | * | 10 | * |
11 | * This code is free software; you can redistribute it and/or modify | 11 | * This code is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 12 | * it under the terms of the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2 of the License, or | 13 | * the Free Software Foundation; either version 2 of the License, or |
14 | * (at your option) any later version. | 14 | * (at your option) any later version. |
15 | * | 15 | * |
16 | * This program is distributed in the hope that it will be useful, | 16 | * This program is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 | * GNU General Public License for more details. | 19 | * GNU General Public License for more details. |
20 | * | 20 | * |
21 | * You should have received a copy of the GNU General Public License | 21 | * You should have received a copy of the GNU General Public License |
22 | * along with this program; if not, write to the Free Software | 22 | * along with this program; if not, write to the Free Software |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 | * | 24 | * |
25 | * | 25 | * |
26 | * Authors: | 26 | * Authors: |
27 | * Ben North <ben@redfrontdoor.org> | 27 | * Ben North <ben@redfrontdoor.org> |
28 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | 28 | * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels |
29 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match | 29 | * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match |
30 | * | 30 | * |
31 | * | 31 | * |
32 | * Current status: | 32 | * Current status: |
33 | * | 33 | * |
34 | * - provide conntrack confirmation for new and related connections, by | 34 | * - provide conntrack confirmation for new and related connections, by |
35 | * this way we can see their proper conntrack state in all hooks | 35 | * this way we can see their proper conntrack state in all hooks |
36 | * - support for all forwarding methods, not only NAT | 36 | * - support for all forwarding methods, not only NAT |
37 | * - FTP support (NAT), ability to support other NAT apps with expectations | 37 | * - FTP support (NAT), ability to support other NAT apps with expectations |
38 | * - to correctly create expectations for related NAT connections the proper | 38 | * - to correctly create expectations for related NAT connections the proper |
39 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires | 39 | * NF conntrack support must be already installed, eg. ip_vs_ftp requires |
40 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables | 40 | * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables |
41 | * NAT rules are needed) | 41 | * NAT rules are needed) |
42 | * - alter reply for NAT when forwarding packet in original direction: | 42 | * - alter reply for NAT when forwarding packet in original direction: |
43 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or | 43 | * conntrack from client in NEW or RELATED (Passive FTP DATA) state or |
44 | * when RELATED conntrack is created from real server (Active FTP DATA) | 44 | * when RELATED conntrack is created from real server (Active FTP DATA) |
45 | * - if iptables_nat is not loaded the Passive FTP will not work (the | 45 | * - if iptables_nat is not loaded the Passive FTP will not work (the |
46 | * PASV response can not be NAT-ed) but Active FTP should work | 46 | * PASV response can not be NAT-ed) but Active FTP should work |
47 | * | 47 | * |
48 | */ | 48 | */ |
49 | 49 | ||
50 | #define KMSG_COMPONENT "IPVS" | 50 | #define KMSG_COMPONENT "IPVS" |
51 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | 51 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
52 | 52 | ||
53 | #include <linux/module.h> | 53 | #include <linux/module.h> |
54 | #include <linux/types.h> | 54 | #include <linux/types.h> |
55 | #include <linux/kernel.h> | 55 | #include <linux/kernel.h> |
56 | #include <linux/errno.h> | 56 | #include <linux/errno.h> |
57 | #include <linux/compiler.h> | 57 | #include <linux/compiler.h> |
58 | #include <linux/vmalloc.h> | 58 | #include <linux/vmalloc.h> |
59 | #include <linux/skbuff.h> | 59 | #include <linux/skbuff.h> |
60 | #include <net/ip.h> | 60 | #include <net/ip.h> |
61 | #include <linux/netfilter.h> | 61 | #include <linux/netfilter.h> |
62 | #include <linux/netfilter_ipv4.h> | 62 | #include <linux/netfilter_ipv4.h> |
63 | #include <net/ip_vs.h> | 63 | #include <net/ip_vs.h> |
64 | #include <net/netfilter/nf_conntrack_core.h> | 64 | #include <net/netfilter/nf_conntrack_core.h> |
65 | #include <net/netfilter/nf_conntrack_expect.h> | 65 | #include <net/netfilter/nf_conntrack_expect.h> |
66 | #include <net/netfilter/nf_conntrack_helper.h> | 66 | #include <net/netfilter/nf_conntrack_helper.h> |
67 | #include <net/netfilter/nf_conntrack_zones.h> | 67 | #include <net/netfilter/nf_conntrack_zones.h> |
68 | 68 | ||
69 | 69 | ||
70 | #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" | 70 | #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" |
71 | #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ | 71 | #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ |
72 | &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ | 72 | &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ |
73 | (T)->dst.protonum | 73 | (T)->dst.protonum |
74 | 74 | ||
75 | #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" | 75 | #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" |
76 | #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ | 76 | #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ |
77 | &((C)->vaddr.ip), ntohs((C)->vport), \ | 77 | &((C)->vaddr.ip), ntohs((C)->vport), \ |
78 | &((C)->daddr.ip), ntohs((C)->dport), \ | 78 | &((C)->daddr.ip), ntohs((C)->dport), \ |
79 | (C)->protocol, (C)->state | 79 | (C)->protocol, (C)->state |
80 | 80 | ||
81 | void | 81 | void |
82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) | 82 | ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) |
83 | { | 83 | { |
84 | enum ip_conntrack_info ctinfo; | 84 | enum ip_conntrack_info ctinfo; |
85 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | 85 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
86 | struct nf_conntrack_tuple new_tuple; | 86 | struct nf_conntrack_tuple new_tuple; |
87 | 87 | ||
88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || | 88 | if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || |
89 | nf_ct_is_dying(ct)) | 89 | nf_ct_is_dying(ct)) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | /* Never alter conntrack for non-NAT conns */ | 92 | /* Never alter conntrack for non-NAT conns */ |
93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | 93 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) |
94 | return; | 94 | return; |
95 | 95 | ||
96 | /* Alter reply only in original direction */ | 96 | /* Alter reply only in original direction */ |
97 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) | 97 | if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) |
98 | return; | 98 | return; |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * The connection is not yet in the hashtable, so we update it. | 101 | * The connection is not yet in the hashtable, so we update it. |
102 | * CIP->VIP will remain the same, so leave the tuple in | 102 | * CIP->VIP will remain the same, so leave the tuple in |
103 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the | 103 | * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the |
104 | * real-server we will see RIP->DIP. | 104 | * real-server we will see RIP->DIP. |
105 | */ | 105 | */ |
106 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | 106 | new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
107 | /* | 107 | /* |
108 | * This will also take care of UDP and other protocols. | 108 | * This will also take care of UDP and other protocols. |
109 | */ | 109 | */ |
110 | if (outin) { | 110 | if (outin) { |
111 | new_tuple.src.u3 = cp->daddr; | 111 | new_tuple.src.u3 = cp->daddr; |
112 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | 112 | if (new_tuple.dst.protonum != IPPROTO_ICMP && |
113 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | 113 | new_tuple.dst.protonum != IPPROTO_ICMPV6) |
114 | new_tuple.src.u.tcp.port = cp->dport; | 114 | new_tuple.src.u.tcp.port = cp->dport; |
115 | } else { | 115 | } else { |
116 | new_tuple.dst.u3 = cp->vaddr; | 116 | new_tuple.dst.u3 = cp->vaddr; |
117 | if (new_tuple.dst.protonum != IPPROTO_ICMP && | 117 | if (new_tuple.dst.protonum != IPPROTO_ICMP && |
118 | new_tuple.dst.protonum != IPPROTO_ICMPV6) | 118 | new_tuple.dst.protonum != IPPROTO_ICMPV6) |
119 | new_tuple.dst.u.tcp.port = cp->vport; | 119 | new_tuple.dst.u.tcp.port = cp->vport; |
120 | } | 120 | } |
121 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " | 121 | IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " |
122 | "ctinfo=%d, old reply=" FMT_TUPLE | 122 | "ctinfo=%d, old reply=" FMT_TUPLE |
123 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", | 123 | ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", |
124 | __func__, ct, ct->status, ctinfo, | 124 | __func__, ct, ct->status, ctinfo, |
125 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), | 125 | ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), |
126 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); | 126 | ARG_TUPLE(&new_tuple), ARG_CONN(cp)); |
127 | nf_conntrack_alter_reply(ct, &new_tuple); | 127 | nf_conntrack_alter_reply(ct, &new_tuple); |
128 | } | 128 | } |
129 | 129 | ||
130 | int ip_vs_confirm_conntrack(struct sk_buff *skb) | 130 | int ip_vs_confirm_conntrack(struct sk_buff *skb) |
131 | { | 131 | { |
132 | return nf_conntrack_confirm(skb); | 132 | return nf_conntrack_confirm(skb); |
133 | } | 133 | } |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Called from init_conntrack() as expectfn handler. | 136 | * Called from init_conntrack() as expectfn handler. |
137 | */ | 137 | */ |
138 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | 138 | static void ip_vs_nfct_expect_callback(struct nf_conn *ct, |
139 | struct nf_conntrack_expect *exp) | 139 | struct nf_conntrack_expect *exp) |
140 | { | 140 | { |
141 | struct nf_conntrack_tuple *orig, new_reply; | 141 | struct nf_conntrack_tuple *orig, new_reply; |
142 | struct ip_vs_conn *cp; | 142 | struct ip_vs_conn *cp; |
143 | struct ip_vs_conn_param p; | 143 | struct ip_vs_conn_param p; |
144 | struct net *net = nf_ct_net(ct); | 144 | struct net *net = nf_ct_net(ct); |
145 | 145 | ||
146 | if (exp->tuple.src.l3num != PF_INET) | 146 | if (exp->tuple.src.l3num != PF_INET) |
147 | return; | 147 | return; |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * We assume that no NF locks are held before this callback. | 150 | * We assume that no NF locks are held before this callback. |
151 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their | 151 | * ip_vs_conn_out_get and ip_vs_conn_in_get should match their |
152 | * expectations even if they use wildcard values, now we provide the | 152 | * expectations even if they use wildcard values, now we provide the |
153 | * actual values from the newly created original conntrack direction. | 153 | * actual values from the newly created original conntrack direction. |
154 | * The conntrack is confirmed when packet reaches IPVS hooks. | 154 | * The conntrack is confirmed when packet reaches IPVS hooks. |
155 | */ | 155 | */ |
156 | 156 | ||
157 | /* RS->CLIENT */ | 157 | /* RS->CLIENT */ |
158 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | 158 | orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; |
159 | ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, | 159 | ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, |
160 | &orig->src.u3, orig->src.u.tcp.port, | 160 | &orig->src.u3, orig->src.u.tcp.port, |
161 | &orig->dst.u3, orig->dst.u.tcp.port, &p); | 161 | &orig->dst.u3, orig->dst.u.tcp.port, &p); |
162 | cp = ip_vs_conn_out_get(&p); | 162 | cp = ip_vs_conn_out_get(&p); |
163 | if (cp) { | 163 | if (cp) { |
164 | /* Change reply CLIENT->RS to CLIENT->VS */ | 164 | /* Change reply CLIENT->RS to CLIENT->VS */ |
165 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | 165 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
166 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | 166 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " |
167 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", | 167 | FMT_TUPLE ", found inout cp=" FMT_CONN "\n", |
168 | __func__, ct, ct->status, | 168 | __func__, ct, ct->status, |
169 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | 169 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
170 | ARG_CONN(cp)); | 170 | ARG_CONN(cp)); |
171 | new_reply.dst.u3 = cp->vaddr; | 171 | new_reply.dst.u3 = cp->vaddr; |
172 | new_reply.dst.u.tcp.port = cp->vport; | 172 | new_reply.dst.u.tcp.port = cp->vport; |
173 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | 173 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE |
174 | ", inout cp=" FMT_CONN "\n", | 174 | ", inout cp=" FMT_CONN "\n", |
175 | __func__, ct, | 175 | __func__, ct, |
176 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | 176 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
177 | ARG_CONN(cp)); | 177 | ARG_CONN(cp)); |
178 | goto alter; | 178 | goto alter; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* CLIENT->VS */ | 181 | /* CLIENT->VS */ |
182 | cp = ip_vs_conn_in_get(&p); | 182 | cp = ip_vs_conn_in_get(&p); |
183 | if (cp) { | 183 | if (cp) { |
184 | /* Change reply VS->CLIENT to RS->CLIENT */ | 184 | /* Change reply VS->CLIENT to RS->CLIENT */ |
185 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | 185 | new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; |
186 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " | 186 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " |
187 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", | 187 | FMT_TUPLE ", found outin cp=" FMT_CONN "\n", |
188 | __func__, ct, ct->status, | 188 | __func__, ct, ct->status, |
189 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | 189 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
190 | ARG_CONN(cp)); | 190 | ARG_CONN(cp)); |
191 | new_reply.src.u3 = cp->daddr; | 191 | new_reply.src.u3 = cp->daddr; |
192 | new_reply.src.u.tcp.port = cp->dport; | 192 | new_reply.src.u.tcp.port = cp->dport; |
193 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " | 193 | IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " |
194 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", | 194 | FMT_TUPLE ", outin cp=" FMT_CONN "\n", |
195 | __func__, ct, | 195 | __func__, ct, |
196 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), | 196 | ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
197 | ARG_CONN(cp)); | 197 | ARG_CONN(cp)); |
198 | goto alter; | 198 | goto alter; |
199 | } | 199 | } |
200 | 200 | ||
201 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE | 201 | IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE |
202 | " - unknown expect\n", | 202 | " - unknown expect\n", |
203 | __func__, ct, ct->status, ARG_TUPLE(orig)); | 203 | __func__, ct, ct->status, ARG_TUPLE(orig)); |
204 | return; | 204 | return; |
205 | 205 | ||
206 | alter: | 206 | alter: |
207 | /* Never alter conntrack for non-NAT conns */ | 207 | /* Never alter conntrack for non-NAT conns */ |
208 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | 208 | if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) |
209 | nf_conntrack_alter_reply(ct, &new_reply); | 209 | nf_conntrack_alter_reply(ct, &new_reply); |
210 | ip_vs_conn_put(cp); | 210 | ip_vs_conn_put(cp); |
211 | return; | 211 | return; |
212 | } | 212 | } |
213 | 213 | ||
214 | /* | 214 | /* |
215 | * Create NF conntrack expectation with wildcard (optional) source port. | 215 | * Create NF conntrack expectation with wildcard (optional) source port. |
216 | * Then the default callback function will alter the reply and will confirm | 216 | * Then the default callback function will alter the reply and will confirm |
217 | * the conntrack entry when the first packet comes. | 217 | * the conntrack entry when the first packet comes. |
218 | * Use port 0 to expect connection from any port. | 218 | * Use port 0 to expect connection from any port. |
219 | */ | 219 | */ |
220 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, | 220 | void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, |
221 | struct ip_vs_conn *cp, u_int8_t proto, | 221 | struct ip_vs_conn *cp, u_int8_t proto, |
222 | const __be16 port, int from_rs) | 222 | const __be16 port, int from_rs) |
223 | { | 223 | { |
224 | struct nf_conntrack_expect *exp; | 224 | struct nf_conntrack_expect *exp; |
225 | 225 | ||
226 | if (ct == NULL || nf_ct_is_untracked(ct)) | 226 | if (ct == NULL || nf_ct_is_untracked(ct)) |
227 | return; | 227 | return; |
228 | 228 | ||
229 | exp = nf_ct_expect_alloc(ct); | 229 | exp = nf_ct_expect_alloc(ct); |
230 | if (!exp) | 230 | if (!exp) |
231 | return; | 231 | return; |
232 | 232 | ||
233 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), | 233 | nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), |
234 | from_rs ? &cp->daddr : &cp->caddr, | 234 | from_rs ? &cp->daddr : &cp->caddr, |
235 | from_rs ? &cp->caddr : &cp->vaddr, | 235 | from_rs ? &cp->caddr : &cp->vaddr, |
236 | proto, port ? &port : NULL, | 236 | proto, port ? &port : NULL, |
237 | from_rs ? &cp->cport : &cp->vport); | 237 | from_rs ? &cp->cport : &cp->vport); |
238 | 238 | ||
239 | exp->expectfn = ip_vs_nfct_expect_callback; | 239 | exp->expectfn = ip_vs_nfct_expect_callback; |
240 | 240 | ||
241 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | 241 | IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", |
242 | __func__, ct, ARG_TUPLE(&exp->tuple)); | 242 | __func__, ct, ARG_TUPLE(&exp->tuple)); |
243 | nf_ct_expect_related(exp); | 243 | nf_ct_expect_related(exp); |
244 | nf_ct_expect_put(exp); | 244 | nf_ct_expect_put(exp); |
245 | } | 245 | } |
246 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); | 246 | EXPORT_SYMBOL(ip_vs_nfct_expect_related); |
247 | 247 | ||
248 | /* | 248 | /* |
249 | * Our connection was terminated, try to drop the conntrack immediately | 249 | * Our connection was terminated, try to drop the conntrack immediately |
250 | */ | 250 | */ |
251 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) | 251 | void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) |
252 | { | 252 | { |
253 | struct nf_conntrack_tuple_hash *h; | 253 | struct nf_conntrack_tuple_hash *h; |
254 | struct nf_conn *ct; | 254 | struct nf_conn *ct; |
255 | struct nf_conntrack_tuple tuple; | 255 | struct nf_conntrack_tuple tuple; |
256 | 256 | ||
257 | if (!cp->cport) | 257 | if (!cp->cport) |
258 | return; | 258 | return; |
259 | 259 | ||
260 | tuple = (struct nf_conntrack_tuple) { | 260 | tuple = (struct nf_conntrack_tuple) { |
261 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | 261 | .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; |
262 | tuple.src.u3 = cp->caddr; | 262 | tuple.src.u3 = cp->caddr; |
263 | tuple.src.u.all = cp->cport; | 263 | tuple.src.u.all = cp->cport; |
264 | tuple.src.l3num = cp->af; | 264 | tuple.src.l3num = cp->af; |
265 | tuple.dst.u3 = cp->vaddr; | 265 | tuple.dst.u3 = cp->vaddr; |
266 | tuple.dst.u.all = cp->vport; | 266 | tuple.dst.u.all = cp->vport; |
267 | 267 | ||
268 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | 268 | IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE |
269 | " for conn " FMT_CONN "\n", | 269 | " for conn " FMT_CONN "\n", |
270 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); | 270 | __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); |
271 | 271 | ||
272 | h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, | 272 | h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, |
273 | &tuple); | 273 | &tuple); |
274 | if (h) { | 274 | if (h) { |
275 | ct = nf_ct_tuplehash_to_ctrack(h); | 275 | ct = nf_ct_tuplehash_to_ctrack(h); |
276 | /* Show what happens instead of calling nf_ct_kill() */ | 276 | /* Show what happens instead of calling nf_ct_kill() */ |
277 | if (del_timer(&ct->timeout)) { | 277 | if (del_timer(&ct->timeout)) { |
278 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" | 278 | IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" |
279 | FMT_TUPLE "\n", | 279 | FMT_TUPLE "\n", |
280 | __func__, ct, ARG_TUPLE(&tuple)); | 280 | __func__, ct, ARG_TUPLE(&tuple)); |
281 | if (ct->timeout.function) | 281 | if (ct->timeout.function) |
282 | ct->timeout.function(ct->timeout.data); | 282 | ct->timeout.function(ct->timeout.data); |
283 | } else { | 283 | } else { |
284 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | 284 | IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" |
285 | FMT_TUPLE "\n", | 285 | FMT_TUPLE "\n", |
286 | __func__, ct, ARG_TUPLE(&tuple)); | 286 | __func__, ct, ARG_TUPLE(&tuple)); |
287 | } | 287 | } |
288 | nf_ct_put(ct); | 288 | nf_ct_put(ct); |
289 | } else { | 289 | } else { |
290 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | 290 | IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", |
291 | __func__, ARG_TUPLE(&tuple)); | 291 | __func__, ARG_TUPLE(&tuple)); |
292 | } | 292 | } |
293 | } | 293 | } |
294 | 294 | ||
295 | 295 |
net/netfilter/ipvs/ip_vs_xmit.c
1 | /* | 1 | /* |
2 | * ip_vs_xmit.c: various packet transmitters for IPVS | 2 | * ip_vs_xmit.c: various packet transmitters for IPVS |
3 | * | 3 | * |
4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | 4 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
5 | * Julian Anastasov <ja@ssi.bg> | 5 | * Julian Anastasov <ja@ssi.bg> |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or | 7 | * This program is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU General Public License | 8 | * modify it under the terms of the GNU General Public License |
9 | * as published by the Free Software Foundation; either version | 9 | * as published by the Free Software Foundation; either version |
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * Changes: | 12 | * Changes: |
13 | * | 13 | * |
14 | * Description of forwarding methods: | 14 | * Description of forwarding methods: |
15 | * - all transmitters are called from LOCAL_IN (remote clients) and | 15 | * - all transmitters are called from LOCAL_IN (remote clients) and |
16 | * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD | 16 | * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD |
17 | * - not all connections have destination server, for example, | 17 | * - not all connections have destination server, for example, |
18 | * connections in backup server when fwmark is used | 18 | * connections in backup server when fwmark is used |
19 | * - bypass connections use daddr from packet | 19 | * - bypass connections use daddr from packet |
20 | * LOCAL_OUT rules: | 20 | * LOCAL_OUT rules: |
21 | * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) | 21 | * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) |
22 | * - skb->pkt_type is not set yet | 22 | * - skb->pkt_type is not set yet |
23 | * - the only place where we can see skb->sk != NULL | 23 | * - the only place where we can see skb->sk != NULL |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #define KMSG_COMPONENT "IPVS" | 26 | #define KMSG_COMPONENT "IPVS" |
27 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | 27 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
28 | 28 | ||
29 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/tcp.h> /* for tcphdr */ | 31 | #include <linux/tcp.h> /* for tcphdr */ |
32 | #include <net/ip.h> | 32 | #include <net/ip.h> |
33 | #include <net/tcp.h> /* for csum_tcpudp_magic */ | 33 | #include <net/tcp.h> /* for csum_tcpudp_magic */ |
34 | #include <net/udp.h> | 34 | #include <net/udp.h> |
35 | #include <net/icmp.h> /* for icmp_send */ | 35 | #include <net/icmp.h> /* for icmp_send */ |
36 | #include <net/route.h> /* for ip_route_output */ | 36 | #include <net/route.h> /* for ip_route_output */ |
37 | #include <net/ipv6.h> | 37 | #include <net/ipv6.h> |
38 | #include <net/ip6_route.h> | 38 | #include <net/ip6_route.h> |
39 | #include <net/addrconf.h> | 39 | #include <net/addrconf.h> |
40 | #include <linux/icmpv6.h> | 40 | #include <linux/icmpv6.h> |
41 | #include <linux/netfilter.h> | 41 | #include <linux/netfilter.h> |
42 | #include <linux/netfilter_ipv4.h> | 42 | #include <linux/netfilter_ipv4.h> |
43 | 43 | ||
44 | #include <net/ip_vs.h> | 44 | #include <net/ip_vs.h> |
45 | 45 | ||
/* Bit flags for the rt_mode argument of the route lookup helpers */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH	= 1 << 4,	/* Route via remote addr */
};
55 | 55 | ||
56 | /* | 56 | /* |
57 | * Destination cache to speed up outgoing route lookup | 57 | * Destination cache to speed up outgoing route lookup |
58 | */ | 58 | */ |
59 | static inline void | 59 | static inline void |
60 | __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, | 60 | __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, |
61 | u32 dst_cookie) | 61 | u32 dst_cookie) |
62 | { | 62 | { |
63 | struct dst_entry *old_dst; | 63 | struct dst_entry *old_dst; |
64 | 64 | ||
65 | old_dst = dest->dst_cache; | 65 | old_dst = dest->dst_cache; |
66 | dest->dst_cache = dst; | 66 | dest->dst_cache = dst; |
67 | dest->dst_rtos = rtos; | 67 | dest->dst_rtos = rtos; |
68 | dest->dst_cookie = dst_cookie; | 68 | dest->dst_cookie = dst_cookie; |
69 | dst_release(old_dst); | 69 | dst_release(old_dst); |
70 | } | 70 | } |
71 | 71 | ||
72 | static inline struct dst_entry * | 72 | static inline struct dst_entry * |
73 | __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) | 73 | __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) |
74 | { | 74 | { |
75 | struct dst_entry *dst = dest->dst_cache; | 75 | struct dst_entry *dst = dest->dst_cache; |
76 | 76 | ||
77 | if (!dst) | 77 | if (!dst) |
78 | return NULL; | 78 | return NULL; |
79 | if ((dst->obsolete || rtos != dest->dst_rtos) && | 79 | if ((dst->obsolete || rtos != dest->dst_rtos) && |
80 | dst->ops->check(dst, dest->dst_cookie) == NULL) { | 80 | dst->ops->check(dst, dest->dst_cookie) == NULL) { |
81 | dest->dst_cache = NULL; | 81 | dest->dst_cache = NULL; |
82 | dst_release(dst); | 82 | dst_release(dst); |
83 | return NULL; | 83 | return NULL; |
84 | } | 84 | } |
85 | dst_hold(dst); | 85 | dst_hold(dst); |
86 | return dst; | 86 | return dst; |
87 | } | 87 | } |
88 | 88 | ||
89 | static inline bool | 89 | static inline bool |
90 | __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) | 90 | __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) |
91 | { | 91 | { |
92 | if (IP6CB(skb)->frag_max_size) { | 92 | if (IP6CB(skb)->frag_max_size) { |
93 | /* frag_max_size tell us that, this packet have been | 93 | /* frag_max_size tell us that, this packet have been |
94 | * defragmented by netfilter IPv6 conntrack module. | 94 | * defragmented by netfilter IPv6 conntrack module. |
95 | */ | 95 | */ |
96 | if (IP6CB(skb)->frag_max_size > mtu) | 96 | if (IP6CB(skb)->frag_max_size > mtu) |
97 | return true; /* largest fragment violate MTU */ | 97 | return true; /* largest fragment violate MTU */ |
98 | } | 98 | } |
99 | else if (skb->len > mtu && !skb_is_gso(skb)) { | 99 | else if (skb->len > mtu && !skb_is_gso(skb)) { |
100 | return true; /* Packet size violate MTU size */ | 100 | return true; /* Packet size violate MTU size */ |
101 | } | 101 | } |
102 | return false; | 102 | return false; |
103 | } | 103 | } |
104 | 104 | ||
105 | /* Get route to daddr, update *saddr, optionally bind route to saddr */ | 105 | /* Get route to daddr, update *saddr, optionally bind route to saddr */ |
106 | static struct rtable *do_output_route4(struct net *net, __be32 daddr, | 106 | static struct rtable *do_output_route4(struct net *net, __be32 daddr, |
107 | u32 rtos, int rt_mode, __be32 *saddr) | 107 | u32 rtos, int rt_mode, __be32 *saddr) |
108 | { | 108 | { |
109 | struct flowi4 fl4; | 109 | struct flowi4 fl4; |
110 | struct rtable *rt; | 110 | struct rtable *rt; |
111 | int loop = 0; | 111 | int loop = 0; |
112 | 112 | ||
113 | memset(&fl4, 0, sizeof(fl4)); | 113 | memset(&fl4, 0, sizeof(fl4)); |
114 | fl4.daddr = daddr; | 114 | fl4.daddr = daddr; |
115 | fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; | 115 | fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; |
116 | fl4.flowi4_tos = rtos; | 116 | fl4.flowi4_tos = rtos; |
117 | fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? | 117 | fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? |
118 | FLOWI_FLAG_KNOWN_NH : 0; | 118 | FLOWI_FLAG_KNOWN_NH : 0; |
119 | 119 | ||
120 | retry: | 120 | retry: |
121 | rt = ip_route_output_key(net, &fl4); | 121 | rt = ip_route_output_key(net, &fl4); |
122 | if (IS_ERR(rt)) { | 122 | if (IS_ERR(rt)) { |
123 | /* Invalid saddr ? */ | 123 | /* Invalid saddr ? */ |
124 | if (PTR_ERR(rt) == -EINVAL && *saddr && | 124 | if (PTR_ERR(rt) == -EINVAL && *saddr && |
125 | rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { | 125 | rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { |
126 | *saddr = 0; | 126 | *saddr = 0; |
127 | flowi4_update_output(&fl4, 0, rtos, daddr, 0); | 127 | flowi4_update_output(&fl4, 0, rtos, daddr, 0); |
128 | goto retry; | 128 | goto retry; |
129 | } | 129 | } |
130 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); | 130 | IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); |
131 | return NULL; | 131 | return NULL; |
132 | } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { | 132 | } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { |
133 | ip_rt_put(rt); | 133 | ip_rt_put(rt); |
134 | *saddr = fl4.saddr; | 134 | *saddr = fl4.saddr; |
135 | flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); | 135 | flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); |
136 | loop++; | 136 | loop++; |
137 | goto retry; | 137 | goto retry; |
138 | } | 138 | } |
139 | *saddr = fl4.saddr; | 139 | *saddr = fl4.saddr; |
140 | return rt; | 140 | return rt; |
141 | } | 141 | } |
142 | 142 | ||
143 | /* Get route to destination or remote server */ | 143 | /* Get route to destination or remote server */ |
144 | static struct rtable * | 144 | static struct rtable * |
145 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, | 145 | __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, |
146 | __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) | 146 | __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) |
147 | { | 147 | { |
148 | struct net *net = dev_net(skb_dst(skb)->dev); | 148 | struct net *net = dev_net(skb_dst(skb)->dev); |
149 | struct rtable *rt; /* Route to the other host */ | 149 | struct rtable *rt; /* Route to the other host */ |
150 | struct rtable *ort; /* Original route */ | 150 | struct rtable *ort; /* Original route */ |
151 | int local; | 151 | int local; |
152 | 152 | ||
153 | if (dest) { | 153 | if (dest) { |
154 | spin_lock(&dest->dst_lock); | 154 | spin_lock(&dest->dst_lock); |
155 | if (!(rt = (struct rtable *) | 155 | if (!(rt = (struct rtable *) |
156 | __ip_vs_dst_check(dest, rtos))) { | 156 | __ip_vs_dst_check(dest, rtos))) { |
157 | rt = do_output_route4(net, dest->addr.ip, rtos, | 157 | rt = do_output_route4(net, dest->addr.ip, rtos, |
158 | rt_mode, &dest->dst_saddr.ip); | 158 | rt_mode, &dest->dst_saddr.ip); |
159 | if (!rt) { | 159 | if (!rt) { |
160 | spin_unlock(&dest->dst_lock); | 160 | spin_unlock(&dest->dst_lock); |
161 | return NULL; | 161 | return NULL; |
162 | } | 162 | } |
163 | __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); | 163 | __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); |
164 | IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " | 164 | IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " |
165 | "rtos=%X\n", | 165 | "rtos=%X\n", |
166 | &dest->addr.ip, &dest->dst_saddr.ip, | 166 | &dest->addr.ip, &dest->dst_saddr.ip, |
167 | atomic_read(&rt->dst.__refcnt), rtos); | 167 | atomic_read(&rt->dst.__refcnt), rtos); |
168 | } | 168 | } |
169 | daddr = dest->addr.ip; | 169 | daddr = dest->addr.ip; |
170 | if (ret_saddr) | 170 | if (ret_saddr) |
171 | *ret_saddr = dest->dst_saddr.ip; | 171 | *ret_saddr = dest->dst_saddr.ip; |
172 | spin_unlock(&dest->dst_lock); | 172 | spin_unlock(&dest->dst_lock); |
173 | } else { | 173 | } else { |
174 | __be32 saddr = htonl(INADDR_ANY); | 174 | __be32 saddr = htonl(INADDR_ANY); |
175 | 175 | ||
176 | /* For such unconfigured boxes avoid many route lookups | 176 | /* For such unconfigured boxes avoid many route lookups |
177 | * for performance reasons because we do not remember saddr | 177 | * for performance reasons because we do not remember saddr |
178 | */ | 178 | */ |
179 | rt_mode &= ~IP_VS_RT_MODE_CONNECT; | 179 | rt_mode &= ~IP_VS_RT_MODE_CONNECT; |
180 | rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); | 180 | rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); |
181 | if (!rt) | 181 | if (!rt) |
182 | return NULL; | 182 | return NULL; |
183 | if (ret_saddr) | 183 | if (ret_saddr) |
184 | *ret_saddr = saddr; | 184 | *ret_saddr = saddr; |
185 | } | 185 | } |
186 | 186 | ||
187 | local = rt->rt_flags & RTCF_LOCAL; | 187 | local = rt->rt_flags & RTCF_LOCAL; |
188 | if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & | 188 | if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & |
189 | rt_mode)) { | 189 | rt_mode)) { |
190 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", | 190 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", |
191 | (rt->rt_flags & RTCF_LOCAL) ? | 191 | (rt->rt_flags & RTCF_LOCAL) ? |
192 | "local":"non-local", &daddr); | 192 | "local":"non-local", &daddr); |
193 | ip_rt_put(rt); | 193 | ip_rt_put(rt); |
194 | return NULL; | 194 | return NULL; |
195 | } | 195 | } |
196 | if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && | 196 | if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && |
197 | !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { | 197 | !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { |
198 | IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " | 198 | IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " |
199 | "requires NAT method, dest: %pI4\n", | 199 | "requires NAT method, dest: %pI4\n", |
200 | &ip_hdr(skb)->daddr, &daddr); | 200 | &ip_hdr(skb)->daddr, &daddr); |
201 | ip_rt_put(rt); | 201 | ip_rt_put(rt); |
202 | return NULL; | 202 | return NULL; |
203 | } | 203 | } |
204 | if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { | 204 | if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { |
205 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " | 205 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " |
206 | "to non-local address, dest: %pI4\n", | 206 | "to non-local address, dest: %pI4\n", |
207 | &ip_hdr(skb)->saddr, &daddr); | 207 | &ip_hdr(skb)->saddr, &daddr); |
208 | ip_rt_put(rt); | 208 | ip_rt_put(rt); |
209 | return NULL; | 209 | return NULL; |
210 | } | 210 | } |
211 | 211 | ||
212 | return rt; | 212 | return rt; |
213 | } | 213 | } |
214 | 214 | ||
215 | /* Reroute packet to local IPv4 stack after DNAT */ | 215 | /* Reroute packet to local IPv4 stack after DNAT */ |
216 | static int | 216 | static int |
217 | __ip_vs_reroute_locally(struct sk_buff *skb) | 217 | __ip_vs_reroute_locally(struct sk_buff *skb) |
218 | { | 218 | { |
219 | struct rtable *rt = skb_rtable(skb); | 219 | struct rtable *rt = skb_rtable(skb); |
220 | struct net_device *dev = rt->dst.dev; | 220 | struct net_device *dev = rt->dst.dev; |
221 | struct net *net = dev_net(dev); | 221 | struct net *net = dev_net(dev); |
222 | struct iphdr *iph = ip_hdr(skb); | 222 | struct iphdr *iph = ip_hdr(skb); |
223 | 223 | ||
224 | if (rt_is_input_route(rt)) { | 224 | if (rt_is_input_route(rt)) { |
225 | unsigned long orefdst = skb->_skb_refdst; | 225 | unsigned long orefdst = skb->_skb_refdst; |
226 | 226 | ||
227 | if (ip_route_input(skb, iph->daddr, iph->saddr, | 227 | if (ip_route_input(skb, iph->daddr, iph->saddr, |
228 | iph->tos, skb->dev)) | 228 | iph->tos, skb->dev)) |
229 | return 0; | 229 | return 0; |
230 | refdst_drop(orefdst); | 230 | refdst_drop(orefdst); |
231 | } else { | 231 | } else { |
232 | struct flowi4 fl4 = { | 232 | struct flowi4 fl4 = { |
233 | .daddr = iph->daddr, | 233 | .daddr = iph->daddr, |
234 | .saddr = iph->saddr, | 234 | .saddr = iph->saddr, |
235 | .flowi4_tos = RT_TOS(iph->tos), | 235 | .flowi4_tos = RT_TOS(iph->tos), |
236 | .flowi4_mark = skb->mark, | 236 | .flowi4_mark = skb->mark, |
237 | }; | 237 | }; |
238 | 238 | ||
239 | rt = ip_route_output_key(net, &fl4); | 239 | rt = ip_route_output_key(net, &fl4); |
240 | if (IS_ERR(rt)) | 240 | if (IS_ERR(rt)) |
241 | return 0; | 241 | return 0; |
242 | if (!(rt->rt_flags & RTCF_LOCAL)) { | 242 | if (!(rt->rt_flags & RTCF_LOCAL)) { |
243 | ip_rt_put(rt); | 243 | ip_rt_put(rt); |
244 | return 0; | 244 | return 0; |
245 | } | 245 | } |
246 | /* Drop old route. */ | 246 | /* Drop old route. */ |
247 | skb_dst_drop(skb); | 247 | skb_dst_drop(skb); |
248 | skb_dst_set(skb, &rt->dst); | 248 | skb_dst_set(skb, &rt->dst); |
249 | } | 249 | } |
250 | return 1; | 250 | return 1; |
251 | } | 251 | } |
252 | 252 | ||
253 | #ifdef CONFIG_IP_VS_IPV6 | 253 | #ifdef CONFIG_IP_VS_IPV6 |
254 | 254 | ||
255 | static inline int __ip_vs_is_local_route6(struct rt6_info *rt) | 255 | static inline int __ip_vs_is_local_route6(struct rt6_info *rt) |
256 | { | 256 | { |
257 | return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; | 257 | return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK; |
258 | } | 258 | } |
259 | 259 | ||
260 | static struct dst_entry * | 260 | static struct dst_entry * |
261 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, | 261 | __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, |
262 | struct in6_addr *ret_saddr, int do_xfrm) | 262 | struct in6_addr *ret_saddr, int do_xfrm) |
263 | { | 263 | { |
264 | struct dst_entry *dst; | 264 | struct dst_entry *dst; |
265 | struct flowi6 fl6 = { | 265 | struct flowi6 fl6 = { |
266 | .daddr = *daddr, | 266 | .daddr = *daddr, |
267 | }; | 267 | }; |
268 | 268 | ||
269 | dst = ip6_route_output(net, NULL, &fl6); | 269 | dst = ip6_route_output(net, NULL, &fl6); |
270 | if (dst->error) | 270 | if (dst->error) |
271 | goto out_err; | 271 | goto out_err; |
272 | if (!ret_saddr) | 272 | if (!ret_saddr) |
273 | return dst; | 273 | return dst; |
274 | if (ipv6_addr_any(&fl6.saddr) && | 274 | if (ipv6_addr_any(&fl6.saddr) && |
275 | ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, | 275 | ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, |
276 | &fl6.daddr, 0, &fl6.saddr) < 0) | 276 | &fl6.daddr, 0, &fl6.saddr) < 0) |
277 | goto out_err; | 277 | goto out_err; |
278 | if (do_xfrm) { | 278 | if (do_xfrm) { |
279 | dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); | 279 | dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); |
280 | if (IS_ERR(dst)) { | 280 | if (IS_ERR(dst)) { |
281 | dst = NULL; | 281 | dst = NULL; |
282 | goto out_err; | 282 | goto out_err; |
283 | } | 283 | } |
284 | } | 284 | } |
285 | *ret_saddr = fl6.saddr; | 285 | *ret_saddr = fl6.saddr; |
286 | return dst; | 286 | return dst; |
287 | 287 | ||
288 | out_err: | 288 | out_err: |
289 | dst_release(dst); | 289 | dst_release(dst); |
290 | IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); | 290 | IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); |
291 | return NULL; | 291 | return NULL; |
292 | } | 292 | } |
293 | 293 | ||
294 | /* | 294 | /* |
295 | * Get route to destination or remote server | 295 | * Get route to destination or remote server |
296 | */ | 296 | */ |
297 | static struct rt6_info * | 297 | static struct rt6_info * |
298 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, | 298 | __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, |
299 | struct in6_addr *daddr, struct in6_addr *ret_saddr, | 299 | struct in6_addr *daddr, struct in6_addr *ret_saddr, |
300 | int do_xfrm, int rt_mode) | 300 | int do_xfrm, int rt_mode) |
301 | { | 301 | { |
302 | struct net *net = dev_net(skb_dst(skb)->dev); | 302 | struct net *net = dev_net(skb_dst(skb)->dev); |
303 | struct rt6_info *rt; /* Route to the other host */ | 303 | struct rt6_info *rt; /* Route to the other host */ |
304 | struct rt6_info *ort; /* Original route */ | 304 | struct rt6_info *ort; /* Original route */ |
305 | struct dst_entry *dst; | 305 | struct dst_entry *dst; |
306 | int local; | 306 | int local; |
307 | 307 | ||
308 | if (dest) { | 308 | if (dest) { |
309 | spin_lock(&dest->dst_lock); | 309 | spin_lock(&dest->dst_lock); |
310 | rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); | 310 | rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); |
311 | if (!rt) { | 311 | if (!rt) { |
312 | u32 cookie; | 312 | u32 cookie; |
313 | 313 | ||
314 | dst = __ip_vs_route_output_v6(net, &dest->addr.in6, | 314 | dst = __ip_vs_route_output_v6(net, &dest->addr.in6, |
315 | &dest->dst_saddr.in6, | 315 | &dest->dst_saddr.in6, |
316 | do_xfrm); | 316 | do_xfrm); |
317 | if (!dst) { | 317 | if (!dst) { |
318 | spin_unlock(&dest->dst_lock); | 318 | spin_unlock(&dest->dst_lock); |
319 | return NULL; | 319 | return NULL; |
320 | } | 320 | } |
321 | rt = (struct rt6_info *) dst; | 321 | rt = (struct rt6_info *) dst; |
322 | cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; | 322 | cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; |
323 | __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); | 323 | __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); |
324 | IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", | 324 | IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", |
325 | &dest->addr.in6, &dest->dst_saddr.in6, | 325 | &dest->addr.in6, &dest->dst_saddr.in6, |
326 | atomic_read(&rt->dst.__refcnt)); | 326 | atomic_read(&rt->dst.__refcnt)); |
327 | } | 327 | } |
328 | if (ret_saddr) | 328 | if (ret_saddr) |
329 | *ret_saddr = dest->dst_saddr.in6; | 329 | *ret_saddr = dest->dst_saddr.in6; |
330 | spin_unlock(&dest->dst_lock); | 330 | spin_unlock(&dest->dst_lock); |
331 | } else { | 331 | } else { |
332 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); | 332 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); |
333 | if (!dst) | 333 | if (!dst) |
334 | return NULL; | 334 | return NULL; |
335 | rt = (struct rt6_info *) dst; | 335 | rt = (struct rt6_info *) dst; |
336 | } | 336 | } |
337 | 337 | ||
338 | local = __ip_vs_is_local_route6(rt); | 338 | local = __ip_vs_is_local_route6(rt); |
339 | if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & | 339 | if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & |
340 | rt_mode)) { | 340 | rt_mode)) { |
341 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", | 341 | IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", |
342 | local ? "local":"non-local", daddr); | 342 | local ? "local":"non-local", daddr); |
343 | dst_release(&rt->dst); | 343 | dst_release(&rt->dst); |
344 | return NULL; | 344 | return NULL; |
345 | } | 345 | } |
346 | if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && | 346 | if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && |
347 | !((ort = (struct rt6_info *) skb_dst(skb)) && | 347 | !((ort = (struct rt6_info *) skb_dst(skb)) && |
348 | __ip_vs_is_local_route6(ort))) { | 348 | __ip_vs_is_local_route6(ort))) { |
349 | IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " | 349 | IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " |
350 | "requires NAT method, dest: %pI6c\n", | 350 | "requires NAT method, dest: %pI6c\n", |
351 | &ipv6_hdr(skb)->daddr, daddr); | 351 | &ipv6_hdr(skb)->daddr, daddr); |
352 | dst_release(&rt->dst); | 352 | dst_release(&rt->dst); |
353 | return NULL; | 353 | return NULL; |
354 | } | 354 | } |
355 | if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && | 355 | if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && |
356 | ipv6_addr_type(&ipv6_hdr(skb)->saddr) & | 356 | ipv6_addr_type(&ipv6_hdr(skb)->saddr) & |
357 | IPV6_ADDR_LOOPBACK)) { | 357 | IPV6_ADDR_LOOPBACK)) { |
358 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " | 358 | IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " |
359 | "to non-local address, dest: %pI6c\n", | 359 | "to non-local address, dest: %pI6c\n", |
360 | &ipv6_hdr(skb)->saddr, daddr); | 360 | &ipv6_hdr(skb)->saddr, daddr); |
361 | dst_release(&rt->dst); | 361 | dst_release(&rt->dst); |
362 | return NULL; | 362 | return NULL; |
363 | } | 363 | } |
364 | 364 | ||
365 | return rt; | 365 | return rt; |
366 | } | 366 | } |
367 | #endif | 367 | #endif |
368 | 368 | ||
369 | 369 | ||
370 | /* | 370 | /* |
371 | * Release dest->dst_cache before a dest is removed | 371 | * Release dest->dst_cache before a dest is removed |
372 | */ | 372 | */ |
373 | void | 373 | void |
374 | ip_vs_dst_reset(struct ip_vs_dest *dest) | 374 | ip_vs_dst_reset(struct ip_vs_dest *dest) |
375 | { | 375 | { |
376 | struct dst_entry *old_dst; | 376 | struct dst_entry *old_dst; |
377 | 377 | ||
378 | old_dst = dest->dst_cache; | 378 | old_dst = dest->dst_cache; |
379 | dest->dst_cache = NULL; | 379 | dest->dst_cache = NULL; |
380 | dst_release(old_dst); | 380 | dst_release(old_dst); |
381 | dest->dst_saddr.ip = 0; | 381 | dest->dst_saddr.ip = 0; |
382 | } | 382 | } |
383 | 383 | ||
/*
 * Statement expression yielding a netfilter verdict for @skb.
 * The skb is marked as IPVS property; if the connection uses conntrack
 * (IP_VS_CONN_F_NFCT) the verdict comes from ip_vs_confirm_conntrack(),
 * otherwise it is NF_ACCEPT. Only on NF_ACCEPT the netfilter state of the
 * skb is reset and skb_forward_csum() is applied.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
397 | 397 | ||
/*
 * Send out a NAT-ed @skb. NOTE: contains a return statement, so it may
 * only be used inside a function returning a netfilter verdict.
 * The skb is marked as IPVS property; conntrack is updated when the
 * connection uses it (IP_VS_CONN_F_NFCT), else the skb is set to notrack.
 * If @local is true the enclosing function returns NF_ACCEPT; otherwise
 * the skb is passed through the LOCAL_OUT hook of @pf to dst_output.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))	\
		ip_vs_update_conntrack(skb, cp, 1);	\
	else						\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)
411 | 411 | ||
/*
 * Send an skb that was not NAT-ed (bypass, local delivery).
 *
 * Marks the skb as IPVS property and, unless the connection is flagged
 * IP_VS_CONN_F_NFCT, marks the skb as not tracked.
 *
 * NOTE: when @local is non-zero this macro executes
 * "return NF_ACCEPT" in the *calling* function.  Otherwise the skb is
 * passed through the NF_INET_LOCAL_OUT hook of family @pf with
 * dst_output, and execution continues after the macro.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(((cp)->flags & IP_VS_CONN_F_NFCT) == 0))	\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
423 | 423 | ||
424 | 424 | ||
425 | /* | 425 | /* |
426 | * NULL transmitter (do nothing except return NF_ACCEPT) | 426 | * NULL transmitter (do nothing except return NF_ACCEPT) |
427 | */ | 427 | */ |
428 | int | 428 | int |
429 | ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 429 | ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
430 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 430 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
431 | { | 431 | { |
432 | /* we do not touch skb and do not need pskb ptr */ | 432 | /* we do not touch skb and do not need pskb ptr */ |
433 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | 433 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); |
434 | } | 434 | } |
435 | 435 | ||
436 | 436 | ||
437 | /* | 437 | /* |
438 | * Bypass transmitter | 438 | * Bypass transmitter |
439 | * Let packets bypass the destination when the destination is not | 439 | * Let packets bypass the destination when the destination is not |
440 | * available, it may be only used in transparent cache cluster. | 440 | * available, it may be only used in transparent cache cluster. |
441 | */ | 441 | */ |
442 | int | 442 | int |
443 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 443 | ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
444 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 444 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
445 | { | 445 | { |
446 | struct rtable *rt; /* Route to the other host */ | 446 | struct rtable *rt; /* Route to the other host */ |
447 | struct iphdr *iph = ip_hdr(skb); | 447 | struct iphdr *iph = ip_hdr(skb); |
448 | int mtu; | 448 | int mtu; |
449 | 449 | ||
450 | EnterFunction(10); | 450 | EnterFunction(10); |
451 | 451 | ||
452 | if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), | 452 | if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), |
453 | IP_VS_RT_MODE_NON_LOCAL, NULL))) | 453 | IP_VS_RT_MODE_NON_LOCAL, NULL))) |
454 | goto tx_error_icmp; | 454 | goto tx_error_icmp; |
455 | 455 | ||
456 | /* MTU checking */ | 456 | /* MTU checking */ |
457 | mtu = dst_mtu(&rt->dst); | 457 | mtu = dst_mtu(&rt->dst); |
458 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && | 458 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
459 | !skb_is_gso(skb)) { | 459 | !skb_is_gso(skb)) { |
460 | ip_rt_put(rt); | 460 | ip_rt_put(rt); |
461 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 461 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
462 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 462 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
463 | goto tx_error; | 463 | goto tx_error; |
464 | } | 464 | } |
465 | 465 | ||
466 | /* | 466 | /* |
467 | * Call ip_send_check because we are not sure it is called | 467 | * Call ip_send_check because we are not sure it is called |
468 | * after ip_defrag. Is copy-on-write needed? | 468 | * after ip_defrag. Is copy-on-write needed? |
469 | */ | 469 | */ |
470 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { | 470 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { |
471 | ip_rt_put(rt); | 471 | ip_rt_put(rt); |
472 | return NF_STOLEN; | 472 | return NF_STOLEN; |
473 | } | 473 | } |
474 | ip_send_check(ip_hdr(skb)); | 474 | ip_send_check(ip_hdr(skb)); |
475 | 475 | ||
476 | /* drop old route */ | 476 | /* drop old route */ |
477 | skb_dst_drop(skb); | 477 | skb_dst_drop(skb); |
478 | skb_dst_set(skb, &rt->dst); | 478 | skb_dst_set(skb, &rt->dst); |
479 | 479 | ||
480 | /* Another hack: avoid icmp_send in ip_fragment */ | 480 | /* Another hack: avoid icmp_send in ip_fragment */ |
481 | skb->local_df = 1; | 481 | skb->local_df = 1; |
482 | 482 | ||
483 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); | 483 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
484 | 484 | ||
485 | LeaveFunction(10); | 485 | LeaveFunction(10); |
486 | return NF_STOLEN; | 486 | return NF_STOLEN; |
487 | 487 | ||
488 | tx_error_icmp: | 488 | tx_error_icmp: |
489 | dst_link_failure(skb); | 489 | dst_link_failure(skb); |
490 | tx_error: | 490 | tx_error: |
491 | kfree_skb(skb); | 491 | kfree_skb(skb); |
492 | LeaveFunction(10); | 492 | LeaveFunction(10); |
493 | return NF_STOLEN; | 493 | return NF_STOLEN; |
494 | } | 494 | } |
495 | 495 | ||
496 | #ifdef CONFIG_IP_VS_IPV6 | 496 | #ifdef CONFIG_IP_VS_IPV6 |
497 | int | 497 | int |
498 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 498 | ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
499 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) | 499 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) |
500 | { | 500 | { |
501 | struct rt6_info *rt; /* Route to the other host */ | 501 | struct rt6_info *rt; /* Route to the other host */ |
502 | int mtu; | 502 | int mtu; |
503 | 503 | ||
504 | EnterFunction(10); | 504 | EnterFunction(10); |
505 | 505 | ||
506 | rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, | 506 | rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, |
507 | IP_VS_RT_MODE_NON_LOCAL); | 507 | IP_VS_RT_MODE_NON_LOCAL); |
508 | if (!rt) | 508 | if (!rt) |
509 | goto tx_error_icmp; | 509 | goto tx_error_icmp; |
510 | 510 | ||
511 | /* MTU checking */ | 511 | /* MTU checking */ |
512 | mtu = dst_mtu(&rt->dst); | 512 | mtu = dst_mtu(&rt->dst); |
513 | if (__mtu_check_toobig_v6(skb, mtu)) { | 513 | if (__mtu_check_toobig_v6(skb, mtu)) { |
514 | if (!skb->dev) { | 514 | if (!skb->dev) { |
515 | struct net *net = dev_net(skb_dst(skb)->dev); | 515 | struct net *net = dev_net(skb_dst(skb)->dev); |
516 | 516 | ||
517 | skb->dev = net->loopback_dev; | 517 | skb->dev = net->loopback_dev; |
518 | } | 518 | } |
519 | /* only send ICMP too big on first fragment */ | 519 | /* only send ICMP too big on first fragment */ |
520 | if (!iph->fragoffs) | 520 | if (!iph->fragoffs) |
521 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 521 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
522 | dst_release(&rt->dst); | 522 | dst_release(&rt->dst); |
523 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 523 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
524 | goto tx_error; | 524 | goto tx_error; |
525 | } | 525 | } |
526 | 526 | ||
527 | /* | 527 | /* |
528 | * Call ip_send_check because we are not sure it is called | 528 | * Call ip_send_check because we are not sure it is called |
529 | * after ip_defrag. Is copy-on-write needed? | 529 | * after ip_defrag. Is copy-on-write needed? |
530 | */ | 530 | */ |
531 | skb = skb_share_check(skb, GFP_ATOMIC); | 531 | skb = skb_share_check(skb, GFP_ATOMIC); |
532 | if (unlikely(skb == NULL)) { | 532 | if (unlikely(skb == NULL)) { |
533 | dst_release(&rt->dst); | 533 | dst_release(&rt->dst); |
534 | return NF_STOLEN; | 534 | return NF_STOLEN; |
535 | } | 535 | } |
536 | 536 | ||
537 | /* drop old route */ | 537 | /* drop old route */ |
538 | skb_dst_drop(skb); | 538 | skb_dst_drop(skb); |
539 | skb_dst_set(skb, &rt->dst); | 539 | skb_dst_set(skb, &rt->dst); |
540 | 540 | ||
541 | /* Another hack: avoid icmp_send in ip_fragment */ | 541 | /* Another hack: avoid icmp_send in ip_fragment */ |
542 | skb->local_df = 1; | 542 | skb->local_df = 1; |
543 | 543 | ||
544 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); | 544 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
545 | 545 | ||
546 | LeaveFunction(10); | 546 | LeaveFunction(10); |
547 | return NF_STOLEN; | 547 | return NF_STOLEN; |
548 | 548 | ||
549 | tx_error_icmp: | 549 | tx_error_icmp: |
550 | dst_link_failure(skb); | 550 | dst_link_failure(skb); |
551 | tx_error: | 551 | tx_error: |
552 | kfree_skb(skb); | 552 | kfree_skb(skb); |
553 | LeaveFunction(10); | 553 | LeaveFunction(10); |
554 | return NF_STOLEN; | 554 | return NF_STOLEN; |
555 | } | 555 | } |
556 | #endif | 556 | #endif |
557 | 557 | ||
558 | /* | 558 | /* |
559 | * NAT transmitter (only for outside-to-inside nat forwarding) | 559 | * NAT transmitter (only for outside-to-inside nat forwarding) |
560 | * Not used for related ICMP | 560 | * Not used for related ICMP |
561 | */ | 561 | */ |
562 | int | 562 | int |
563 | ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 563 | ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
564 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 564 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
565 | { | 565 | { |
566 | struct rtable *rt; /* Route to the other host */ | 566 | struct rtable *rt; /* Route to the other host */ |
567 | int mtu; | 567 | int mtu; |
568 | struct iphdr *iph = ip_hdr(skb); | 568 | struct iphdr *iph = ip_hdr(skb); |
569 | int local; | 569 | int local; |
570 | 570 | ||
571 | EnterFunction(10); | 571 | EnterFunction(10); |
572 | 572 | ||
573 | /* check if it is a connection of no-client-port */ | 573 | /* check if it is a connection of no-client-port */ |
574 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { | 574 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { |
575 | __be16 _pt, *p; | 575 | __be16 _pt, *p; |
576 | p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); | 576 | p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); |
577 | if (p == NULL) | 577 | if (p == NULL) |
578 | goto tx_error; | 578 | goto tx_error; |
579 | ip_vs_conn_fill_cport(cp, *p); | 579 | ip_vs_conn_fill_cport(cp, *p); |
580 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 580 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
581 | } | 581 | } |
582 | 582 | ||
583 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, | 583 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
584 | RT_TOS(iph->tos), | 584 | RT_TOS(iph->tos), |
585 | IP_VS_RT_MODE_LOCAL | | 585 | IP_VS_RT_MODE_LOCAL | |
586 | IP_VS_RT_MODE_NON_LOCAL | | 586 | IP_VS_RT_MODE_NON_LOCAL | |
587 | IP_VS_RT_MODE_RDR, NULL))) | 587 | IP_VS_RT_MODE_RDR, NULL))) |
588 | goto tx_error_icmp; | 588 | goto tx_error_icmp; |
589 | local = rt->rt_flags & RTCF_LOCAL; | 589 | local = rt->rt_flags & RTCF_LOCAL; |
590 | /* | 590 | /* |
591 | * Avoid duplicate tuple in reply direction for NAT traffic | 591 | * Avoid duplicate tuple in reply direction for NAT traffic |
592 | * to local address when connection is sync-ed | 592 | * to local address when connection is sync-ed |
593 | */ | 593 | */ |
594 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | 594 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
595 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | 595 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
596 | enum ip_conntrack_info ctinfo; | 596 | enum ip_conntrack_info ctinfo; |
597 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | 597 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
598 | 598 | ||
599 | if (ct && !nf_ct_is_untracked(ct)) { | 599 | if (ct && !nf_ct_is_untracked(ct)) { |
600 | IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, | 600 | IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, |
601 | "ip_vs_nat_xmit(): " | 601 | "ip_vs_nat_xmit(): " |
602 | "stopping DNAT to local address"); | 602 | "stopping DNAT to local address"); |
603 | goto tx_error_put; | 603 | goto tx_error_put; |
604 | } | 604 | } |
605 | } | 605 | } |
606 | #endif | 606 | #endif |
607 | 607 | ||
608 | /* From world but DNAT to loopback address? */ | 608 | /* From world but DNAT to loopback address? */ |
609 | if (local && ipv4_is_loopback(cp->daddr.ip) && | 609 | if (local && ipv4_is_loopback(cp->daddr.ip) && |
610 | rt_is_input_route(skb_rtable(skb))) { | 610 | rt_is_input_route(skb_rtable(skb))) { |
611 | IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " | 611 | IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " |
612 | "stopping DNAT to loopback address"); | 612 | "stopping DNAT to loopback address"); |
613 | goto tx_error_put; | 613 | goto tx_error_put; |
614 | } | 614 | } |
615 | 615 | ||
616 | /* MTU checking */ | 616 | /* MTU checking */ |
617 | mtu = dst_mtu(&rt->dst); | 617 | mtu = dst_mtu(&rt->dst); |
618 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && | 618 | if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && |
619 | !skb_is_gso(skb)) { | 619 | !skb_is_gso(skb)) { |
620 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 620 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
621 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, | 621 | IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, |
622 | "ip_vs_nat_xmit(): frag needed for"); | 622 | "ip_vs_nat_xmit(): frag needed for"); |
623 | goto tx_error_put; | 623 | goto tx_error_put; |
624 | } | 624 | } |
625 | 625 | ||
626 | /* copy-on-write the packet before mangling it */ | 626 | /* copy-on-write the packet before mangling it */ |
627 | if (!skb_make_writable(skb, sizeof(struct iphdr))) | 627 | if (!skb_make_writable(skb, sizeof(struct iphdr))) |
628 | goto tx_error_put; | 628 | goto tx_error_put; |
629 | 629 | ||
630 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 630 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
631 | goto tx_error_put; | 631 | goto tx_error_put; |
632 | 632 | ||
633 | /* mangle the packet */ | 633 | /* mangle the packet */ |
634 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) | 634 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) |
635 | goto tx_error_put; | 635 | goto tx_error_put; |
636 | ip_hdr(skb)->daddr = cp->daddr.ip; | 636 | ip_hdr(skb)->daddr = cp->daddr.ip; |
637 | ip_send_check(ip_hdr(skb)); | 637 | ip_send_check(ip_hdr(skb)); |
638 | 638 | ||
639 | if (!local) { | 639 | if (!local) { |
640 | /* drop old route */ | 640 | /* drop old route */ |
641 | skb_dst_drop(skb); | 641 | skb_dst_drop(skb); |
642 | skb_dst_set(skb, &rt->dst); | 642 | skb_dst_set(skb, &rt->dst); |
643 | } else { | 643 | } else { |
644 | ip_rt_put(rt); | 644 | ip_rt_put(rt); |
645 | /* | 645 | /* |
646 | * Some IPv4 replies get local address from routes, | 646 | * Some IPv4 replies get local address from routes, |
647 | * not from iph, so while we DNAT after routing | 647 | * not from iph, so while we DNAT after routing |
648 | * we need this second input/output route. | 648 | * we need this second input/output route. |
649 | */ | 649 | */ |
650 | if (!__ip_vs_reroute_locally(skb)) | 650 | if (!__ip_vs_reroute_locally(skb)) |
651 | goto tx_error; | 651 | goto tx_error; |
652 | } | 652 | } |
653 | 653 | ||
654 | IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); | 654 | IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); |
655 | 655 | ||
656 | /* FIXME: when application helper enlarges the packet and the length | 656 | /* FIXME: when application helper enlarges the packet and the length |
657 | is larger than the MTU of outgoing device, there will be still | 657 | is larger than the MTU of outgoing device, there will be still |
658 | MTU problem. */ | 658 | MTU problem. */ |
659 | 659 | ||
660 | /* Another hack: avoid icmp_send in ip_fragment */ | 660 | /* Another hack: avoid icmp_send in ip_fragment */ |
661 | skb->local_df = 1; | 661 | skb->local_df = 1; |
662 | 662 | ||
663 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); | 663 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
664 | 664 | ||
665 | LeaveFunction(10); | 665 | LeaveFunction(10); |
666 | return NF_STOLEN; | 666 | return NF_STOLEN; |
667 | 667 | ||
668 | tx_error_icmp: | 668 | tx_error_icmp: |
669 | dst_link_failure(skb); | 669 | dst_link_failure(skb); |
670 | tx_error: | 670 | tx_error: |
671 | kfree_skb(skb); | 671 | kfree_skb(skb); |
672 | LeaveFunction(10); | 672 | LeaveFunction(10); |
673 | return NF_STOLEN; | 673 | return NF_STOLEN; |
674 | tx_error_put: | 674 | tx_error_put: |
675 | ip_rt_put(rt); | 675 | ip_rt_put(rt); |
676 | goto tx_error; | 676 | goto tx_error; |
677 | } | 677 | } |
678 | 678 | ||
679 | #ifdef CONFIG_IP_VS_IPV6 | 679 | #ifdef CONFIG_IP_VS_IPV6 |
680 | int | 680 | int |
681 | ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 681 | ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
682 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) | 682 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) |
683 | { | 683 | { |
684 | struct rt6_info *rt; /* Route to the other host */ | 684 | struct rt6_info *rt; /* Route to the other host */ |
685 | int mtu; | 685 | int mtu; |
686 | int local; | 686 | int local; |
687 | 687 | ||
688 | EnterFunction(10); | 688 | EnterFunction(10); |
689 | 689 | ||
690 | /* check if it is a connection of no-client-port */ | 690 | /* check if it is a connection of no-client-port */ |
691 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { | 691 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { |
692 | __be16 _pt, *p; | 692 | __be16 _pt, *p; |
693 | p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); | 693 | p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); |
694 | if (p == NULL) | 694 | if (p == NULL) |
695 | goto tx_error; | 695 | goto tx_error; |
696 | ip_vs_conn_fill_cport(cp, *p); | 696 | ip_vs_conn_fill_cport(cp, *p); |
697 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); | 697 | IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); |
698 | } | 698 | } |
699 | 699 | ||
700 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, | 700 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
701 | 0, (IP_VS_RT_MODE_LOCAL | | 701 | 0, (IP_VS_RT_MODE_LOCAL | |
702 | IP_VS_RT_MODE_NON_LOCAL | | 702 | IP_VS_RT_MODE_NON_LOCAL | |
703 | IP_VS_RT_MODE_RDR)))) | 703 | IP_VS_RT_MODE_RDR)))) |
704 | goto tx_error_icmp; | 704 | goto tx_error_icmp; |
705 | local = __ip_vs_is_local_route6(rt); | 705 | local = __ip_vs_is_local_route6(rt); |
706 | /* | 706 | /* |
707 | * Avoid duplicate tuple in reply direction for NAT traffic | 707 | * Avoid duplicate tuple in reply direction for NAT traffic |
708 | * to local address when connection is sync-ed | 708 | * to local address when connection is sync-ed |
709 | */ | 709 | */ |
710 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | 710 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
711 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | 711 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
712 | enum ip_conntrack_info ctinfo; | 712 | enum ip_conntrack_info ctinfo; |
713 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | 713 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
714 | 714 | ||
715 | if (ct && !nf_ct_is_untracked(ct)) { | 715 | if (ct && !nf_ct_is_untracked(ct)) { |
716 | IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, | 716 | IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, |
717 | "ip_vs_nat_xmit_v6(): " | 717 | "ip_vs_nat_xmit_v6(): " |
718 | "stopping DNAT to local address"); | 718 | "stopping DNAT to local address"); |
719 | goto tx_error_put; | 719 | goto tx_error_put; |
720 | } | 720 | } |
721 | } | 721 | } |
722 | #endif | 722 | #endif |
723 | 723 | ||
724 | /* From world but DNAT to loopback address? */ | 724 | /* From world but DNAT to loopback address? */ |
725 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | 725 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && |
726 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | 726 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { |
727 | IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, | 727 | IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, |
728 | "ip_vs_nat_xmit_v6(): " | 728 | "ip_vs_nat_xmit_v6(): " |
729 | "stopping DNAT to loopback address"); | 729 | "stopping DNAT to loopback address"); |
730 | goto tx_error_put; | 730 | goto tx_error_put; |
731 | } | 731 | } |
732 | 732 | ||
733 | /* MTU checking */ | 733 | /* MTU checking */ |
734 | mtu = dst_mtu(&rt->dst); | 734 | mtu = dst_mtu(&rt->dst); |
735 | if (__mtu_check_toobig_v6(skb, mtu)) { | 735 | if (__mtu_check_toobig_v6(skb, mtu)) { |
736 | if (!skb->dev) { | 736 | if (!skb->dev) { |
737 | struct net *net = dev_net(skb_dst(skb)->dev); | 737 | struct net *net = dev_net(skb_dst(skb)->dev); |
738 | 738 | ||
739 | skb->dev = net->loopback_dev; | 739 | skb->dev = net->loopback_dev; |
740 | } | 740 | } |
741 | /* only send ICMP too big on first fragment */ | 741 | /* only send ICMP too big on first fragment */ |
742 | if (!iph->fragoffs) | 742 | if (!iph->fragoffs) |
743 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 743 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
744 | IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, | 744 | IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, |
745 | "ip_vs_nat_xmit_v6(): frag needed for"); | 745 | "ip_vs_nat_xmit_v6(): frag needed for"); |
746 | goto tx_error_put; | 746 | goto tx_error_put; |
747 | } | 747 | } |
748 | 748 | ||
749 | /* copy-on-write the packet before mangling it */ | 749 | /* copy-on-write the packet before mangling it */ |
750 | if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) | 750 | if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) |
751 | goto tx_error_put; | 751 | goto tx_error_put; |
752 | 752 | ||
753 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 753 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
754 | goto tx_error_put; | 754 | goto tx_error_put; |
755 | 755 | ||
756 | /* mangle the packet */ | 756 | /* mangle the packet */ |
757 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) | 757 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) |
758 | goto tx_error; | 758 | goto tx_error; |
759 | ipv6_hdr(skb)->daddr = cp->daddr.in6; | 759 | ipv6_hdr(skb)->daddr = cp->daddr.in6; |
760 | 760 | ||
761 | if (!local || !skb->dev) { | 761 | if (!local || !skb->dev) { |
762 | /* drop the old route when skb is not shared */ | 762 | /* drop the old route when skb is not shared */ |
763 | skb_dst_drop(skb); | 763 | skb_dst_drop(skb); |
764 | skb_dst_set(skb, &rt->dst); | 764 | skb_dst_set(skb, &rt->dst); |
765 | } else { | 765 | } else { |
766 | /* destined to loopback, do we need to change route? */ | 766 | /* destined to loopback, do we need to change route? */ |
767 | dst_release(&rt->dst); | 767 | dst_release(&rt->dst); |
768 | } | 768 | } |
769 | 769 | ||
770 | IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); | 770 | IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); |
771 | 771 | ||
772 | /* FIXME: when application helper enlarges the packet and the length | 772 | /* FIXME: when application helper enlarges the packet and the length |
773 | is larger than the MTU of outgoing device, there will be still | 773 | is larger than the MTU of outgoing device, there will be still |
774 | MTU problem. */ | 774 | MTU problem. */ |
775 | 775 | ||
776 | /* Another hack: avoid icmp_send in ip_fragment */ | 776 | /* Another hack: avoid icmp_send in ip_fragment */ |
777 | skb->local_df = 1; | 777 | skb->local_df = 1; |
778 | 778 | ||
779 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); | 779 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
780 | 780 | ||
781 | LeaveFunction(10); | 781 | LeaveFunction(10); |
782 | return NF_STOLEN; | 782 | return NF_STOLEN; |
783 | 783 | ||
784 | tx_error_icmp: | 784 | tx_error_icmp: |
785 | dst_link_failure(skb); | 785 | dst_link_failure(skb); |
786 | tx_error: | 786 | tx_error: |
787 | LeaveFunction(10); | 787 | LeaveFunction(10); |
788 | kfree_skb(skb); | 788 | kfree_skb(skb); |
789 | return NF_STOLEN; | 789 | return NF_STOLEN; |
790 | tx_error_put: | 790 | tx_error_put: |
791 | dst_release(&rt->dst); | 791 | dst_release(&rt->dst); |
792 | goto tx_error; | 792 | goto tx_error; |
793 | } | 793 | } |
794 | #endif | 794 | #endif |
795 | 795 | ||
796 | 796 | ||
797 | /* | 797 | /* |
798 | * IP Tunneling transmitter | 798 | * IP Tunneling transmitter |
799 | * | 799 | * |
800 | * This function encapsulates the packet in a new IP packet, its | 800 | * This function encapsulates the packet in a new IP packet, its |
801 | * destination will be set to cp->daddr. Most code of this function | 801 | * destination will be set to cp->daddr. Most code of this function |
802 | * is taken from ipip.c. | 802 | * is taken from ipip.c. |
803 | * | 803 | * |
804 | * It is used in VS/TUN cluster. The load balancer selects a real | 804 | * It is used in VS/TUN cluster. The load balancer selects a real |
805 | * server from a cluster based on a scheduling algorithm, | 805 | * server from a cluster based on a scheduling algorithm, |
806 | * encapsulates the request packet and forwards it to the selected | 806 | * encapsulates the request packet and forwards it to the selected |
807 | * server. For example, all real servers are configured with | 807 | * server. For example, all real servers are configured with |
808 | * "ifconfig tunl0 <Virtual IP Address> up". When the server receives | 808 | * "ifconfig tunl0 <Virtual IP Address> up". When the server receives |
809 | * the encapsulated packet, it will decapsulate the packet, processe | 809 | * the encapsulated packet, it will decapsulate the packet, processe |
810 | * the request and return the response packets directly to the client | 810 | * the request and return the response packets directly to the client |
811 | * without passing the load balancer. This can greatly increase the | 811 | * without passing the load balancer. This can greatly increase the |
812 | * scalability of virtual server. | 812 | * scalability of virtual server. |
813 | * | 813 | * |
814 | * Used for ANY protocol | 814 | * Used for ANY protocol |
815 | */ | 815 | */ |
816 | int | 816 | int |
817 | ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 817 | ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
818 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 818 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
819 | { | 819 | { |
820 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); | 820 | struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); |
821 | struct rtable *rt; /* Route to the other host */ | 821 | struct rtable *rt; /* Route to the other host */ |
822 | __be32 saddr; /* Source for tunnel */ | 822 | __be32 saddr; /* Source for tunnel */ |
823 | struct net_device *tdev; /* Device to other host */ | 823 | struct net_device *tdev; /* Device to other host */ |
824 | struct iphdr *old_iph = ip_hdr(skb); | 824 | struct iphdr *old_iph = ip_hdr(skb); |
825 | u8 tos = old_iph->tos; | 825 | u8 tos = old_iph->tos; |
826 | __be16 df; | 826 | __be16 df; |
827 | struct iphdr *iph; /* Our new IP header */ | 827 | struct iphdr *iph; /* Our new IP header */ |
828 | unsigned int max_headroom; /* The extra header space needed */ | 828 | unsigned int max_headroom; /* The extra header space needed */ |
829 | int mtu; | 829 | int mtu; |
830 | int ret; | 830 | int ret; |
831 | 831 | ||
832 | EnterFunction(10); | 832 | EnterFunction(10); |
833 | 833 | ||
834 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, | 834 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
835 | RT_TOS(tos), IP_VS_RT_MODE_LOCAL | | 835 | RT_TOS(tos), IP_VS_RT_MODE_LOCAL | |
836 | IP_VS_RT_MODE_NON_LOCAL | | 836 | IP_VS_RT_MODE_NON_LOCAL | |
837 | IP_VS_RT_MODE_CONNECT, | 837 | IP_VS_RT_MODE_CONNECT, |
838 | &saddr))) | 838 | &saddr))) |
839 | goto tx_error_icmp; | 839 | goto tx_error_icmp; |
840 | if (rt->rt_flags & RTCF_LOCAL) { | 840 | if (rt->rt_flags & RTCF_LOCAL) { |
841 | ip_rt_put(rt); | 841 | ip_rt_put(rt); |
842 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | 842 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); |
843 | } | 843 | } |
844 | 844 | ||
845 | tdev = rt->dst.dev; | 845 | tdev = rt->dst.dev; |
846 | 846 | ||
847 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); | 847 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
848 | if (mtu < 68) { | 848 | if (mtu < 68) { |
849 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); | 849 | IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); |
850 | goto tx_error_put; | 850 | goto tx_error_put; |
851 | } | 851 | } |
852 | if (rt_is_output_route(skb_rtable(skb))) | 852 | if (rt_is_output_route(skb_rtable(skb))) |
853 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | 853 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); |
854 | 854 | ||
855 | /* Copy DF, reset fragment offset and MF */ | 855 | /* Copy DF, reset fragment offset and MF */ |
856 | df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; | 856 | df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; |
857 | 857 | ||
858 | if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { | 858 | if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { |
859 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 859 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
860 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 860 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
861 | goto tx_error_put; | 861 | goto tx_error_put; |
862 | } | 862 | } |
863 | 863 | ||
864 | /* | 864 | /* |
865 | * Okay, now see if we can stuff it in the buffer as-is. | 865 | * Okay, now see if we can stuff it in the buffer as-is. |
866 | */ | 866 | */ |
867 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); | 867 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); |
868 | 868 | ||
869 | if (skb_headroom(skb) < max_headroom | 869 | if (skb_headroom(skb) < max_headroom |
870 | || skb_cloned(skb) || skb_shared(skb)) { | 870 | || skb_cloned(skb) || skb_shared(skb)) { |
871 | struct sk_buff *new_skb = | 871 | struct sk_buff *new_skb = |
872 | skb_realloc_headroom(skb, max_headroom); | 872 | skb_realloc_headroom(skb, max_headroom); |
873 | if (!new_skb) { | 873 | if (!new_skb) { |
874 | ip_rt_put(rt); | 874 | ip_rt_put(rt); |
875 | kfree_skb(skb); | 875 | kfree_skb(skb); |
876 | IP_VS_ERR_RL("%s(): no memory\n", __func__); | 876 | IP_VS_ERR_RL("%s(): no memory\n", __func__); |
877 | return NF_STOLEN; | 877 | return NF_STOLEN; |
878 | } | 878 | } |
879 | consume_skb(skb); | 879 | consume_skb(skb); |
880 | skb = new_skb; | 880 | skb = new_skb; |
881 | old_iph = ip_hdr(skb); | 881 | old_iph = ip_hdr(skb); |
882 | } | 882 | } |
883 | 883 | ||
884 | skb->transport_header = skb->network_header; | 884 | skb->transport_header = skb->network_header; |
885 | 885 | ||
886 | /* fix old IP header checksum */ | 886 | /* fix old IP header checksum */ |
887 | ip_send_check(old_iph); | 887 | ip_send_check(old_iph); |
888 | 888 | ||
889 | skb_push(skb, sizeof(struct iphdr)); | 889 | skb_push(skb, sizeof(struct iphdr)); |
890 | skb_reset_network_header(skb); | 890 | skb_reset_network_header(skb); |
891 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 891 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
892 | 892 | ||
893 | /* drop old route */ | 893 | /* drop old route */ |
894 | skb_dst_drop(skb); | 894 | skb_dst_drop(skb); |
895 | skb_dst_set(skb, &rt->dst); | 895 | skb_dst_set(skb, &rt->dst); |
896 | 896 | ||
897 | /* | 897 | /* |
898 | * Push down and install the IPIP header. | 898 | * Push down and install the IPIP header. |
899 | */ | 899 | */ |
900 | iph = ip_hdr(skb); | 900 | iph = ip_hdr(skb); |
901 | iph->version = 4; | 901 | iph->version = 4; |
902 | iph->ihl = sizeof(struct iphdr)>>2; | 902 | iph->ihl = sizeof(struct iphdr)>>2; |
903 | iph->frag_off = df; | 903 | iph->frag_off = df; |
904 | iph->protocol = IPPROTO_IPIP; | 904 | iph->protocol = IPPROTO_IPIP; |
905 | iph->tos = tos; | 905 | iph->tos = tos; |
906 | iph->daddr = cp->daddr.ip; | 906 | iph->daddr = cp->daddr.ip; |
907 | iph->saddr = saddr; | 907 | iph->saddr = saddr; |
908 | iph->ttl = old_iph->ttl; | 908 | iph->ttl = old_iph->ttl; |
909 | ip_select_ident(iph, &rt->dst, NULL); | 909 | ip_select_ident(iph, &rt->dst, NULL); |
910 | 910 | ||
911 | /* Another hack: avoid icmp_send in ip_fragment */ | 911 | /* Another hack: avoid icmp_send in ip_fragment */ |
912 | skb->local_df = 1; | 912 | skb->local_df = 1; |
913 | 913 | ||
914 | ret = IP_VS_XMIT_TUNNEL(skb, cp); | 914 | ret = IP_VS_XMIT_TUNNEL(skb, cp); |
915 | if (ret == NF_ACCEPT) | 915 | if (ret == NF_ACCEPT) |
916 | ip_local_out(skb); | 916 | ip_local_out(skb); |
917 | else if (ret == NF_DROP) | 917 | else if (ret == NF_DROP) |
918 | kfree_skb(skb); | 918 | kfree_skb(skb); |
919 | 919 | ||
920 | LeaveFunction(10); | 920 | LeaveFunction(10); |
921 | 921 | ||
922 | return NF_STOLEN; | 922 | return NF_STOLEN; |
923 | 923 | ||
924 | tx_error_icmp: | 924 | tx_error_icmp: |
925 | dst_link_failure(skb); | 925 | dst_link_failure(skb); |
926 | tx_error: | 926 | tx_error: |
927 | kfree_skb(skb); | 927 | kfree_skb(skb); |
928 | LeaveFunction(10); | 928 | LeaveFunction(10); |
929 | return NF_STOLEN; | 929 | return NF_STOLEN; |
930 | tx_error_put: | 930 | tx_error_put: |
931 | ip_rt_put(rt); | 931 | ip_rt_put(rt); |
932 | goto tx_error; | 932 | goto tx_error; |
933 | } | 933 | } |
934 | 934 | ||
935 | #ifdef CONFIG_IP_VS_IPV6 | 935 | #ifdef CONFIG_IP_VS_IPV6 |
936 | int | 936 | int |
937 | ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 937 | ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
938 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 938 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
939 | { | 939 | { |
940 | struct rt6_info *rt; /* Route to the other host */ | 940 | struct rt6_info *rt; /* Route to the other host */ |
941 | struct in6_addr saddr; /* Source for tunnel */ | 941 | struct in6_addr saddr; /* Source for tunnel */ |
942 | struct net_device *tdev; /* Device to other host */ | 942 | struct net_device *tdev; /* Device to other host */ |
943 | struct ipv6hdr *old_iph = ipv6_hdr(skb); | 943 | struct ipv6hdr *old_iph = ipv6_hdr(skb); |
944 | struct ipv6hdr *iph; /* Our new IP header */ | 944 | struct ipv6hdr *iph; /* Our new IP header */ |
945 | unsigned int max_headroom; /* The extra header space needed */ | 945 | unsigned int max_headroom; /* The extra header space needed */ |
946 | int mtu; | 946 | int mtu; |
947 | int ret; | 947 | int ret; |
948 | 948 | ||
949 | EnterFunction(10); | 949 | EnterFunction(10); |
950 | 950 | ||
951 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, | 951 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, |
952 | &saddr, 1, (IP_VS_RT_MODE_LOCAL | | 952 | &saddr, 1, (IP_VS_RT_MODE_LOCAL | |
953 | IP_VS_RT_MODE_NON_LOCAL)))) | 953 | IP_VS_RT_MODE_NON_LOCAL)))) |
954 | goto tx_error_icmp; | 954 | goto tx_error_icmp; |
955 | if (__ip_vs_is_local_route6(rt)) { | 955 | if (__ip_vs_is_local_route6(rt)) { |
956 | dst_release(&rt->dst); | 956 | dst_release(&rt->dst); |
957 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | 957 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); |
958 | } | 958 | } |
959 | 959 | ||
960 | tdev = rt->dst.dev; | 960 | tdev = rt->dst.dev; |
961 | 961 | ||
962 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); | 962 | mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); |
963 | if (mtu < IPV6_MIN_MTU) { | 963 | if (mtu < IPV6_MIN_MTU) { |
964 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, | 964 | IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, |
965 | IPV6_MIN_MTU); | 965 | IPV6_MIN_MTU); |
966 | goto tx_error_put; | 966 | goto tx_error_put; |
967 | } | 967 | } |
968 | if (skb_dst(skb)) | 968 | if (skb_dst(skb)) |
969 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); | 969 | skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); |
970 | 970 | ||
971 | /* MTU checking: Notice that 'mtu' have been adjusted before hand */ | 971 | /* MTU checking: Notice that 'mtu' have been adjusted before hand */ |
972 | if (__mtu_check_toobig_v6(skb, mtu)) { | 972 | if (__mtu_check_toobig_v6(skb, mtu)) { |
973 | if (!skb->dev) { | 973 | if (!skb->dev) { |
974 | struct net *net = dev_net(skb_dst(skb)->dev); | 974 | struct net *net = dev_net(skb_dst(skb)->dev); |
975 | 975 | ||
976 | skb->dev = net->loopback_dev; | 976 | skb->dev = net->loopback_dev; |
977 | } | 977 | } |
978 | /* only send ICMP too big on first fragment */ | 978 | /* only send ICMP too big on first fragment */ |
979 | if (!ipvsh->fragoffs) | 979 | if (!ipvsh->fragoffs) |
980 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 980 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
981 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 981 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
982 | goto tx_error_put; | 982 | goto tx_error_put; |
983 | } | 983 | } |
984 | 984 | ||
985 | /* | 985 | /* |
986 | * Okay, now see if we can stuff it in the buffer as-is. | 986 | * Okay, now see if we can stuff it in the buffer as-is. |
987 | */ | 987 | */ |
988 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); | 988 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); |
989 | 989 | ||
990 | if (skb_headroom(skb) < max_headroom | 990 | if (skb_headroom(skb) < max_headroom |
991 | || skb_cloned(skb) || skb_shared(skb)) { | 991 | || skb_cloned(skb) || skb_shared(skb)) { |
992 | struct sk_buff *new_skb = | 992 | struct sk_buff *new_skb = |
993 | skb_realloc_headroom(skb, max_headroom); | 993 | skb_realloc_headroom(skb, max_headroom); |
994 | if (!new_skb) { | 994 | if (!new_skb) { |
995 | dst_release(&rt->dst); | 995 | dst_release(&rt->dst); |
996 | kfree_skb(skb); | 996 | kfree_skb(skb); |
997 | IP_VS_ERR_RL("%s(): no memory\n", __func__); | 997 | IP_VS_ERR_RL("%s(): no memory\n", __func__); |
998 | return NF_STOLEN; | 998 | return NF_STOLEN; |
999 | } | 999 | } |
1000 | consume_skb(skb); | 1000 | consume_skb(skb); |
1001 | skb = new_skb; | 1001 | skb = new_skb; |
1002 | old_iph = ipv6_hdr(skb); | 1002 | old_iph = ipv6_hdr(skb); |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | skb->transport_header = skb->network_header; | 1005 | skb->transport_header = skb->network_header; |
1006 | 1006 | ||
1007 | skb_push(skb, sizeof(struct ipv6hdr)); | 1007 | skb_push(skb, sizeof(struct ipv6hdr)); |
1008 | skb_reset_network_header(skb); | 1008 | skb_reset_network_header(skb); |
1009 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1009 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
1010 | 1010 | ||
1011 | /* drop old route */ | 1011 | /* drop old route */ |
1012 | skb_dst_drop(skb); | 1012 | skb_dst_drop(skb); |
1013 | skb_dst_set(skb, &rt->dst); | 1013 | skb_dst_set(skb, &rt->dst); |
1014 | 1014 | ||
1015 | /* | 1015 | /* |
1016 | * Push down and install the IPIP header. | 1016 | * Push down and install the IPIP header. |
1017 | */ | 1017 | */ |
1018 | iph = ipv6_hdr(skb); | 1018 | iph = ipv6_hdr(skb); |
1019 | iph->version = 6; | 1019 | iph->version = 6; |
1020 | iph->nexthdr = IPPROTO_IPV6; | 1020 | iph->nexthdr = IPPROTO_IPV6; |
1021 | iph->payload_len = old_iph->payload_len; | 1021 | iph->payload_len = old_iph->payload_len; |
1022 | be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); | 1022 | be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); |
1023 | iph->priority = old_iph->priority; | 1023 | iph->priority = old_iph->priority; |
1024 | memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); | 1024 | memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); |
1025 | iph->daddr = cp->daddr.in6; | 1025 | iph->daddr = cp->daddr.in6; |
1026 | iph->saddr = saddr; | 1026 | iph->saddr = saddr; |
1027 | iph->hop_limit = old_iph->hop_limit; | 1027 | iph->hop_limit = old_iph->hop_limit; |
1028 | 1028 | ||
1029 | /* Another hack: avoid icmp_send in ip_fragment */ | 1029 | /* Another hack: avoid icmp_send in ip_fragment */ |
1030 | skb->local_df = 1; | 1030 | skb->local_df = 1; |
1031 | 1031 | ||
1032 | ret = IP_VS_XMIT_TUNNEL(skb, cp); | 1032 | ret = IP_VS_XMIT_TUNNEL(skb, cp); |
1033 | if (ret == NF_ACCEPT) | 1033 | if (ret == NF_ACCEPT) |
1034 | ip6_local_out(skb); | 1034 | ip6_local_out(skb); |
1035 | else if (ret == NF_DROP) | 1035 | else if (ret == NF_DROP) |
1036 | kfree_skb(skb); | 1036 | kfree_skb(skb); |
1037 | 1037 | ||
1038 | LeaveFunction(10); | 1038 | LeaveFunction(10); |
1039 | 1039 | ||
1040 | return NF_STOLEN; | 1040 | return NF_STOLEN; |
1041 | 1041 | ||
1042 | tx_error_icmp: | 1042 | tx_error_icmp: |
1043 | dst_link_failure(skb); | 1043 | dst_link_failure(skb); |
1044 | tx_error: | 1044 | tx_error: |
1045 | kfree_skb(skb); | 1045 | kfree_skb(skb); |
1046 | LeaveFunction(10); | 1046 | LeaveFunction(10); |
1047 | return NF_STOLEN; | 1047 | return NF_STOLEN; |
1048 | tx_error_put: | 1048 | tx_error_put: |
1049 | dst_release(&rt->dst); | 1049 | dst_release(&rt->dst); |
1050 | goto tx_error; | 1050 | goto tx_error; |
1051 | } | 1051 | } |
1052 | #endif | 1052 | #endif |
1053 | 1053 | ||
1054 | 1054 | ||
1055 | /* | 1055 | /* |
1056 | * Direct Routing transmitter | 1056 | * Direct Routing transmitter |
1057 | * Used for ANY protocol | 1057 | * Used for ANY protocol |
1058 | */ | 1058 | */ |
1059 | int | 1059 | int |
1060 | ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 1060 | ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
1061 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) | 1061 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) |
1062 | { | 1062 | { |
1063 | struct rtable *rt; /* Route to the other host */ | 1063 | struct rtable *rt; /* Route to the other host */ |
1064 | struct iphdr *iph = ip_hdr(skb); | 1064 | struct iphdr *iph = ip_hdr(skb); |
1065 | int mtu; | 1065 | int mtu; |
1066 | 1066 | ||
1067 | EnterFunction(10); | 1067 | EnterFunction(10); |
1068 | 1068 | ||
1069 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, | 1069 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
1070 | RT_TOS(iph->tos), | 1070 | RT_TOS(iph->tos), |
1071 | IP_VS_RT_MODE_LOCAL | | 1071 | IP_VS_RT_MODE_LOCAL | |
1072 | IP_VS_RT_MODE_NON_LOCAL | | 1072 | IP_VS_RT_MODE_NON_LOCAL | |
1073 | IP_VS_RT_MODE_KNOWN_NH, NULL))) | 1073 | IP_VS_RT_MODE_KNOWN_NH, NULL))) |
1074 | goto tx_error_icmp; | 1074 | goto tx_error_icmp; |
1075 | if (rt->rt_flags & RTCF_LOCAL) { | 1075 | if (rt->rt_flags & RTCF_LOCAL) { |
1076 | ip_rt_put(rt); | 1076 | ip_rt_put(rt); |
1077 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); | 1077 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); |
1078 | } | 1078 | } |
1079 | 1079 | ||
1080 | /* MTU checking */ | 1080 | /* MTU checking */ |
1081 | mtu = dst_mtu(&rt->dst); | 1081 | mtu = dst_mtu(&rt->dst); |
1082 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && | 1082 | if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && |
1083 | !skb_is_gso(skb)) { | 1083 | !skb_is_gso(skb)) { |
1084 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | 1084 | icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); |
1085 | ip_rt_put(rt); | 1085 | ip_rt_put(rt); |
1086 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1086 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1087 | goto tx_error; | 1087 | goto tx_error; |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | /* | 1090 | /* |
1091 | * Call ip_send_check because we are not sure it is called | 1091 | * Call ip_send_check because we are not sure it is called |
1092 | * after ip_defrag. Is copy-on-write needed? | 1092 | * after ip_defrag. Is copy-on-write needed? |
1093 | */ | 1093 | */ |
1094 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { | 1094 | if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { |
1095 | ip_rt_put(rt); | 1095 | ip_rt_put(rt); |
1096 | return NF_STOLEN; | 1096 | return NF_STOLEN; |
1097 | } | 1097 | } |
1098 | ip_send_check(ip_hdr(skb)); | 1098 | ip_send_check(ip_hdr(skb)); |
1099 | 1099 | ||
1100 | /* drop old route */ | 1100 | /* drop old route */ |
1101 | skb_dst_drop(skb); | 1101 | skb_dst_drop(skb); |
1102 | skb_dst_set(skb, &rt->dst); | 1102 | skb_dst_set(skb, &rt->dst); |
1103 | 1103 | ||
1104 | /* Another hack: avoid icmp_send in ip_fragment */ | 1104 | /* Another hack: avoid icmp_send in ip_fragment */ |
1105 | skb->local_df = 1; | 1105 | skb->local_df = 1; |
1106 | 1106 | ||
1107 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); | 1107 | IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); |
1108 | 1108 | ||
1109 | LeaveFunction(10); | 1109 | LeaveFunction(10); |
1110 | return NF_STOLEN; | 1110 | return NF_STOLEN; |
1111 | 1111 | ||
1112 | tx_error_icmp: | 1112 | tx_error_icmp: |
1113 | dst_link_failure(skb); | 1113 | dst_link_failure(skb); |
1114 | tx_error: | 1114 | tx_error: |
1115 | kfree_skb(skb); | 1115 | kfree_skb(skb); |
1116 | LeaveFunction(10); | 1116 | LeaveFunction(10); |
1117 | return NF_STOLEN; | 1117 | return NF_STOLEN; |
1118 | } | 1118 | } |
1119 | 1119 | ||
1120 | #ifdef CONFIG_IP_VS_IPV6 | 1120 | #ifdef CONFIG_IP_VS_IPV6 |
1121 | int | 1121 | int |
1122 | ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 1122 | ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
1123 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) | 1123 | struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) |
1124 | { | 1124 | { |
1125 | struct rt6_info *rt; /* Route to the other host */ | 1125 | struct rt6_info *rt; /* Route to the other host */ |
1126 | int mtu; | 1126 | int mtu; |
1127 | 1127 | ||
1128 | EnterFunction(10); | 1128 | EnterFunction(10); |
1129 | 1129 | ||
1130 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, | 1130 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
1131 | 0, (IP_VS_RT_MODE_LOCAL | | 1131 | 0, (IP_VS_RT_MODE_LOCAL | |
1132 | IP_VS_RT_MODE_NON_LOCAL)))) | 1132 | IP_VS_RT_MODE_NON_LOCAL)))) |
1133 | goto tx_error_icmp; | 1133 | goto tx_error_icmp; |
1134 | if (__ip_vs_is_local_route6(rt)) { | 1134 | if (__ip_vs_is_local_route6(rt)) { |
1135 | dst_release(&rt->dst); | 1135 | dst_release(&rt->dst); |
1136 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); | 1136 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); |
1137 | } | 1137 | } |
1138 | 1138 | ||
1139 | /* MTU checking */ | 1139 | /* MTU checking */ |
1140 | mtu = dst_mtu(&rt->dst); | 1140 | mtu = dst_mtu(&rt->dst); |
1141 | if (__mtu_check_toobig_v6(skb, mtu)) { | 1141 | if (__mtu_check_toobig_v6(skb, mtu)) { |
1142 | if (!skb->dev) { | 1142 | if (!skb->dev) { |
1143 | struct net *net = dev_net(skb_dst(skb)->dev); | 1143 | struct net *net = dev_net(skb_dst(skb)->dev); |
1144 | 1144 | ||
1145 | skb->dev = net->loopback_dev; | 1145 | skb->dev = net->loopback_dev; |
1146 | } | 1146 | } |
1147 | /* only send ICMP too big on first fragment */ | 1147 | /* only send ICMP too big on first fragment */ |
1148 | if (!iph->fragoffs) | 1148 | if (!iph->fragoffs) |
1149 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1149 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
1150 | dst_release(&rt->dst); | 1150 | dst_release(&rt->dst); |
1151 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1151 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1152 | goto tx_error; | 1152 | goto tx_error; |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | /* | 1155 | /* |
1156 | * Call ip_send_check because we are not sure it is called | 1156 | * Call ip_send_check because we are not sure it is called |
1157 | * after ip_defrag. Is copy-on-write needed? | 1157 | * after ip_defrag. Is copy-on-write needed? |
1158 | */ | 1158 | */ |
1159 | skb = skb_share_check(skb, GFP_ATOMIC); | 1159 | skb = skb_share_check(skb, GFP_ATOMIC); |
1160 | if (unlikely(skb == NULL)) { | 1160 | if (unlikely(skb == NULL)) { |
1161 | dst_release(&rt->dst); | 1161 | dst_release(&rt->dst); |
1162 | return NF_STOLEN; | 1162 | return NF_STOLEN; |
1163 | } | 1163 | } |
1164 | 1164 | ||
1165 | /* drop old route */ | 1165 | /* drop old route */ |
1166 | skb_dst_drop(skb); | 1166 | skb_dst_drop(skb); |
1167 | skb_dst_set(skb, &rt->dst); | 1167 | skb_dst_set(skb, &rt->dst); |
1168 | 1168 | ||
1169 | /* Another hack: avoid icmp_send in ip_fragment */ | 1169 | /* Another hack: avoid icmp_send in ip_fragment */ |
1170 | skb->local_df = 1; | 1170 | skb->local_df = 1; |
1171 | 1171 | ||
1172 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); | 1172 | IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); |
1173 | 1173 | ||
1174 | LeaveFunction(10); | 1174 | LeaveFunction(10); |
1175 | return NF_STOLEN; | 1175 | return NF_STOLEN; |
1176 | 1176 | ||
1177 | tx_error_icmp: | 1177 | tx_error_icmp: |
1178 | dst_link_failure(skb); | 1178 | dst_link_failure(skb); |
1179 | tx_error: | 1179 | tx_error: |
1180 | kfree_skb(skb); | 1180 | kfree_skb(skb); |
1181 | LeaveFunction(10); | 1181 | LeaveFunction(10); |
1182 | return NF_STOLEN; | 1182 | return NF_STOLEN; |
1183 | } | 1183 | } |
1184 | #endif | 1184 | #endif |
1185 | 1185 | ||
1186 | 1186 | ||
1187 | /* | 1187 | /* |
1188 | * ICMP packet transmitter | 1188 | * ICMP packet transmitter |
1189 | * called by the ip_vs_in_icmp | 1189 | * called by the ip_vs_in_icmp |
1190 | */ | 1190 | */ |
1191 | int | 1191 | int |
1192 | ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | 1192 | ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
1193 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, | 1193 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, |
1194 | struct ip_vs_iphdr *iph) | 1194 | struct ip_vs_iphdr *iph) |
1195 | { | 1195 | { |
1196 | struct rtable *rt; /* Route to the other host */ | 1196 | struct rtable *rt; /* Route to the other host */ |
1197 | int mtu; | 1197 | int mtu; |
1198 | int rc; | 1198 | int rc; |
1199 | int local; | 1199 | int local; |
1200 | int rt_mode; | 1200 | int rt_mode; |
1201 | 1201 | ||
1202 | EnterFunction(10); | 1202 | EnterFunction(10); |
1203 | 1203 | ||
1204 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | 1204 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be |
1205 | forwarded directly here, because there is no need to | 1205 | forwarded directly here, because there is no need to |
1206 | translate address/port back */ | 1206 | translate address/port back */ |
1207 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | 1207 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { |
1208 | if (cp->packet_xmit) | 1208 | if (cp->packet_xmit) |
1209 | rc = cp->packet_xmit(skb, cp, pp, iph); | 1209 | rc = cp->packet_xmit(skb, cp, pp, iph); |
1210 | else | 1210 | else |
1211 | rc = NF_ACCEPT; | 1211 | rc = NF_ACCEPT; |
1212 | /* do not touch skb anymore */ | 1212 | /* do not touch skb anymore */ |
1213 | atomic_inc(&cp->in_pkts); | 1213 | atomic_inc(&cp->in_pkts); |
1214 | goto out; | 1214 | goto out; |
1215 | } | 1215 | } |
1216 | 1216 | ||
1217 | /* | 1217 | /* |
1218 | * mangle and send the packet here (only for VS/NAT) | 1218 | * mangle and send the packet here (only for VS/NAT) |
1219 | */ | 1219 | */ |
1220 | 1220 | ||
1221 | /* LOCALNODE from FORWARD hook is not supported */ | 1221 | /* LOCALNODE from FORWARD hook is not supported */ |
1222 | rt_mode = (hooknum != NF_INET_FORWARD) ? | 1222 | rt_mode = (hooknum != NF_INET_FORWARD) ? |
1223 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | | 1223 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | |
1224 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; | 1224 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; |
1225 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, | 1225 | if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
1226 | RT_TOS(ip_hdr(skb)->tos), | 1226 | RT_TOS(ip_hdr(skb)->tos), |
1227 | rt_mode, NULL))) | 1227 | rt_mode, NULL))) |
1228 | goto tx_error_icmp; | 1228 | goto tx_error_icmp; |
1229 | local = rt->rt_flags & RTCF_LOCAL; | 1229 | local = rt->rt_flags & RTCF_LOCAL; |
1230 | 1230 | ||
1231 | /* | 1231 | /* |
1232 | * Avoid duplicate tuple in reply direction for NAT traffic | 1232 | * Avoid duplicate tuple in reply direction for NAT traffic |
1233 | * to local address when connection is sync-ed | 1233 | * to local address when connection is sync-ed |
1234 | */ | 1234 | */ |
1235 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | 1235 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
1236 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | 1236 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
1237 | enum ip_conntrack_info ctinfo; | 1237 | enum ip_conntrack_info ctinfo; |
1238 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | 1238 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
1239 | 1239 | ||
1240 | if (ct && !nf_ct_is_untracked(ct)) { | 1240 | if (ct && !nf_ct_is_untracked(ct)) { |
1241 | IP_VS_DBG(10, "%s(): " | 1241 | IP_VS_DBG(10, "%s(): " |
1242 | "stopping DNAT to local address %pI4\n", | 1242 | "stopping DNAT to local address %pI4\n", |
1243 | __func__, &cp->daddr.ip); | 1243 | __func__, &cp->daddr.ip); |
1244 | goto tx_error_put; | 1244 | goto tx_error_put; |
1245 | } | 1245 | } |
1246 | } | 1246 | } |
1247 | #endif | 1247 | #endif |
1248 | 1248 | ||
1249 | /* From world but DNAT to loopback address? */ | 1249 | /* From world but DNAT to loopback address? */ |
1250 | if (local && ipv4_is_loopback(cp->daddr.ip) && | 1250 | if (local && ipv4_is_loopback(cp->daddr.ip) && |
1251 | rt_is_input_route(skb_rtable(skb))) { | 1251 | rt_is_input_route(skb_rtable(skb))) { |
1252 | IP_VS_DBG(1, "%s(): " | 1252 | IP_VS_DBG(1, "%s(): " |
1253 | "stopping DNAT to loopback %pI4\n", | 1253 | "stopping DNAT to loopback %pI4\n", |
1254 | __func__, &cp->daddr.ip); | 1254 | __func__, &cp->daddr.ip); |
1255 | goto tx_error_put; | 1255 | goto tx_error_put; |
1256 | } | 1256 | } |
1257 | 1257 | ||
1258 | /* MTU checking */ | 1258 | /* MTU checking */ |
1259 | mtu = dst_mtu(&rt->dst); | 1259 | mtu = dst_mtu(&rt->dst); |
1260 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && | 1260 | if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && |
1261 | !skb_is_gso(skb)) { | 1261 | !skb_is_gso(skb)) { |
1262 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 1262 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
1263 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1263 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1264 | goto tx_error_put; | 1264 | goto tx_error_put; |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | /* copy-on-write the packet before mangling it */ | 1267 | /* copy-on-write the packet before mangling it */ |
1268 | if (!skb_make_writable(skb, offset)) | 1268 | if (!skb_make_writable(skb, offset)) |
1269 | goto tx_error_put; | 1269 | goto tx_error_put; |
1270 | 1270 | ||
1271 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1271 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
1272 | goto tx_error_put; | 1272 | goto tx_error_put; |
1273 | 1273 | ||
1274 | ip_vs_nat_icmp(skb, pp, cp, 0); | 1274 | ip_vs_nat_icmp(skb, pp, cp, 0); |
1275 | 1275 | ||
1276 | if (!local) { | 1276 | if (!local) { |
1277 | /* drop the old route when skb is not shared */ | 1277 | /* drop the old route when skb is not shared */ |
1278 | skb_dst_drop(skb); | 1278 | skb_dst_drop(skb); |
1279 | skb_dst_set(skb, &rt->dst); | 1279 | skb_dst_set(skb, &rt->dst); |
1280 | } else { | 1280 | } else { |
1281 | ip_rt_put(rt); | 1281 | ip_rt_put(rt); |
1282 | /* | 1282 | /* |
1283 | * Some IPv4 replies get local address from routes, | 1283 | * Some IPv4 replies get local address from routes, |
1284 | * not from iph, so while we DNAT after routing | 1284 | * not from iph, so while we DNAT after routing |
1285 | * we need this second input/output route. | 1285 | * we need this second input/output route. |
1286 | */ | 1286 | */ |
1287 | if (!__ip_vs_reroute_locally(skb)) | 1287 | if (!__ip_vs_reroute_locally(skb)) |
1288 | goto tx_error; | 1288 | goto tx_error; |
1289 | } | 1289 | } |
1290 | 1290 | ||
1291 | /* Another hack: avoid icmp_send in ip_fragment */ | 1291 | /* Another hack: avoid icmp_send in ip_fragment */ |
1292 | skb->local_df = 1; | 1292 | skb->local_df = 1; |
1293 | 1293 | ||
1294 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); | 1294 | IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); |
1295 | 1295 | ||
1296 | rc = NF_STOLEN; | 1296 | rc = NF_STOLEN; |
1297 | goto out; | 1297 | goto out; |
1298 | 1298 | ||
1299 | tx_error_icmp: | 1299 | tx_error_icmp: |
1300 | dst_link_failure(skb); | 1300 | dst_link_failure(skb); |
1301 | tx_error: | 1301 | tx_error: |
1302 | dev_kfree_skb(skb); | 1302 | dev_kfree_skb(skb); |
1303 | rc = NF_STOLEN; | 1303 | rc = NF_STOLEN; |
1304 | out: | 1304 | out: |
1305 | LeaveFunction(10); | 1305 | LeaveFunction(10); |
1306 | return rc; | 1306 | return rc; |
1307 | tx_error_put: | 1307 | tx_error_put: |
1308 | ip_rt_put(rt); | 1308 | ip_rt_put(rt); |
1309 | goto tx_error; | 1309 | goto tx_error; |
1310 | } | 1310 | } |
1311 | 1311 | ||
1312 | #ifdef CONFIG_IP_VS_IPV6 | 1312 | #ifdef CONFIG_IP_VS_IPV6 |
1313 | int | 1313 | int |
1314 | ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, | 1314 | ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
1315 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, | 1315 | struct ip_vs_protocol *pp, int offset, unsigned int hooknum, |
1316 | struct ip_vs_iphdr *iph) | 1316 | struct ip_vs_iphdr *iph) |
1317 | { | 1317 | { |
1318 | struct rt6_info *rt; /* Route to the other host */ | 1318 | struct rt6_info *rt; /* Route to the other host */ |
1319 | int mtu; | 1319 | int mtu; |
1320 | int rc; | 1320 | int rc; |
1321 | int local; | 1321 | int local; |
1322 | int rt_mode; | 1322 | int rt_mode; |
1323 | 1323 | ||
1324 | EnterFunction(10); | 1324 | EnterFunction(10); |
1325 | 1325 | ||
1326 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be | 1326 | /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be |
1327 | forwarded directly here, because there is no need to | 1327 | forwarded directly here, because there is no need to |
1328 | translate address/port back */ | 1328 | translate address/port back */ |
1329 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | 1329 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { |
1330 | if (cp->packet_xmit) | 1330 | if (cp->packet_xmit) |
1331 | rc = cp->packet_xmit(skb, cp, pp, iph); | 1331 | rc = cp->packet_xmit(skb, cp, pp, iph); |
1332 | else | 1332 | else |
1333 | rc = NF_ACCEPT; | 1333 | rc = NF_ACCEPT; |
1334 | /* do not touch skb anymore */ | 1334 | /* do not touch skb anymore */ |
1335 | atomic_inc(&cp->in_pkts); | 1335 | atomic_inc(&cp->in_pkts); |
1336 | goto out; | 1336 | goto out; |
1337 | } | 1337 | } |
1338 | 1338 | ||
1339 | /* | 1339 | /* |
1340 | * mangle and send the packet here (only for VS/NAT) | 1340 | * mangle and send the packet here (only for VS/NAT) |
1341 | */ | 1341 | */ |
1342 | 1342 | ||
1343 | /* LOCALNODE from FORWARD hook is not supported */ | 1343 | /* LOCALNODE from FORWARD hook is not supported */ |
1344 | rt_mode = (hooknum != NF_INET_FORWARD) ? | 1344 | rt_mode = (hooknum != NF_INET_FORWARD) ? |
1345 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | | 1345 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | |
1346 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; | 1346 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; |
1347 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, | 1347 | if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
1348 | 0, rt_mode))) | 1348 | 0, rt_mode))) |
1349 | goto tx_error_icmp; | 1349 | goto tx_error_icmp; |
1350 | 1350 | ||
1351 | local = __ip_vs_is_local_route6(rt); | 1351 | local = __ip_vs_is_local_route6(rt); |
1352 | /* | 1352 | /* |
1353 | * Avoid duplicate tuple in reply direction for NAT traffic | 1353 | * Avoid duplicate tuple in reply direction for NAT traffic |
1354 | * to local address when connection is sync-ed | 1354 | * to local address when connection is sync-ed |
1355 | */ | 1355 | */ |
1356 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | 1356 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
1357 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { | 1357 | if (cp->flags & IP_VS_CONN_F_SYNC && local) { |
1358 | enum ip_conntrack_info ctinfo; | 1358 | enum ip_conntrack_info ctinfo; |
1359 | struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); | 1359 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
1360 | 1360 | ||
1361 | if (ct && !nf_ct_is_untracked(ct)) { | 1361 | if (ct && !nf_ct_is_untracked(ct)) { |
1362 | IP_VS_DBG(10, "%s(): " | 1362 | IP_VS_DBG(10, "%s(): " |
1363 | "stopping DNAT to local address %pI6\n", | 1363 | "stopping DNAT to local address %pI6\n", |
1364 | __func__, &cp->daddr.in6); | 1364 | __func__, &cp->daddr.in6); |
1365 | goto tx_error_put; | 1365 | goto tx_error_put; |
1366 | } | 1366 | } |
1367 | } | 1367 | } |
1368 | #endif | 1368 | #endif |
1369 | 1369 | ||
1370 | /* From world but DNAT to loopback address? */ | 1370 | /* From world but DNAT to loopback address? */ |
1371 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && | 1371 | if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && |
1372 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { | 1372 | ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { |
1373 | IP_VS_DBG(1, "%s(): " | 1373 | IP_VS_DBG(1, "%s(): " |
1374 | "stopping DNAT to loopback %pI6\n", | 1374 | "stopping DNAT to loopback %pI6\n", |
1375 | __func__, &cp->daddr.in6); | 1375 | __func__, &cp->daddr.in6); |
1376 | goto tx_error_put; | 1376 | goto tx_error_put; |
1377 | } | 1377 | } |
1378 | 1378 | ||
1379 | /* MTU checking */ | 1379 | /* MTU checking */ |
1380 | mtu = dst_mtu(&rt->dst); | 1380 | mtu = dst_mtu(&rt->dst); |
1381 | if (__mtu_check_toobig_v6(skb, mtu)) { | 1381 | if (__mtu_check_toobig_v6(skb, mtu)) { |
1382 | if (!skb->dev) { | 1382 | if (!skb->dev) { |
1383 | struct net *net = dev_net(skb_dst(skb)->dev); | 1383 | struct net *net = dev_net(skb_dst(skb)->dev); |
1384 | 1384 | ||
1385 | skb->dev = net->loopback_dev; | 1385 | skb->dev = net->loopback_dev; |
1386 | } | 1386 | } |
1387 | /* only send ICMP too big on first fragment */ | 1387 | /* only send ICMP too big on first fragment */ |
1388 | if (!iph->fragoffs) | 1388 | if (!iph->fragoffs) |
1389 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); | 1389 | icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
1390 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); | 1390 | IP_VS_DBG_RL("%s(): frag needed\n", __func__); |
1391 | goto tx_error_put; | 1391 | goto tx_error_put; |
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | /* copy-on-write the packet before mangling it */ | 1394 | /* copy-on-write the packet before mangling it */ |
1395 | if (!skb_make_writable(skb, offset)) | 1395 | if (!skb_make_writable(skb, offset)) |
1396 | goto tx_error_put; | 1396 | goto tx_error_put; |
1397 | 1397 | ||
1398 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) | 1398 | if (skb_cow(skb, rt->dst.dev->hard_header_len)) |
1399 | goto tx_error_put; | 1399 | goto tx_error_put; |
1400 | 1400 | ||
1401 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); | 1401 | ip_vs_nat_icmp_v6(skb, pp, cp, 0); |
1402 | 1402 | ||
1403 | if (!local || !skb->dev) { | 1403 | if (!local || !skb->dev) { |
1404 | /* drop the old route when skb is not shared */ | 1404 | /* drop the old route when skb is not shared */ |
1405 | skb_dst_drop(skb); | 1405 | skb_dst_drop(skb); |
1406 | skb_dst_set(skb, &rt->dst); | 1406 | skb_dst_set(skb, &rt->dst); |
1407 | } else { | 1407 | } else { |
1408 | /* destined to loopback, do we need to change route? */ | 1408 | /* destined to loopback, do we need to change route? */ |
1409 | dst_release(&rt->dst); | 1409 | dst_release(&rt->dst); |
1410 | } | 1410 | } |
1411 | 1411 | ||
1412 | /* Another hack: avoid icmp_send in ip_fragment */ | 1412 | /* Another hack: avoid icmp_send in ip_fragment */ |
1413 | skb->local_df = 1; | 1413 | skb->local_df = 1; |
1414 | 1414 | ||
1415 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); | 1415 | IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); |
1416 | 1416 | ||
1417 | rc = NF_STOLEN; | 1417 | rc = NF_STOLEN; |
1418 | goto out; | 1418 | goto out; |
1419 | 1419 | ||
1420 | tx_error_icmp: | 1420 | tx_error_icmp: |
1421 | dst_link_failure(skb); | 1421 | dst_link_failure(skb); |
1422 | tx_error: | 1422 | tx_error: |
1423 | dev_kfree_skb(skb); | 1423 | dev_kfree_skb(skb); |
1424 | rc = NF_STOLEN; | 1424 | rc = NF_STOLEN; |
1425 | out: | 1425 | out: |
1426 | LeaveFunction(10); | 1426 | LeaveFunction(10); |
1427 | return rc; | 1427 | return rc; |
1428 | tx_error_put: | 1428 | tx_error_put: |
1429 | dst_release(&rt->dst); | 1429 | dst_release(&rt->dst); |
1430 | goto tx_error; | 1430 | goto tx_error; |
1431 | } | 1431 | } |
1432 | #endif | 1432 | #endif |
1433 | 1433 |