Commit 1f305323ff5b9ddc1a4346d36072bcdb58f3f68a

Authored by Evgeniy Polyakov
Committed by David S. Miller
1 parent 0a06ea8718

[NETFILTER]: Fix kernel panic with REDIRECT target.

When a connection tracking entry (nf_conn) is about to copy itself, some
of its extension users (like nat) may already have been freed and thus
do not need to be copied.

Actually, looking at this function, I suspect it was copied from
nf_nat_setup_info(), and that is how the bug was introduced.

Reported and tested by David <david@unsolicited.net>.

[ Patrick McHardy states:

	I now understand what's happening:

	- new connection is allocated without helper
	- connection is REDIRECTed to localhost
	- nf_nat_setup_info adds NAT extension, but doesn't initialize it yet
	- nf_conntrack_alter_reply performs a helper lookup based on the
	   new tuple, finds the SIP helper and allocates a helper extension,
	   causing reallocation because of too little space
	- nf_nat_move_storage is called with the uninitialized nat extension

	So your fix is entirely correct, thanks a lot :)  ]

Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
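
The crash path is easier to see in miniature. What follows is a minimal,
self-contained userspace sketch of the sequence Patrick describes, not
kernel code: the struct layout, the mask value, and main() are invented
for illustration, and move_storage() merely stands in for
nf_nat_move_storage().

#include <stdio.h>
#include <stdlib.h>

struct nf_conn;

struct nf_conn_nat {			/* models the NAT extension */
	struct nf_conn *ct;		/* back-pointer, set only once NAT is set up */
};

struct nf_conn {
	unsigned long status;		/* models ct->status */
};

#define IPS_NAT_DONE_MASK 0x300		/* illustrative value only */

/* Models nf_nat_move_storage(): called when the extension area is
 * reallocated. Without the "!ct" guard, old_nat->ct is dereferenced
 * even when the NAT extension was added but not yet initialized
 * (ct == NULL), which is the NULL dereference behind the panic. */
static void move_storage(struct nf_conn_nat *new_nat,
			 struct nf_conn_nat *old_nat)
{
	struct nf_conn *ct = old_nat->ct;

	if (!ct || !(ct->status & IPS_NAT_DONE_MASK))	/* the fix */
		return;

	new_nat->ct = ct;
}

int main(void)
{
	/* NAT extension allocated (as in nf_nat_setup_info) but its
	 * ct back-pointer not yet initialized... */
	struct nf_conn_nat *old_nat = calloc(1, sizeof(*old_nat));
	struct nf_conn_nat *new_nat = calloc(1, sizeof(*new_nat));

	/* ...then a helper lookup (the SIP helper, in the report)
	 * forces a reallocation and a storage move. */
	move_storage(new_nat, old_nat);
	puts("survived a move of an uninitialized NAT extension");

	free(old_nat);
	free(new_nat);
	return 0;
}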

Showing 1 changed file with 1 addition and 4 deletions

net/ipv4/netfilter/nf_nat_core.c
/* NAT for netfilter; shared with compatibility layer. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>  /* For tcp_prot in getorigdst */
#include <linux/icmp.h>
#include <linux/udp.h>
#include <linux/jhash.h>

#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_protocol.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>

static DEFINE_RWLOCK(nf_nat_lock);

static struct nf_conntrack_l3proto *l3proto = NULL;

/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size;
static int nf_nat_vmalloced;

static struct hlist_head *bysource;

#define MAX_IP_NAT_PROTO 256
static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];

static inline struct nf_nat_protocol *
__nf_nat_proto_find(u_int8_t protonum)
{
	return rcu_dereference(nf_nat_protos[protonum]);
}

struct nf_nat_protocol *
nf_nat_proto_find_get(u_int8_t protonum)
{
	struct nf_nat_protocol *p;

	rcu_read_lock();
	p = __nf_nat_proto_find(protonum);
	if (!try_module_get(p->me))
		p = &nf_nat_unknown_protocol;
	rcu_read_unlock();

	return p;
}
EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);

void
nf_nat_proto_put(struct nf_nat_protocol *p)
{
	module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_nat_proto_put);

/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct nf_conntrack_tuple *tuple)
{
	/* Original src, to ensure we map it consistently if poss. */
	return jhash_3words((__force u32)tuple->src.u3.ip,
			    (__force u32)tuple->src.u.all,
			    tuple->dst.protonum, 0) % nf_nat_htable_size;
}

/* Is this tuple already taken? (not by us) */
int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
		  const struct nf_conn *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	   incoming ones.  NAT means they don't have a fixed mapping,
	   so we invert the tuple and look for the incoming reply.

	   We could keep a separate hash if this proves too slow. */
	struct nf_conntrack_tuple reply;

	nf_ct_invert_tuplepr(&reply, tuple);
	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
EXPORT_SYMBOL(nf_nat_used_tuple);

/* If we source map this tuple so reply looks like reply_tuple, will
 * that meet the constraints of range. */
static int
in_range(const struct nf_conntrack_tuple *tuple,
	 const struct nf_nat_range *range)
{
	struct nf_nat_protocol *proto;
	int ret = 0;

	/* If we are supposed to map IPs, then we must be in the
	   range specified, otherwise let this drag us onto a new src IP. */
	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
			return 0;
	}

	rcu_read_lock();
	proto = __nf_nat_proto_find(tuple->dst.protonum);
	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
	    proto->in_range(tuple, IP_NAT_MANIP_SRC,
			    &range->min, &range->max))
		ret = 1;
	rcu_read_unlock();

	return ret;
}

static inline int
same_src(const struct nf_conn *ct,
	 const struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_tuple *t;

	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	return (t->dst.protonum == tuple->dst.protonum &&
		t->src.u3.ip == tuple->src.u3.ip &&
		t->src.u.all == tuple->src.u.all);
}

/* Only called for SRC manip */
static int
find_appropriate_src(const struct nf_conntrack_tuple *tuple,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range *range)
{
	unsigned int h = hash_by_src(tuple);
	struct nf_conn_nat *nat;
	struct nf_conn *ct;
	struct hlist_node *n;

	read_lock_bh(&nf_nat_lock);
	hlist_for_each_entry(nat, n, &bysource[h], bysource) {
		ct = nat->ct;
		if (same_src(ct, tuple)) {
			/* Copy source part from reply tuple. */
			nf_ct_invert_tuplepr(result,
				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(result, range)) {
				read_unlock_bh(&nf_nat_lock);
				return 1;
			}
		}
	}
	read_unlock_bh(&nf_nat_lock);
	return 0;
}

/* For [FUTURE] fragmentation handling, we want the least-used
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
   1-65535, we don't do pro-rata allocation based on ports; we choose
   the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
find_best_ips_proto(struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	__be32 *var_ipp;
	/* Host order */
	u_int32_t minip, maxip, j;

	/* No IP mapping?  Do nothing. */
	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
		return;

	if (maniptype == IP_NAT_MANIP_SRC)
		var_ipp = &tuple->src.u3.ip;
	else
		var_ipp = &tuple->dst.u3.ip;

	/* Fast path: only one choice. */
	if (range->min_ip == range->max_ip) {
		*var_ipp = range->min_ip;
		return;
	}

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots. */
	minip = ntohl(range->min_ip);
	maxip = ntohl(range->max_ip);
	j = jhash_2words((__force u32)tuple->src.u3.ip,
			 (__force u32)tuple->dst.u3.ip, 0);
	*var_ipp = htonl(minip + j % (maxip - minip + 1));
}

/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
 * we change the source to map into the range.  For NF_IP_PRE_ROUTING
 * and NF_IP_LOCAL_OUT, we change the destination to map into the
 * range.  It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	struct nf_nat_protocol *proto;

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	   and that same mapping gives a unique tuple within the given
	   range, use that.

	   This is only required for source (ie. NAT/masq) mappings.
	   So far, we don't do local source mappings, so multiple
	   manips are not an issue. */
	if (maniptype == IP_NAT_MANIP_SRC) {
		if (find_appropriate_src(orig_tuple, tuple, range)) {
			pr_debug("get_unique_tuple: Found current src map\n");
			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
				if (!nf_nat_used_tuple(tuple, ct))
					return;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given
	   range. */
	*tuple = *orig_tuple;
	find_best_ips_proto(tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	   the range to make a unique tuple. */

	rcu_read_lock();
	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);

	/* Change protocol info to have some randomization */
	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
		proto->unique_tuple(tuple, range, maniptype, ct);
		goto out;
	}

	/* Only bother mapping if it's not already in range and unique */
	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
	    !nf_nat_used_tuple(tuple, ct))
		goto out;

	/* Last chance: get protocol to try to obtain unique tuple. */
	proto->unique_tuple(tuple, range, maniptype, ct);
out:
	rcu_read_unlock();
}

unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  unsigned int hooknum)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;
	struct nf_conn_nat *nat;
	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	/* nat helper or nfctnetlink may also set up the binding */
	nat = nfct_nat(ct);
	if (!nat) {
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
		     hooknum == NF_IP_POST_ROUTING ||
		     hooknum == NF_IP_LOCAL_IN ||
		     hooknum == NF_IP_LOCAL_OUT);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply.  Normally
	   this is what is in the conntrack, except for prior
	   manipulations (future optimization: if num_manips == 0,
	   orig_tp = conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so it will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == IP_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;
	}

	/* Place in source hash if this is the first time. */
	if (have_to_hash) {
		unsigned int srchash;

		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		write_lock_bh(&nf_nat_lock);
		/* nf_conntrack_alter_reply might re-allocate extension area */
		nat = nfct_nat(ct);
		nat->ct = ct;
		hlist_add_head(&nat->bysource, &bysource[srchash]);
		write_unlock_bh(&nf_nat_lock);
	}

	/* It's done. */
	if (maniptype == IP_NAT_MANIP_DST)
		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
	else
		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);

/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
	  struct sk_buff *skb,
	  unsigned int iphdroff,
	  const struct nf_conntrack_tuple *target,
	  enum nf_nat_manip_type maniptype)
{
	struct iphdr *iph;
	struct nf_nat_protocol *p;

	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
		return 0;

	iph = (void *)skb->data + iphdroff;

	/* Manipulate protocol part. */

	/* rcu_read_lock()ed by nf_hook_slow */
	p = __nf_nat_proto_find(proto);
	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
		return 0;

	iph = (void *)skb->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
		iph->saddr = target->src.u3.ip;
	} else {
		nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
		iph->daddr = target->dst.u3.ip;
	}
	return 1;
}

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);

/* Dir is direction ICMP is coming from (opposite to packet it contains) */
int nf_nat_icmp_reply_translation(struct nf_conn *ct,
				  enum ip_conntrack_info ctinfo,
				  unsigned int hooknum,
				  struct sk_buff *skb)
{
	struct {
		struct icmphdr icmp;
		struct iphdr ip;
	} *inside;
	struct nf_conntrack_l4proto *l4proto;
	struct nf_conntrack_tuple inner, target;
	int hdrlen = ip_hdrlen(skb);
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);

	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
		return 0;

	inside = (void *)skb->data + ip_hdrlen(skb);

	/* We're actually going to mangle it beyond trivial checksum
	   adjustment, so make sure the current checksum is correct. */
	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
		return 0;

	/* Must be RELATED */
	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
		     skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);

	/* Redirects on non-null nats must be dropped, else they'll
	   start talking to each other without our translation, and be
	   confused... --RR */
	if (inside->icmp.type == ICMP_REDIRECT) {
		/* If NAT isn't finished, assume it and drop. */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;

		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	pr_debug("icmp_reply_translation: translating error %p manip %u "
		 "dir %s\n", skb, manip,
		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");

	/* rcu_read_lock()ed by nf_hook_slow */
	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);

	if (!nf_ct_get_tuple(skb,
			     ip_hdrlen(skb) + sizeof(struct icmphdr),
			     (ip_hdrlen(skb) +
			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
			     (u_int16_t)AF_INET,
			     inside->ip.protocol,
			     &inner, l3proto, l4proto))
		return 0;

	/* Change inner back to look like incoming packet.  We do the
	   opposite manip on this hook to normal, because it might not
	   pass all hooks (locally-generated ICMP).  Consider incoming
	   packet: PREROUTING (DST manip), routing produces ICMP, goes
	   through POSTROUTING (which must correct the DST manip). */
	if (!manip_pkt(inside->ip.protocol, skb,
		       ip_hdrlen(skb) + sizeof(inside->icmp),
		       &ct->tuplehash[!dir].tuple,
		       !manip))
		return 0;

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Reload "inside" since manip_pkt may have moved skb->data. */
		inside = (void *)skb->data + ip_hdrlen(skb);
		inside->icmp.checksum = 0;
		inside->icmp.checksum =
			csum_fold(skb_checksum(skb, hdrlen,
					       skb->len - hdrlen, 0));
	}

	/* Change outer to look like the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */
	if (manip == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
		if (!manip_pkt(0, skb, 0, &target, manip))
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);

/* Protocol registration. */
int nf_nat_protocol_register(struct nf_nat_protocol *proto)
{
	int ret = 0;

	write_lock_bh(&nf_nat_lock);
	if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
		ret = -EBUSY;
		goto out;
	}
	rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
 out:
	write_unlock_bh(&nf_nat_lock);
	return ret;
}
EXPORT_SYMBOL(nf_nat_protocol_register);

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
{
	write_lock_bh(&nf_nat_lock);
	rcu_assign_pointer(nf_nat_protos[proto->protonum],
			   &nf_nat_unknown_protocol);
	write_unlock_bh(&nf_nat_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);

#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
int
nf_nat_port_range_to_nlattr(struct sk_buff *skb,
			    const struct nf_nat_range *range)
{
	NLA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
		&range->min.tcp.port);
	NLA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
		&range->max.tcp.port);

	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr);

int
nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
{
	int ret = 0;

	/* we have to return whether we actually parsed something or not */

	if (tb[CTA_PROTONAT_PORT_MIN]) {
		ret = 1;
		range->min.tcp.port =
			*(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MIN]);
	}

	if (!tb[CTA_PROTONAT_PORT_MAX]) {
		if (ret)
			range->max.tcp.port = range->min.tcp.port;
	} else {
		ret = 1;
		range->max.tcp.port =
			*(__be16 *)nla_data(tb[CTA_PROTONAT_PORT_MAX]);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range);
#endif

/* No one is using the conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);

	if (nat == NULL || nat->ct == NULL)
		return;

	NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);

	write_lock_bh(&nf_nat_lock);
	hlist_del(&nat->bysource);
	nat->ct = NULL;
	write_unlock_bh(&nf_nat_lock);
}

static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
{
	struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
	struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old;
	struct nf_conn *ct = old_nat->ct;
-	unsigned int srchash;

-	if (!(ct->status & IPS_NAT_DONE_MASK))
+	if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
		return;

-	srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
	write_lock_bh(&nf_nat_lock);
	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
	new_nat->ct = ct;
	write_unlock_bh(&nf_nat_lock);
}

static struct nf_ct_ext_type nat_extend __read_mostly = {
	.len		= sizeof(struct nf_conn_nat),
	.align		= __alignof__(struct nf_conn_nat),
	.destroy	= nf_nat_cleanup_conntrack,
	.move		= nf_nat_move_storage,
	.id		= NF_CT_EXT_NAT,
	.flags		= NF_CT_EXT_F_PREALLOC,
};

static int __init nf_nat_init(void)
{
	size_t i;
	int ret;

	ret = nf_ct_extend_register(&nat_extend);
	if (ret < 0) {
		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
		return ret;
	}

	/* Leave them the same for the moment. */
	nf_nat_htable_size = nf_conntrack_htable_size;

	bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
					 &nf_nat_vmalloced);
	if (!bysource) {
		ret = -ENOMEM;
		goto cleanup_extend;
	}

	/* Sew in builtin protocols. */
	write_lock_bh(&nf_nat_lock);
	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
		rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
	write_unlock_bh(&nf_nat_lock);

	for (i = 0; i < nf_nat_htable_size; i++) {
		INIT_HLIST_HEAD(&bysource[i]);
	}

	/* Initialize fake conntrack so that NAT will skip it */
	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;

	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
	return 0;

 cleanup_extend:
	nf_ct_extend_unregister(&nat_extend);
	return ret;
}

/* Clear NAT section of all conntracks, in case we're loaded again. */
static int clean_nat(struct nf_conn *i, void *data)
{
	struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	memset(nat, 0, sizeof(*nat));
	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
	return 0;
}

static void __exit nf_nat_cleanup(void)
{
	nf_ct_iterate_cleanup(&clean_nat, NULL);
	synchronize_rcu();
	nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
	nf_ct_l3proto_put(l3proto);
	nf_ct_extend_unregister(&nat_extend);
}

MODULE_LICENSE("GPL");

module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
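
As an aside on the code above: the address selection in
find_best_ips_proto() reduces to hashing the connection's endpoints into
the configured range, so a given client consistently maps to the same
NAT address. The following is a minimal userspace sketch of that rule;
mix2() is an invented stand-in mixer, not the kernel's jhash_2words(),
and the addresses are plain host-order values rather than __be32.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's jhash_2words(); any reasonable mixer
 * demonstrates the idea, this is not the kernel's hash. */
static uint32_t mix2(uint32_t a, uint32_t b)
{
	uint32_t h = a * 0x9e3779b1u ^ b;

	h ^= h >> 16;
	h *= 0x85ebca6bu;
	h ^= h >> 13;
	return h;
}

/* Same selection rule as find_best_ips_proto(): a given src/dst pair
 * always lands on the same address in [minip, maxip], so a server
 * keeps seeing one client from one IP, even across reboots. */
static uint32_t pick_ip(uint32_t src, uint32_t dst,
			uint32_t minip, uint32_t maxip)
{
	if (minip == maxip)		/* fast path: only one choice */
		return minip;
	return minip + mix2(src, dst) % (maxip - minip + 1);
}

int main(void)
{
	uint32_t client = 0xc0a80002;	/* 192.168.0.2, host order */
	uint32_t server = 0xc6336401;	/* 198.51.100.1, host order */
	uint32_t lo = 0x01020304;	/* 1.2.3.4 */
	uint32_t hi = 0x01020308;	/* 1.2.3.8 */

	/* Deterministic: both calls print the same mapped address. */
	printf("%08" PRIx32 "\n", pick_ip(client, server, lo, hi));
	printf("%08" PRIx32 "\n", pick_ip(client, server, lo, hi));
	return 0;
}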