Commit 2d59e5ca8c7113ad91452f0f9259a4b55ee90323

Authored by Yasuyuki Kozakai
Committed by David S. Miller
1 parent e54cbc1f91

[NETFILTER]: nf_nat: use extension infrastructure

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 6 changed files with 73 additions and 36 deletions Inline Diff

include/net/netfilter/nf_conntrack.h
1 /* 1 /*
2 * Connection state tracking for netfilter. This is separated from, 2 * Connection state tracking for netfilter. This is separated from,
3 * but required by, the (future) NAT layer; it can also be used by an iptables 3 * but required by, the (future) NAT layer; it can also be used by an iptables
4 * extension. 4 * extension.
5 * 5 *
6 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 6 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
7 * - generalize L3 protocol dependent part. 7 * - generalize L3 protocol dependent part.
8 * 8 *
9 * Derived from include/linux/netfilter_ipv4/ip_conntrack.h 9 * Derived from include/linux/netfilter_ipv4/ip_conntrack.h
10 */ 10 */
11 11
12 #ifndef _NF_CONNTRACK_H 12 #ifndef _NF_CONNTRACK_H
13 #define _NF_CONNTRACK_H 13 #define _NF_CONNTRACK_H
14 14
15 #include <linux/netfilter/nf_conntrack_common.h> 15 #include <linux/netfilter/nf_conntrack_common.h>
16 16
17 #ifdef __KERNEL__ 17 #ifdef __KERNEL__
18 #include <linux/bitops.h> 18 #include <linux/bitops.h>
19 #include <linux/compiler.h> 19 #include <linux/compiler.h>
20 #include <asm/atomic.h> 20 #include <asm/atomic.h>
21 21
22 #include <linux/netfilter/nf_conntrack_tcp.h> 22 #include <linux/netfilter/nf_conntrack_tcp.h>
23 #include <linux/netfilter/nf_conntrack_sctp.h> 23 #include <linux/netfilter/nf_conntrack_sctp.h>
24 #include <linux/netfilter/nf_conntrack_proto_gre.h> 24 #include <linux/netfilter/nf_conntrack_proto_gre.h>
25 #include <net/netfilter/ipv4/nf_conntrack_icmp.h> 25 #include <net/netfilter/ipv4/nf_conntrack_icmp.h>
26 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 26 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
27 27
28 #include <net/netfilter/nf_conntrack_tuple.h> 28 #include <net/netfilter/nf_conntrack_tuple.h>
29 29
30 /* per conntrack: protocol private data */ 30 /* per conntrack: protocol private data */
31 union nf_conntrack_proto { 31 union nf_conntrack_proto {
32 /* insert conntrack proto private data here */ 32 /* insert conntrack proto private data here */
33 struct ip_ct_sctp sctp; 33 struct ip_ct_sctp sctp;
34 struct ip_ct_tcp tcp; 34 struct ip_ct_tcp tcp;
35 struct ip_ct_icmp icmp; 35 struct ip_ct_icmp icmp;
36 struct nf_ct_icmpv6 icmpv6; 36 struct nf_ct_icmpv6 icmpv6;
37 struct nf_ct_gre gre; 37 struct nf_ct_gre gre;
38 }; 38 };
39 39
40 union nf_conntrack_expect_proto { 40 union nf_conntrack_expect_proto {
41 /* insert expect proto private data here */ 41 /* insert expect proto private data here */
42 }; 42 };
43 43
44 /* Add protocol helper include file here */ 44 /* Add protocol helper include file here */
45 #include <linux/netfilter/nf_conntrack_ftp.h> 45 #include <linux/netfilter/nf_conntrack_ftp.h>
46 #include <linux/netfilter/nf_conntrack_pptp.h> 46 #include <linux/netfilter/nf_conntrack_pptp.h>
47 #include <linux/netfilter/nf_conntrack_h323.h> 47 #include <linux/netfilter/nf_conntrack_h323.h>
48 #include <linux/netfilter/nf_conntrack_sane.h> 48 #include <linux/netfilter/nf_conntrack_sane.h>
49 49
50 /* per conntrack: application helper private data */ 50 /* per conntrack: application helper private data */
51 union nf_conntrack_help { 51 union nf_conntrack_help {
52 /* insert conntrack helper private data (master) here */ 52 /* insert conntrack helper private data (master) here */
53 struct nf_ct_ftp_master ct_ftp_info; 53 struct nf_ct_ftp_master ct_ftp_info;
54 struct nf_ct_pptp_master ct_pptp_info; 54 struct nf_ct_pptp_master ct_pptp_info;
55 struct nf_ct_h323_master ct_h323_info; 55 struct nf_ct_h323_master ct_h323_info;
56 struct nf_ct_sane_master ct_sane_info; 56 struct nf_ct_sane_master ct_sane_info;
57 }; 57 };
58 58
59 #include <linux/types.h> 59 #include <linux/types.h>
60 #include <linux/skbuff.h> 60 #include <linux/skbuff.h>
61 #include <linux/timer.h> 61 #include <linux/timer.h>
62 62
63 #ifdef CONFIG_NETFILTER_DEBUG 63 #ifdef CONFIG_NETFILTER_DEBUG
64 #define NF_CT_ASSERT(x) \ 64 #define NF_CT_ASSERT(x) \
65 do { \ 65 do { \
66 if (!(x)) \ 66 if (!(x)) \
67 /* Wooah! I'm tripping my conntrack in a frenzy of \ 67 /* Wooah! I'm tripping my conntrack in a frenzy of \
68 netplay... */ \ 68 netplay... */ \
69 printk("NF_CT_ASSERT: %s:%i(%s)\n", \ 69 printk("NF_CT_ASSERT: %s:%i(%s)\n", \
70 __FILE__, __LINE__, __FUNCTION__); \ 70 __FILE__, __LINE__, __FUNCTION__); \
71 } while(0) 71 } while(0)
72 #else 72 #else
73 #define NF_CT_ASSERT(x) 73 #define NF_CT_ASSERT(x)
74 #endif 74 #endif
75 75
76 struct nf_conntrack_helper; 76 struct nf_conntrack_helper;
77 77
78 /* nf_conn feature for connections that have a helper */ 78 /* nf_conn feature for connections that have a helper */
79 struct nf_conn_help { 79 struct nf_conn_help {
80 /* Helper, if any */ 80 /* Helper, if any */
81 struct nf_conntrack_helper *helper; 81 struct nf_conntrack_helper *helper;
82 82
83 union nf_conntrack_help help; 83 union nf_conntrack_help help;
84 84
85 /* Current number of expected connections */ 85 /* Current number of expected connections */
86 unsigned int expecting; 86 unsigned int expecting;
87 }; 87 };
88 88
89 89
90 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 90 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
91 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 91 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
92 92
93 struct nf_conn 93 struct nf_conn
94 { 94 {
95 /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, 95 /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
96 plus 1 for any connection(s) we are `master' for */ 96 plus 1 for any connection(s) we are `master' for */
97 struct nf_conntrack ct_general; 97 struct nf_conntrack ct_general;
98 98
99 /* XXX should I move this to the tail ? - Y.K */ 99 /* XXX should I move this to the tail ? - Y.K */
100 /* These are my tuples; original and reply */ 100 /* These are my tuples; original and reply */
101 struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; 101 struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
102 102
103 /* Have we seen traffic both ways yet? (bitset) */ 103 /* Have we seen traffic both ways yet? (bitset) */
104 unsigned long status; 104 unsigned long status;
105 105
106 /* If we were expected by an expectation, this will be it */ 106 /* If we were expected by an expectation, this will be it */
107 struct nf_conn *master; 107 struct nf_conn *master;
108 108
109 /* Timer function; drops refcnt when it goes off. */ 109 /* Timer function; drops refcnt when it goes off. */
110 struct timer_list timeout; 110 struct timer_list timeout;
111 111
112 #ifdef CONFIG_NF_CT_ACCT 112 #ifdef CONFIG_NF_CT_ACCT
113 /* Accounting Information (same cache line as other written members) */ 113 /* Accounting Information (same cache line as other written members) */
114 struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; 114 struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
115 #endif 115 #endif
116 116
117 /* Unique ID that identifies this conntrack*/ 117 /* Unique ID that identifies this conntrack*/
118 unsigned int id; 118 unsigned int id;
119 119
120 /* features - nat, helper, ... used by allocating system */ 120 /* features - nat, helper, ... used by allocating system */
121 u_int32_t features; 121 u_int32_t features;
122 122
123 #if defined(CONFIG_NF_CONNTRACK_MARK) 123 #if defined(CONFIG_NF_CONNTRACK_MARK)
124 u_int32_t mark; 124 u_int32_t mark;
125 #endif 125 #endif
126 126
127 #ifdef CONFIG_NF_CONNTRACK_SECMARK 127 #ifdef CONFIG_NF_CONNTRACK_SECMARK
128 u_int32_t secmark; 128 u_int32_t secmark;
129 #endif 129 #endif
130 130
131 /* Storage reserved for other modules: */ 131 /* Storage reserved for other modules: */
132 union nf_conntrack_proto proto; 132 union nf_conntrack_proto proto;
133 133
134 /* Extensions */ 134 /* Extensions */
135 struct nf_ct_ext *ext; 135 struct nf_ct_ext *ext;
136 136
137 /* features dynamically at the end: helper, nat (both optional) */ 137 /* features dynamically at the end: helper, nat (both optional) */
138 char data[0]; 138 char data[0];
139 }; 139 };
140 140
141 static inline struct nf_conn * 141 static inline struct nf_conn *
142 nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) 142 nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash)
143 { 143 {
144 return container_of(hash, struct nf_conn, 144 return container_of(hash, struct nf_conn,
145 tuplehash[hash->tuple.dst.dir]); 145 tuplehash[hash->tuple.dst.dir]);
146 } 146 }
147 147
148 /* get master conntrack via master expectation */ 148 /* get master conntrack via master expectation */
149 #define master_ct(conntr) (conntr->master) 149 #define master_ct(conntr) (conntr->master)
150 150
151 /* Alter reply tuple (maybe alter helper). */ 151 /* Alter reply tuple (maybe alter helper). */
152 extern void 152 extern void
153 nf_conntrack_alter_reply(struct nf_conn *conntrack, 153 nf_conntrack_alter_reply(struct nf_conn *conntrack,
154 const struct nf_conntrack_tuple *newreply); 154 const struct nf_conntrack_tuple *newreply);
155 155
156 /* Is this tuple taken? (ignoring any belonging to the given 156 /* Is this tuple taken? (ignoring any belonging to the given
157 conntrack). */ 157 conntrack). */
158 extern int 158 extern int
159 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, 159 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
160 const struct nf_conn *ignored_conntrack); 160 const struct nf_conn *ignored_conntrack);
161 161
162 /* Return conntrack_info and tuple hash for given skb. */ 162 /* Return conntrack_info and tuple hash for given skb. */
163 static inline struct nf_conn * 163 static inline struct nf_conn *
164 nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) 164 nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
165 { 165 {
166 *ctinfo = skb->nfctinfo; 166 *ctinfo = skb->nfctinfo;
167 return (struct nf_conn *)skb->nfct; 167 return (struct nf_conn *)skb->nfct;
168 } 168 }
169 169
170 /* decrement reference count on a conntrack */ 170 /* decrement reference count on a conntrack */
171 static inline void nf_ct_put(struct nf_conn *ct) 171 static inline void nf_ct_put(struct nf_conn *ct)
172 { 172 {
173 NF_CT_ASSERT(ct); 173 NF_CT_ASSERT(ct);
174 nf_conntrack_put(&ct->ct_general); 174 nf_conntrack_put(&ct->ct_general);
175 } 175 }
176 176
177 /* Protocol module loading */ 177 /* Protocol module loading */
178 extern int nf_ct_l3proto_try_module_get(unsigned short l3proto); 178 extern int nf_ct_l3proto_try_module_get(unsigned short l3proto);
179 extern void nf_ct_l3proto_module_put(unsigned short l3proto); 179 extern void nf_ct_l3proto_module_put(unsigned short l3proto);
180 180
181 extern struct nf_conntrack_tuple_hash * 181 extern struct nf_conntrack_tuple_hash *
182 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, 182 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
183 const struct nf_conn *ignored_conntrack); 183 const struct nf_conn *ignored_conntrack);
184 184
185 extern void nf_conntrack_hash_insert(struct nf_conn *ct); 185 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
186 186
187 extern void nf_conntrack_flush(void); 187 extern void nf_conntrack_flush(void);
188 188
189 extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, 189 extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
190 const struct nf_conntrack_tuple *orig); 190 const struct nf_conntrack_tuple *orig);
191 191
192 extern void __nf_ct_refresh_acct(struct nf_conn *ct, 192 extern void __nf_ct_refresh_acct(struct nf_conn *ct,
193 enum ip_conntrack_info ctinfo, 193 enum ip_conntrack_info ctinfo,
194 const struct sk_buff *skb, 194 const struct sk_buff *skb,
195 unsigned long extra_jiffies, 195 unsigned long extra_jiffies,
196 int do_acct); 196 int do_acct);
197 197
198 /* Refresh conntrack for this many jiffies and do accounting */ 198 /* Refresh conntrack for this many jiffies and do accounting */
199 static inline void nf_ct_refresh_acct(struct nf_conn *ct, 199 static inline void nf_ct_refresh_acct(struct nf_conn *ct,
200 enum ip_conntrack_info ctinfo, 200 enum ip_conntrack_info ctinfo,
201 const struct sk_buff *skb, 201 const struct sk_buff *skb,
202 unsigned long extra_jiffies) 202 unsigned long extra_jiffies)
203 { 203 {
204 __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); 204 __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
205 } 205 }
206 206
207 /* Refresh conntrack for this many jiffies */ 207 /* Refresh conntrack for this many jiffies */
208 static inline void nf_ct_refresh(struct nf_conn *ct, 208 static inline void nf_ct_refresh(struct nf_conn *ct,
209 const struct sk_buff *skb, 209 const struct sk_buff *skb,
210 unsigned long extra_jiffies) 210 unsigned long extra_jiffies)
211 { 211 {
212 __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); 212 __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
213 } 213 }
214 214
215 /* These are for NAT. Icky. */ 215 /* These are for NAT. Icky. */
216 /* Update TCP window tracking data when NAT mangles the packet */ 216 /* Update TCP window tracking data when NAT mangles the packet */
217 extern void nf_conntrack_tcp_update(struct sk_buff *skb, 217 extern void nf_conntrack_tcp_update(struct sk_buff *skb,
218 unsigned int dataoff, 218 unsigned int dataoff,
219 struct nf_conn *conntrack, 219 struct nf_conn *conntrack,
220 int dir); 220 int dir);
221 221
222 /* Call me when a conntrack is destroyed. */ 222 /* Call me when a conntrack is destroyed. */
223 extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); 223 extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack);
224 224
225 /* Fake conntrack entry for untracked connections */ 225 /* Fake conntrack entry for untracked connections */
226 extern struct nf_conn nf_conntrack_untracked; 226 extern struct nf_conn nf_conntrack_untracked;
227 227
228 extern int nf_ct_no_defrag; 228 extern int nf_ct_no_defrag;
229 229
230 /* Iterate over all conntracks: if iter returns true, it's deleted. */ 230 /* Iterate over all conntracks: if iter returns true, it's deleted. */
231 extern void 231 extern void
232 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); 232 nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data);
233 extern void nf_conntrack_free(struct nf_conn *ct); 233 extern void nf_conntrack_free(struct nf_conn *ct);
234 extern struct nf_conn * 234 extern struct nf_conn *
235 nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, 235 nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
236 const struct nf_conntrack_tuple *repl); 236 const struct nf_conntrack_tuple *repl);
237 237
238 /* It's confirmed if it is, or has been in the hash table. */ 238 /* It's confirmed if it is, or has been in the hash table. */
239 static inline int nf_ct_is_confirmed(struct nf_conn *ct) 239 static inline int nf_ct_is_confirmed(struct nf_conn *ct)
240 { 240 {
241 return test_bit(IPS_CONFIRMED_BIT, &ct->status); 241 return test_bit(IPS_CONFIRMED_BIT, &ct->status);
242 } 242 }
243 243
244 static inline int nf_ct_is_dying(struct nf_conn *ct) 244 static inline int nf_ct_is_dying(struct nf_conn *ct)
245 { 245 {
246 return test_bit(IPS_DYING_BIT, &ct->status); 246 return test_bit(IPS_DYING_BIT, &ct->status);
247 } 247 }
248 248
249 static inline int nf_ct_is_untracked(const struct sk_buff *skb) 249 static inline int nf_ct_is_untracked(const struct sk_buff *skb)
250 { 250 {
251 return (skb->nfct == &nf_conntrack_untracked.ct_general); 251 return (skb->nfct == &nf_conntrack_untracked.ct_general);
252 } 252 }
253 253
254 extern unsigned int nf_conntrack_htable_size; 254 extern unsigned int nf_conntrack_htable_size;
255 extern int nf_conntrack_checksum; 255 extern int nf_conntrack_checksum;
256 extern atomic_t nf_conntrack_count; 256 extern atomic_t nf_conntrack_count;
257 extern int nf_conntrack_max; 257 extern int nf_conntrack_max;
258 258
259 DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); 259 DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
260 #define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) 260 #define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
261 #define NF_CT_STAT_INC_ATOMIC(count) \ 261 #define NF_CT_STAT_INC_ATOMIC(count) \
262 do { \ 262 do { \
263 local_bh_disable(); \ 263 local_bh_disable(); \
264 __get_cpu_var(nf_conntrack_stat).count++; \ 264 __get_cpu_var(nf_conntrack_stat).count++; \
265 local_bh_enable(); \ 265 local_bh_enable(); \
266 } while (0) 266 } while (0)
267 267
268 /* no helper, no nat */ 268 /* no helper, no nat */
269 #define NF_CT_F_BASIC 0 269 #define NF_CT_F_BASIC 0
270 /* for helper */ 270 /* for helper */
271 #define NF_CT_F_HELP 1 271 #define NF_CT_F_HELP 1
272 /* for nat. */ 272 /* for nat. */
273 #define NF_CT_F_NAT 2 273 #define NF_CT_F_NAT 2
274 #define NF_CT_F_NUM 4 274 #define NF_CT_F_NUM 4
275 275
276 extern int 276 extern int
277 nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size); 277 nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size);
278 extern void 278 extern void
279 nf_conntrack_unregister_cache(u_int32_t features); 279 nf_conntrack_unregister_cache(u_int32_t features);
280 280
281 /* valid combinations:
282 * basic: nf_conn, nf_conn .. nf_conn_help
283 * nat: nf_conn .. nf_conn_nat, nf_conn .. nf_conn_nat .. nf_conn_help
284 */
285 #ifdef CONFIG_NF_NAT_NEEDED
286 #include <net/netfilter/nf_nat.h>
287 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
288 {
289 unsigned int offset = sizeof(struct nf_conn);
290
291 if (!(ct->features & NF_CT_F_NAT))
292 return NULL;
293
294 offset = ALIGN(offset, __alignof__(struct nf_conn_nat));
295 return (struct nf_conn_nat *) ((void *)ct + offset);
296 }
297 #endif /* CONFIG_NF_NAT_NEEDED */
298 #endif /* __KERNEL__ */ 281 #endif /* __KERNEL__ */
299 #endif /* _NF_CONNTRACK_H */ 282 #endif /* _NF_CONNTRACK_H */
300 283
include/net/netfilter/nf_conntrack_extend.h
1 #ifndef _NF_CONNTRACK_EXTEND_H 1 #ifndef _NF_CONNTRACK_EXTEND_H
2 #define _NF_CONNTRACK_EXTEND_H 2 #define _NF_CONNTRACK_EXTEND_H
3 3
4 #include <net/netfilter/nf_conntrack.h> 4 #include <net/netfilter/nf_conntrack.h>
5 5
6 enum nf_ct_ext_id 6 enum nf_ct_ext_id
7 { 7 {
8 NF_CT_EXT_HELPER, 8 NF_CT_EXT_HELPER,
9 NF_CT_EXT_NAT,
9 NF_CT_EXT_NUM, 10 NF_CT_EXT_NUM,
10 }; 11 };
11 12
12 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help 13 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
14 #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
13 15
14 /* Extensions: optional stuff which isn't permanently in struct. */ 16 /* Extensions: optional stuff which isn't permanently in struct. */
15 struct nf_ct_ext { 17 struct nf_ct_ext {
16 u8 offset[NF_CT_EXT_NUM]; 18 u8 offset[NF_CT_EXT_NUM];
17 u8 len; 19 u8 len;
18 u8 real_len; 20 u8 real_len;
19 char data[0]; 21 char data[0];
20 }; 22 };
21 23
22 static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id) 24 static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id)
23 { 25 {
24 return (ct->ext && ct->ext->offset[id]); 26 return (ct->ext && ct->ext->offset[id]);
25 } 27 }
26 28
27 static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) 29 static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
28 { 30 {
29 if (!nf_ct_ext_exist(ct, id)) 31 if (!nf_ct_ext_exist(ct, id))
30 return NULL; 32 return NULL;
31 33
32 return (void *)ct->ext + ct->ext->offset[id]; 34 return (void *)ct->ext + ct->ext->offset[id];
33 } 35 }
34 #define nf_ct_ext_find(ext, id) \ 36 #define nf_ct_ext_find(ext, id) \
35 ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) 37 ((id##_TYPE *)__nf_ct_ext_find((ext), (id)))
36 38
37 /* Destroy all relationships */ 39 /* Destroy all relationships */
38 extern void __nf_ct_ext_destroy(struct nf_conn *ct); 40 extern void __nf_ct_ext_destroy(struct nf_conn *ct);
39 static inline void nf_ct_ext_destroy(struct nf_conn *ct) 41 static inline void nf_ct_ext_destroy(struct nf_conn *ct)
40 { 42 {
41 if (ct->ext) 43 if (ct->ext)
42 __nf_ct_ext_destroy(ct); 44 __nf_ct_ext_destroy(ct);
43 } 45 }
44 46
45 /* Free operation. If you want to free an object referred to from the private area, 47 /* Free operation. If you want to free an object referred to from the private area,
46 * please implement __nf_ct_ext_free() and call it. 48 * please implement __nf_ct_ext_free() and call it.
47 */ 49 */
48 static inline void nf_ct_ext_free(struct nf_conn *ct) 50 static inline void nf_ct_ext_free(struct nf_conn *ct)
49 { 51 {
50 if (ct->ext) 52 if (ct->ext)
51 kfree(ct->ext); 53 kfree(ct->ext);
52 } 54 }
53 55
54 /* Add this type, returns pointer to data or NULL. */ 56 /* Add this type, returns pointer to data or NULL. */
55 void * 57 void *
56 __nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); 58 __nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
57 #define nf_ct_ext_add(ct, id, gfp) \ 59 #define nf_ct_ext_add(ct, id, gfp) \
58 ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) 60 ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp)))
59 61
60 #define NF_CT_EXT_F_PREALLOC 0x0001 62 #define NF_CT_EXT_F_PREALLOC 0x0001
61 63
62 struct nf_ct_ext_type 64 struct nf_ct_ext_type
63 { 65 {
64 /* Destroys relationships (can be NULL). */ 66 /* Destroys relationships (can be NULL). */
65 void (*destroy)(struct nf_conn *ct); 67 void (*destroy)(struct nf_conn *ct);
66 /* Called when reallocated (can be NULL). 68 /* Called when reallocated (can be NULL).
67 Contents have already been moved. */ 69 Contents have already been moved. */
68 void (*move)(struct nf_conn *ct, void *old); 70 void (*move)(struct nf_conn *ct, void *old);
69 71
70 enum nf_ct_ext_id id; 72 enum nf_ct_ext_id id;
71 73
72 unsigned int flags; 74 unsigned int flags;
73 75
74 /* Length and min alignment. */ 76 /* Length and min alignment. */
75 u8 len; 77 u8 len;
76 u8 align; 78 u8 align;
77 /* initial size of nf_ct_ext. */ 79 /* initial size of nf_ct_ext. */
78 u8 alloc_size; 80 u8 alloc_size;
79 }; 81 };
80 82
81 int nf_ct_extend_register(struct nf_ct_ext_type *type); 83 int nf_ct_extend_register(struct nf_ct_ext_type *type);
82 void nf_ct_extend_unregister(struct nf_ct_ext_type *type); 84 void nf_ct_extend_unregister(struct nf_ct_ext_type *type);
83 #endif /* _NF_CONNTRACK_EXTEND_H */ 85 #endif /* _NF_CONNTRACK_EXTEND_H */
84 86
include/net/netfilter/nf_nat.h
1 #ifndef _NF_NAT_H 1 #ifndef _NF_NAT_H
2 #define _NF_NAT_H 2 #define _NF_NAT_H
3 #include <linux/netfilter_ipv4.h> 3 #include <linux/netfilter_ipv4.h>
4 #include <net/netfilter/nf_conntrack_tuple.h> 4 #include <net/netfilter/nf_conntrack_tuple.h>
5 5
6 #define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16 6 #define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16
7 7
8 enum nf_nat_manip_type 8 enum nf_nat_manip_type
9 { 9 {
10 IP_NAT_MANIP_SRC, 10 IP_NAT_MANIP_SRC,
11 IP_NAT_MANIP_DST 11 IP_NAT_MANIP_DST
12 }; 12 };
13 13
14 /* SRC manip occurs POST_ROUTING or LOCAL_IN */ 14 /* SRC manip occurs POST_ROUTING or LOCAL_IN */
15 #define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN) 15 #define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN)
16 16
17 #define IP_NAT_RANGE_MAP_IPS 1 17 #define IP_NAT_RANGE_MAP_IPS 1
18 #define IP_NAT_RANGE_PROTO_SPECIFIED 2 18 #define IP_NAT_RANGE_PROTO_SPECIFIED 2
19 #define IP_NAT_RANGE_PROTO_RANDOM 4 19 #define IP_NAT_RANGE_PROTO_RANDOM 4
20 20
21 /* NAT sequence number modifications */ 21 /* NAT sequence number modifications */
22 struct nf_nat_seq { 22 struct nf_nat_seq {
23 /* position of the last TCP sequence number modification (if any) */ 23 /* position of the last TCP sequence number modification (if any) */
24 u_int32_t correction_pos; 24 u_int32_t correction_pos;
25 25
26 /* sequence number offset before and after last modification */ 26 /* sequence number offset before and after last modification */
27 int16_t offset_before, offset_after; 27 int16_t offset_before, offset_after;
28 }; 28 };
29 29
30 /* Single range specification. */ 30 /* Single range specification. */
31 struct nf_nat_range 31 struct nf_nat_range
32 { 32 {
33 /* Set to OR of flags above. */ 33 /* Set to OR of flags above. */
34 unsigned int flags; 34 unsigned int flags;
35 35
36 /* Inclusive: network order. */ 36 /* Inclusive: network order. */
37 __be32 min_ip, max_ip; 37 __be32 min_ip, max_ip;
38 38
39 /* Inclusive: network order */ 39 /* Inclusive: network order */
40 union nf_conntrack_man_proto min, max; 40 union nf_conntrack_man_proto min, max;
41 }; 41 };
42 42
43 /* For backwards compat: don't use in modern code. */ 43 /* For backwards compat: don't use in modern code. */
44 struct nf_nat_multi_range_compat 44 struct nf_nat_multi_range_compat
45 { 45 {
46 unsigned int rangesize; /* Must be 1. */ 46 unsigned int rangesize; /* Must be 1. */
47 47
48 /* hangs off end. */ 48 /* hangs off end. */
49 struct nf_nat_range range[1]; 49 struct nf_nat_range range[1];
50 }; 50 };
51 51
52 #ifdef __KERNEL__ 52 #ifdef __KERNEL__
53 #include <linux/list.h> 53 #include <linux/list.h>
54 #include <linux/netfilter/nf_conntrack_pptp.h> 54 #include <linux/netfilter/nf_conntrack_pptp.h>
55 #include <net/netfilter/nf_conntrack_extend.h>
55 56
56 struct nf_conn; 57 struct nf_conn;
57 58
58 /* The structure embedded in the conntrack structure. */ 59 /* The structure embedded in the conntrack structure. */
59 struct nf_nat_info 60 struct nf_nat_info
60 { 61 {
61 struct list_head bysource; 62 struct list_head bysource;
62 struct nf_nat_seq seq[IP_CT_DIR_MAX]; 63 struct nf_nat_seq seq[IP_CT_DIR_MAX];
63 struct nf_conn *ct; 64 struct nf_conn *ct;
64 }; 65 };
65 66
66 /* per conntrack: nat application helper private data */ 67 /* per conntrack: nat application helper private data */
67 union nf_conntrack_nat_help 68 union nf_conntrack_nat_help
68 { 69 {
69 /* insert nat helper private data here */ 70 /* insert nat helper private data here */
70 struct nf_nat_pptp nat_pptp_info; 71 struct nf_nat_pptp nat_pptp_info;
71 }; 72 };
72 73
73 struct nf_conn_nat 74 struct nf_conn_nat
74 { 75 {
75 struct nf_nat_info info; 76 struct nf_nat_info info;
76 union nf_conntrack_nat_help help; 77 union nf_conntrack_nat_help help;
77 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ 78 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
78 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) 79 defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
79 int masq_index; 80 int masq_index;
80 #endif 81 #endif
81 }; 82 };
82 83
83 /* Set up the info structure to map into this range. */ 84 /* Set up the info structure to map into this range. */
84 extern unsigned int nf_nat_setup_info(struct nf_conn *ct, 85 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
85 const struct nf_nat_range *range, 86 const struct nf_nat_range *range,
86 unsigned int hooknum); 87 unsigned int hooknum);
87 88
88 /* Is this tuple already taken? (not by us)*/ 89 /* Is this tuple already taken? (not by us)*/
89 extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, 90 extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
90 const struct nf_conn *ignored_conntrack); 91 const struct nf_conn *ignored_conntrack);
92
93 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
94 {
95 return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
96 }
91 97
92 extern int nf_nat_module_is_loaded; 98 extern int nf_nat_module_is_loaded;
93 99
94 #else /* !__KERNEL__: iptables wants this to compile. */ 100 #else /* !__KERNEL__: iptables wants this to compile. */
95 #define nf_nat_multi_range nf_nat_multi_range_compat 101 #define nf_nat_multi_range nf_nat_multi_range_compat
96 #endif /*__KERNEL__*/ 102 #endif /*__KERNEL__*/
97 #endif 103 #endif
98 104
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8 8
9 #include <linux/types.h> 9 #include <linux/types.h>
10 #include <linux/ip.h> 10 #include <linux/ip.h>
11 #include <linux/netfilter.h> 11 #include <linux/netfilter.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/skbuff.h> 13 #include <linux/skbuff.h>
14 #include <linux/icmp.h> 14 #include <linux/icmp.h>
15 #include <linux/sysctl.h> 15 #include <linux/sysctl.h>
16 #include <net/route.h> 16 #include <net/route.h>
17 #include <net/ip.h> 17 #include <net/ip.h>
18 18
19 #include <linux/netfilter_ipv4.h> 19 #include <linux/netfilter_ipv4.h>
20 #include <net/netfilter/nf_conntrack.h> 20 #include <net/netfilter/nf_conntrack.h>
21 #include <net/netfilter/nf_conntrack_helper.h> 21 #include <net/netfilter/nf_conntrack_helper.h>
22 #include <net/netfilter/nf_conntrack_l4proto.h> 22 #include <net/netfilter/nf_conntrack_l4proto.h>
23 #include <net/netfilter/nf_conntrack_l3proto.h> 23 #include <net/netfilter/nf_conntrack_l3proto.h>
24 #include <net/netfilter/nf_conntrack_core.h> 24 #include <net/netfilter/nf_conntrack_core.h>
25 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 25 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
26 26
27 #if 0 27 #if 0
28 #define DEBUGP printk 28 #define DEBUGP printk
29 #else 29 #else
30 #define DEBUGP(format, args...) 30 #define DEBUGP(format, args...)
31 #endif 31 #endif
32 32
33 static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 33 static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
34 struct nf_conntrack_tuple *tuple) 34 struct nf_conntrack_tuple *tuple)
35 { 35 {
36 __be32 _addrs[2], *ap; 36 __be32 _addrs[2], *ap;
37 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 37 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
38 sizeof(u_int32_t) * 2, _addrs); 38 sizeof(u_int32_t) * 2, _addrs);
39 if (ap == NULL) 39 if (ap == NULL)
40 return 0; 40 return 0;
41 41
42 tuple->src.u3.ip = ap[0]; 42 tuple->src.u3.ip = ap[0];
43 tuple->dst.u3.ip = ap[1]; 43 tuple->dst.u3.ip = ap[1];
44 44
45 return 1; 45 return 1;
46 } 46 }
47 47
48 static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, 48 static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
49 const struct nf_conntrack_tuple *orig) 49 const struct nf_conntrack_tuple *orig)
50 { 50 {
51 tuple->src.u3.ip = orig->dst.u3.ip; 51 tuple->src.u3.ip = orig->dst.u3.ip;
52 tuple->dst.u3.ip = orig->src.u3.ip; 52 tuple->dst.u3.ip = orig->src.u3.ip;
53 53
54 return 1; 54 return 1;
55 } 55 }
56 56
57 static int ipv4_print_tuple(struct seq_file *s, 57 static int ipv4_print_tuple(struct seq_file *s,
58 const struct nf_conntrack_tuple *tuple) 58 const struct nf_conntrack_tuple *tuple)
59 { 59 {
60 return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", 60 return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
61 NIPQUAD(tuple->src.u3.ip), 61 NIPQUAD(tuple->src.u3.ip),
62 NIPQUAD(tuple->dst.u3.ip)); 62 NIPQUAD(tuple->dst.u3.ip));
63 } 63 }
64 64
/*
 * print_conntrack callback for IPv4: writes nothing to @s and
 * unconditionally returns 0.
 */
static int ipv4_print_conntrack(struct seq_file *s,
				const struct nf_conn *conntrack)
{
	return 0;
}
70 70
71 /* Returns new sk_buff, or NULL */ 71 /* Returns new sk_buff, or NULL */
72 static struct sk_buff * 72 static struct sk_buff *
73 nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) 73 nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
74 { 74 {
75 skb_orphan(skb); 75 skb_orphan(skb);
76 76
77 local_bh_disable(); 77 local_bh_disable();
78 skb = ip_defrag(skb, user); 78 skb = ip_defrag(skb, user);
79 local_bh_enable(); 79 local_bh_enable();
80 80
81 if (skb) 81 if (skb)
82 ip_send_check(ip_hdr(skb)); 82 ip_send_check(ip_hdr(skb));
83 83
84 return skb; 84 return skb;
85 } 85 }
86 86
87 static int 87 static int
88 ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, 88 ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
89 u_int8_t *protonum) 89 u_int8_t *protonum)
90 { 90 {
91 /* Never happen */ 91 /* Never happen */
92 if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) { 92 if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
93 if (net_ratelimit()) { 93 if (net_ratelimit()) {
94 printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", 94 printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
95 ip_hdr(*pskb)->protocol, hooknum); 95 ip_hdr(*pskb)->protocol, hooknum);
96 } 96 }
97 return -NF_DROP; 97 return -NF_DROP;
98 } 98 }
99 99
100 *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb); 100 *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
101 *protonum = ip_hdr(*pskb)->protocol; 101 *protonum = ip_hdr(*pskb)->protocol;
102 102
103 return NF_ACCEPT; 103 return NF_ACCEPT;
104 } 104 }
105 105
/* Exported flag; zero-initialised like every object with static storage. */
int nf_nat_module_is_loaded;
EXPORT_SYMBOL_GPL(nf_nat_module_is_loaded);
108 108
/*
 * get_features callback for IPv4 conntrack.
 *
 * NOTE(review): this span is a side-by-side diff.  The lines carrying only
 * an old line number (111-113) exist on the pre-commit side only; on the
 * post-commit side the function body is just "return NF_CT_F_BASIC;".
 */
109 static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) 109 static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
110 { 110 {
111 if (nf_nat_module_is_loaded)
112 return NF_CT_F_NAT;
113
114 return NF_CT_F_BASIC; 111 return NF_CT_F_BASIC;
115 } 112 }
116 113
/*
 * Netfilter hook: the packet has made it out the other side, so confirm
 * its conntrack entry.  Returns the verdict of nf_conntrack_confirm();
 * @hooknum, @in, @out and @okfn are unused.
 */
static unsigned int ipv4_confirm(unsigned int hooknum,
				 struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	return nf_conntrack_confirm(pskb);
}
126 123
127 static unsigned int ipv4_conntrack_help(unsigned int hooknum, 124 static unsigned int ipv4_conntrack_help(unsigned int hooknum,
128 struct sk_buff **pskb, 125 struct sk_buff **pskb,
129 const struct net_device *in, 126 const struct net_device *in,
130 const struct net_device *out, 127 const struct net_device *out,
131 int (*okfn)(struct sk_buff *)) 128 int (*okfn)(struct sk_buff *))
132 { 129 {
133 struct nf_conn *ct; 130 struct nf_conn *ct;
134 enum ip_conntrack_info ctinfo; 131 enum ip_conntrack_info ctinfo;
135 struct nf_conn_help *help; 132 struct nf_conn_help *help;
136 struct nf_conntrack_helper *helper; 133 struct nf_conntrack_helper *helper;
137 134
138 /* This is where we call the helper: as the packet goes out. */ 135 /* This is where we call the helper: as the packet goes out. */
139 ct = nf_ct_get(*pskb, &ctinfo); 136 ct = nf_ct_get(*pskb, &ctinfo);
140 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) 137 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
141 return NF_ACCEPT; 138 return NF_ACCEPT;
142 139
143 help = nfct_help(ct); 140 help = nfct_help(ct);
144 if (!help) 141 if (!help)
145 return NF_ACCEPT; 142 return NF_ACCEPT;
146 /* rcu_read_lock()ed by nf_hook_slow */ 143 /* rcu_read_lock()ed by nf_hook_slow */
147 helper = rcu_dereference(help->helper); 144 helper = rcu_dereference(help->helper);
148 if (!helper) 145 if (!helper)
149 return NF_ACCEPT; 146 return NF_ACCEPT;
150 return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), 147 return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
151 ct, ctinfo); 148 ct, ctinfo);
152 } 149 }
153 150
154 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, 151 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
155 struct sk_buff **pskb, 152 struct sk_buff **pskb,
156 const struct net_device *in, 153 const struct net_device *in,
157 const struct net_device *out, 154 const struct net_device *out,
158 int (*okfn)(struct sk_buff *)) 155 int (*okfn)(struct sk_buff *))
159 { 156 {
160 /* Previously seen (loopback)? Ignore. Do this before 157 /* Previously seen (loopback)? Ignore. Do this before
161 fragment check. */ 158 fragment check. */
162 if ((*pskb)->nfct) 159 if ((*pskb)->nfct)
163 return NF_ACCEPT; 160 return NF_ACCEPT;
164 161
165 /* Gather fragments. */ 162 /* Gather fragments. */
166 if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { 163 if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
167 *pskb = nf_ct_ipv4_gather_frags(*pskb, 164 *pskb = nf_ct_ipv4_gather_frags(*pskb,
168 hooknum == NF_IP_PRE_ROUTING ? 165 hooknum == NF_IP_PRE_ROUTING ?
169 IP_DEFRAG_CONNTRACK_IN : 166 IP_DEFRAG_CONNTRACK_IN :
170 IP_DEFRAG_CONNTRACK_OUT); 167 IP_DEFRAG_CONNTRACK_OUT);
171 if (!*pskb) 168 if (!*pskb)
172 return NF_STOLEN; 169 return NF_STOLEN;
173 } 170 }
174 return NF_ACCEPT; 171 return NF_ACCEPT;
175 } 172 }
176 173
177 static unsigned int ipv4_conntrack_in(unsigned int hooknum, 174 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
178 struct sk_buff **pskb, 175 struct sk_buff **pskb,
179 const struct net_device *in, 176 const struct net_device *in,
180 const struct net_device *out, 177 const struct net_device *out,
181 int (*okfn)(struct sk_buff *)) 178 int (*okfn)(struct sk_buff *))
182 { 179 {
183 return nf_conntrack_in(PF_INET, hooknum, pskb); 180 return nf_conntrack_in(PF_INET, hooknum, pskb);
184 } 181 }
185 182
186 static unsigned int ipv4_conntrack_local(unsigned int hooknum, 183 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
187 struct sk_buff **pskb, 184 struct sk_buff **pskb,
188 const struct net_device *in, 185 const struct net_device *in,
189 const struct net_device *out, 186 const struct net_device *out,
190 int (*okfn)(struct sk_buff *)) 187 int (*okfn)(struct sk_buff *))
191 { 188 {
192 /* root is playing with raw sockets. */ 189 /* root is playing with raw sockets. */
193 if ((*pskb)->len < sizeof(struct iphdr) 190 if ((*pskb)->len < sizeof(struct iphdr)
194 || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { 191 || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
195 if (net_ratelimit()) 192 if (net_ratelimit())
196 printk("ipt_hook: happy cracking.\n"); 193 printk("ipt_hook: happy cracking.\n");
197 return NF_ACCEPT; 194 return NF_ACCEPT;
198 } 195 }
199 return nf_conntrack_in(PF_INET, hooknum, pskb); 196 return nf_conntrack_in(PF_INET, hooknum, pskb);
200 } 197 }
201 198
202 /* Connection tracking may drop packets, but never alters them, so 199 /* Connection tracking may drop packets, but never alters them, so
203 make it the first hook. */ 200 make it the first hook. */
204 static struct nf_hook_ops ipv4_conntrack_ops[] = { 201 static struct nf_hook_ops ipv4_conntrack_ops[] = {
205 { 202 {
206 .hook = ipv4_conntrack_defrag, 203 .hook = ipv4_conntrack_defrag,
207 .owner = THIS_MODULE, 204 .owner = THIS_MODULE,
208 .pf = PF_INET, 205 .pf = PF_INET,
209 .hooknum = NF_IP_PRE_ROUTING, 206 .hooknum = NF_IP_PRE_ROUTING,
210 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 207 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
211 }, 208 },
212 { 209 {
213 .hook = ipv4_conntrack_in, 210 .hook = ipv4_conntrack_in,
214 .owner = THIS_MODULE, 211 .owner = THIS_MODULE,
215 .pf = PF_INET, 212 .pf = PF_INET,
216 .hooknum = NF_IP_PRE_ROUTING, 213 .hooknum = NF_IP_PRE_ROUTING,
217 .priority = NF_IP_PRI_CONNTRACK, 214 .priority = NF_IP_PRI_CONNTRACK,
218 }, 215 },
219 { 216 {
220 .hook = ipv4_conntrack_defrag, 217 .hook = ipv4_conntrack_defrag,
221 .owner = THIS_MODULE, 218 .owner = THIS_MODULE,
222 .pf = PF_INET, 219 .pf = PF_INET,
223 .hooknum = NF_IP_LOCAL_OUT, 220 .hooknum = NF_IP_LOCAL_OUT,
224 .priority = NF_IP_PRI_CONNTRACK_DEFRAG, 221 .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
225 }, 222 },
226 { 223 {
227 .hook = ipv4_conntrack_local, 224 .hook = ipv4_conntrack_local,
228 .owner = THIS_MODULE, 225 .owner = THIS_MODULE,
229 .pf = PF_INET, 226 .pf = PF_INET,
230 .hooknum = NF_IP_LOCAL_OUT, 227 .hooknum = NF_IP_LOCAL_OUT,
231 .priority = NF_IP_PRI_CONNTRACK, 228 .priority = NF_IP_PRI_CONNTRACK,
232 }, 229 },
233 { 230 {
234 .hook = ipv4_conntrack_help, 231 .hook = ipv4_conntrack_help,
235 .owner = THIS_MODULE, 232 .owner = THIS_MODULE,
236 .pf = PF_INET, 233 .pf = PF_INET,
237 .hooknum = NF_IP_POST_ROUTING, 234 .hooknum = NF_IP_POST_ROUTING,
238 .priority = NF_IP_PRI_CONNTRACK_HELPER, 235 .priority = NF_IP_PRI_CONNTRACK_HELPER,
239 }, 236 },
240 { 237 {
241 .hook = ipv4_conntrack_help, 238 .hook = ipv4_conntrack_help,
242 .owner = THIS_MODULE, 239 .owner = THIS_MODULE,
243 .pf = PF_INET, 240 .pf = PF_INET,
244 .hooknum = NF_IP_LOCAL_IN, 241 .hooknum = NF_IP_LOCAL_IN,
245 .priority = NF_IP_PRI_CONNTRACK_HELPER, 242 .priority = NF_IP_PRI_CONNTRACK_HELPER,
246 }, 243 },
247 { 244 {
248 .hook = ipv4_confirm, 245 .hook = ipv4_confirm,
249 .owner = THIS_MODULE, 246 .owner = THIS_MODULE,
250 .pf = PF_INET, 247 .pf = PF_INET,
251 .hooknum = NF_IP_POST_ROUTING, 248 .hooknum = NF_IP_POST_ROUTING,
252 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 249 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
253 }, 250 },
254 { 251 {
255 .hook = ipv4_confirm, 252 .hook = ipv4_confirm,
256 .owner = THIS_MODULE, 253 .owner = THIS_MODULE,
257 .pf = PF_INET, 254 .pf = PF_INET,
258 .hooknum = NF_IP_LOCAL_IN, 255 .hooknum = NF_IP_LOCAL_IN,
259 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 256 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
260 }, 257 },
261 }; 258 };
262 259
263 #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 260 #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
264 static int log_invalid_proto_min = 0; 261 static int log_invalid_proto_min = 0;
265 static int log_invalid_proto_max = 255; 262 static int log_invalid_proto_max = 255;
266 263
267 static ctl_table ip_ct_sysctl_table[] = { 264 static ctl_table ip_ct_sysctl_table[] = {
268 { 265 {
269 .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, 266 .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
270 .procname = "ip_conntrack_max", 267 .procname = "ip_conntrack_max",
271 .data = &nf_conntrack_max, 268 .data = &nf_conntrack_max,
272 .maxlen = sizeof(int), 269 .maxlen = sizeof(int),
273 .mode = 0644, 270 .mode = 0644,
274 .proc_handler = &proc_dointvec, 271 .proc_handler = &proc_dointvec,
275 }, 272 },
276 { 273 {
277 .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, 274 .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
278 .procname = "ip_conntrack_count", 275 .procname = "ip_conntrack_count",
279 .data = &nf_conntrack_count, 276 .data = &nf_conntrack_count,
280 .maxlen = sizeof(int), 277 .maxlen = sizeof(int),
281 .mode = 0444, 278 .mode = 0444,
282 .proc_handler = &proc_dointvec, 279 .proc_handler = &proc_dointvec,
283 }, 280 },
284 { 281 {
285 .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, 282 .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
286 .procname = "ip_conntrack_buckets", 283 .procname = "ip_conntrack_buckets",
287 .data = &nf_conntrack_htable_size, 284 .data = &nf_conntrack_htable_size,
288 .maxlen = sizeof(unsigned int), 285 .maxlen = sizeof(unsigned int),
289 .mode = 0444, 286 .mode = 0444,
290 .proc_handler = &proc_dointvec, 287 .proc_handler = &proc_dointvec,
291 }, 288 },
292 { 289 {
293 .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, 290 .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
294 .procname = "ip_conntrack_checksum", 291 .procname = "ip_conntrack_checksum",
295 .data = &nf_conntrack_checksum, 292 .data = &nf_conntrack_checksum,
296 .maxlen = sizeof(int), 293 .maxlen = sizeof(int),
297 .mode = 0644, 294 .mode = 0644,
298 .proc_handler = &proc_dointvec, 295 .proc_handler = &proc_dointvec,
299 }, 296 },
300 { 297 {
301 .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, 298 .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
302 .procname = "ip_conntrack_log_invalid", 299 .procname = "ip_conntrack_log_invalid",
303 .data = &nf_ct_log_invalid, 300 .data = &nf_ct_log_invalid,
304 .maxlen = sizeof(unsigned int), 301 .maxlen = sizeof(unsigned int),
305 .mode = 0644, 302 .mode = 0644,
306 .proc_handler = &proc_dointvec_minmax, 303 .proc_handler = &proc_dointvec_minmax,
307 .strategy = &sysctl_intvec, 304 .strategy = &sysctl_intvec,
308 .extra1 = &log_invalid_proto_min, 305 .extra1 = &log_invalid_proto_min,
309 .extra2 = &log_invalid_proto_max, 306 .extra2 = &log_invalid_proto_max,
310 }, 307 },
311 { 308 {
312 .ctl_name = 0 309 .ctl_name = 0
313 } 310 }
314 }; 311 };
315 #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ 312 #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
316 313
317 /* Fast function for those who don't want to parse /proc (and I don't 314 /* Fast function for those who don't want to parse /proc (and I don't
318 blame them). */ 315 blame them). */
319 /* Reversing the socket's dst/src point of view gives us the reply 316 /* Reversing the socket's dst/src point of view gives us the reply
320 mapping. */ 317 mapping. */
321 static int 318 static int
322 getorigdst(struct sock *sk, int optval, void __user *user, int *len) 319 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
323 { 320 {
324 struct inet_sock *inet = inet_sk(sk); 321 struct inet_sock *inet = inet_sk(sk);
325 struct nf_conntrack_tuple_hash *h; 322 struct nf_conntrack_tuple_hash *h;
326 struct nf_conntrack_tuple tuple; 323 struct nf_conntrack_tuple tuple;
327 324
328 NF_CT_TUPLE_U_BLANK(&tuple); 325 NF_CT_TUPLE_U_BLANK(&tuple);
329 tuple.src.u3.ip = inet->rcv_saddr; 326 tuple.src.u3.ip = inet->rcv_saddr;
330 tuple.src.u.tcp.port = inet->sport; 327 tuple.src.u.tcp.port = inet->sport;
331 tuple.dst.u3.ip = inet->daddr; 328 tuple.dst.u3.ip = inet->daddr;
332 tuple.dst.u.tcp.port = inet->dport; 329 tuple.dst.u.tcp.port = inet->dport;
333 tuple.src.l3num = PF_INET; 330 tuple.src.l3num = PF_INET;
334 tuple.dst.protonum = IPPROTO_TCP; 331 tuple.dst.protonum = IPPROTO_TCP;
335 332
336 /* We only do TCP at the moment: is there a better way? */ 333 /* We only do TCP at the moment: is there a better way? */
337 if (strcmp(sk->sk_prot->name, "TCP")) { 334 if (strcmp(sk->sk_prot->name, "TCP")) {
338 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); 335 DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
339 return -ENOPROTOOPT; 336 return -ENOPROTOOPT;
340 } 337 }
341 338
342 if ((unsigned int) *len < sizeof(struct sockaddr_in)) { 339 if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
343 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", 340 DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
344 *len, sizeof(struct sockaddr_in)); 341 *len, sizeof(struct sockaddr_in));
345 return -EINVAL; 342 return -EINVAL;
346 } 343 }
347 344
348 h = nf_conntrack_find_get(&tuple, NULL); 345 h = nf_conntrack_find_get(&tuple, NULL);
349 if (h) { 346 if (h) {
350 struct sockaddr_in sin; 347 struct sockaddr_in sin;
351 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 348 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
352 349
353 sin.sin_family = AF_INET; 350 sin.sin_family = AF_INET;
354 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 351 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
355 .tuple.dst.u.tcp.port; 352 .tuple.dst.u.tcp.port;
356 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 353 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
357 .tuple.dst.u3.ip; 354 .tuple.dst.u3.ip;
358 memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 355 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
359 356
360 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", 357 DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
361 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 358 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
362 nf_ct_put(ct); 359 nf_ct_put(ct);
363 if (copy_to_user(user, &sin, sizeof(sin)) != 0) 360 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
364 return -EFAULT; 361 return -EFAULT;
365 else 362 else
366 return 0; 363 return 0;
367 } 364 }
368 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", 365 DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
369 NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), 366 NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
370 NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); 367 NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
371 return -ENOENT; 368 return -ENOENT;
372 } 369 }
373 370
374 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 371 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
375 372
376 #include <linux/netfilter/nfnetlink.h> 373 #include <linux/netfilter/nfnetlink.h>
377 #include <linux/netfilter/nfnetlink_conntrack.h> 374 #include <linux/netfilter/nfnetlink_conntrack.h>
378 375
379 static int ipv4_tuple_to_nfattr(struct sk_buff *skb, 376 static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
380 const struct nf_conntrack_tuple *tuple) 377 const struct nf_conntrack_tuple *tuple)
381 { 378 {
382 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), 379 NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
383 &tuple->src.u3.ip); 380 &tuple->src.u3.ip);
384 NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), 381 NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
385 &tuple->dst.u3.ip); 382 &tuple->dst.u3.ip);
386 return 0; 383 return 0;
387 384
388 nfattr_failure: 385 nfattr_failure:
389 return -1; 386 return -1;
390 } 387 }
391 388
392 static const size_t cta_min_ip[CTA_IP_MAX] = { 389 static const size_t cta_min_ip[CTA_IP_MAX] = {
393 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), 390 [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
394 [CTA_IP_V4_DST-1] = sizeof(u_int32_t), 391 [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
395 }; 392 };
396 393
397 static int ipv4_nfattr_to_tuple(struct nfattr *tb[], 394 static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
398 struct nf_conntrack_tuple *t) 395 struct nf_conntrack_tuple *t)
399 { 396 {
400 if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1]) 397 if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
401 return -EINVAL; 398 return -EINVAL;
402 399
403 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) 400 if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
404 return -EINVAL; 401 return -EINVAL;
405 402
406 t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); 403 t->src.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
407 t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); 404 t->dst.u3.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
408 405
409 return 0; 406 return 0;
410 } 407 }
411 #endif 408 #endif
412 409
413 static struct nf_sockopt_ops so_getorigdst = { 410 static struct nf_sockopt_ops so_getorigdst = {
414 .pf = PF_INET, 411 .pf = PF_INET,
415 .get_optmin = SO_ORIGINAL_DST, 412 .get_optmin = SO_ORIGINAL_DST,
416 .get_optmax = SO_ORIGINAL_DST+1, 413 .get_optmax = SO_ORIGINAL_DST+1,
417 .get = &getorigdst, 414 .get = &getorigdst,
418 }; 415 };
419 416
420 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { 417 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
421 .l3proto = PF_INET, 418 .l3proto = PF_INET,
422 .name = "ipv4", 419 .name = "ipv4",
423 .pkt_to_tuple = ipv4_pkt_to_tuple, 420 .pkt_to_tuple = ipv4_pkt_to_tuple,
424 .invert_tuple = ipv4_invert_tuple, 421 .invert_tuple = ipv4_invert_tuple,
425 .print_tuple = ipv4_print_tuple, 422 .print_tuple = ipv4_print_tuple,
426 .print_conntrack = ipv4_print_conntrack, 423 .print_conntrack = ipv4_print_conntrack,
427 .prepare = ipv4_prepare, 424 .prepare = ipv4_prepare,
428 .get_features = ipv4_get_features, 425 .get_features = ipv4_get_features,
429 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 426 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
430 .tuple_to_nfattr = ipv4_tuple_to_nfattr, 427 .tuple_to_nfattr = ipv4_tuple_to_nfattr,
431 .nfattr_to_tuple = ipv4_nfattr_to_tuple, 428 .nfattr_to_tuple = ipv4_nfattr_to_tuple,
432 #endif 429 #endif
433 #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 430 #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
434 .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path, 431 .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
435 .ctl_table = ip_ct_sysctl_table, 432 .ctl_table = ip_ct_sysctl_table,
436 #endif 433 #endif
437 .me = THIS_MODULE, 434 .me = THIS_MODULE,
438 }; 435 };
439 436
440 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 437 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
441 MODULE_ALIAS("ip_conntrack"); 438 MODULE_ALIAS("ip_conntrack");
442 MODULE_LICENSE("GPL"); 439 MODULE_LICENSE("GPL");
443 440
444 static int __init nf_conntrack_l3proto_ipv4_init(void) 441 static int __init nf_conntrack_l3proto_ipv4_init(void)
445 { 442 {
446 int ret = 0; 443 int ret = 0;
447 444
448 need_conntrack(); 445 need_conntrack();
449 446
450 ret = nf_register_sockopt(&so_getorigdst); 447 ret = nf_register_sockopt(&so_getorigdst);
451 if (ret < 0) { 448 if (ret < 0) {
452 printk(KERN_ERR "Unable to register netfilter socket option\n"); 449 printk(KERN_ERR "Unable to register netfilter socket option\n");
453 return ret; 450 return ret;
454 } 451 }
455 452
456 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); 453 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
457 if (ret < 0) { 454 if (ret < 0) {
458 printk("nf_conntrack_ipv4: can't register tcp.\n"); 455 printk("nf_conntrack_ipv4: can't register tcp.\n");
459 goto cleanup_sockopt; 456 goto cleanup_sockopt;
460 } 457 }
461 458
462 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); 459 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
463 if (ret < 0) { 460 if (ret < 0) {
464 printk("nf_conntrack_ipv4: can't register udp.\n"); 461 printk("nf_conntrack_ipv4: can't register udp.\n");
465 goto cleanup_tcp; 462 goto cleanup_tcp;
466 } 463 }
467 464
468 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); 465 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
469 if (ret < 0) { 466 if (ret < 0) {
470 printk("nf_conntrack_ipv4: can't register icmp.\n"); 467 printk("nf_conntrack_ipv4: can't register icmp.\n");
471 goto cleanup_udp; 468 goto cleanup_udp;
472 } 469 }
473 470
474 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); 471 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
475 if (ret < 0) { 472 if (ret < 0) {
476 printk("nf_conntrack_ipv4: can't register ipv4\n"); 473 printk("nf_conntrack_ipv4: can't register ipv4\n");
477 goto cleanup_icmp; 474 goto cleanup_icmp;
478 } 475 }
479 476
480 ret = nf_register_hooks(ipv4_conntrack_ops, 477 ret = nf_register_hooks(ipv4_conntrack_ops,
481 ARRAY_SIZE(ipv4_conntrack_ops)); 478 ARRAY_SIZE(ipv4_conntrack_ops));
482 if (ret < 0) { 479 if (ret < 0) {
483 printk("nf_conntrack_ipv4: can't register hooks.\n"); 480 printk("nf_conntrack_ipv4: can't register hooks.\n");
484 goto cleanup_ipv4; 481 goto cleanup_ipv4;
485 } 482 }
486 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 483 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
487 ret = nf_conntrack_ipv4_compat_init(); 484 ret = nf_conntrack_ipv4_compat_init();
488 if (ret < 0) 485 if (ret < 0)
489 goto cleanup_hooks; 486 goto cleanup_hooks;
490 #endif 487 #endif
491 return ret; 488 return ret;
492 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 489 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
493 cleanup_hooks: 490 cleanup_hooks:
494 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 491 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
495 #endif 492 #endif
496 cleanup_ipv4: 493 cleanup_ipv4:
497 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 494 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
498 cleanup_icmp: 495 cleanup_icmp:
499 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); 496 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
500 cleanup_udp: 497 cleanup_udp:
501 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); 498 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
502 cleanup_tcp: 499 cleanup_tcp:
503 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); 500 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
504 cleanup_sockopt: 501 cleanup_sockopt:
505 nf_unregister_sockopt(&so_getorigdst); 502 nf_unregister_sockopt(&so_getorigdst);
506 return ret; 503 return ret;
507 } 504 }
508 505
509 static void __exit nf_conntrack_l3proto_ipv4_fini(void) 506 static void __exit nf_conntrack_l3proto_ipv4_fini(void)
510 { 507 {
511 synchronize_net(); 508 synchronize_net();
512 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) 509 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
513 nf_conntrack_ipv4_compat_fini(); 510 nf_conntrack_ipv4_compat_fini();
514 #endif 511 #endif
515 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); 512 nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
516 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 513 nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
517 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); 514 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
518 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); 515 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
519 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); 516 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
520 nf_unregister_sockopt(&so_getorigdst); 517 nf_unregister_sockopt(&so_getorigdst);
521 } 518 }
522 519
/* Module entry and exit points. */
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
525 522
net/ipv4/netfilter/nf_nat_core.c
1 /* NAT for netfilter; shared with compatibility layer. */ 1 /* NAT for netfilter; shared with compatibility layer. */
2 2
3 /* (C) 1999-2001 Paul `Rusty' Russell 3 /* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11 #include <linux/module.h> 11 #include <linux/module.h>
12 #include <linux/types.h> 12 #include <linux/types.h>
13 #include <linux/timer.h> 13 #include <linux/timer.h>
14 #include <linux/skbuff.h> 14 #include <linux/skbuff.h>
15 #include <linux/vmalloc.h> 15 #include <linux/vmalloc.h>
16 #include <net/checksum.h> 16 #include <net/checksum.h>
17 #include <net/icmp.h> 17 #include <net/icmp.h>
18 #include <net/ip.h> 18 #include <net/ip.h>
19 #include <net/tcp.h> /* For tcp_prot in getorigdst */ 19 #include <net/tcp.h> /* For tcp_prot in getorigdst */
20 #include <linux/icmp.h> 20 #include <linux/icmp.h>
21 #include <linux/udp.h> 21 #include <linux/udp.h>
22 #include <linux/jhash.h> 22 #include <linux/jhash.h>
23 23
24 #include <linux/netfilter_ipv4.h> 24 #include <linux/netfilter_ipv4.h>
25 #include <net/netfilter/nf_conntrack.h> 25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h> 26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_nat.h> 27 #include <net/netfilter/nf_nat.h>
28 #include <net/netfilter/nf_nat_protocol.h> 28 #include <net/netfilter/nf_nat_protocol.h>
29 #include <net/netfilter/nf_nat_core.h> 29 #include <net/netfilter/nf_nat_core.h>
30 #include <net/netfilter/nf_nat_helper.h> 30 #include <net/netfilter/nf_nat_helper.h>
31 #include <net/netfilter/nf_conntrack_helper.h> 31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_l3proto.h> 32 #include <net/netfilter/nf_conntrack_l3proto.h>
33 #include <net/netfilter/nf_conntrack_l4proto.h> 33 #include <net/netfilter/nf_conntrack_l4proto.h>
34 34
/* Debug tracing: DEBUGP() expands to nothing unless "#if 0" is flipped. */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
40 40
/* Taken for writing in this file while a conntrack is unlinked from its
   bysource list. */
static DEFINE_RWLOCK(nf_nat_lock);

static struct nf_conntrack_l3proto *l3proto;

/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size;

static struct list_head *bysource;

/* One slot per possible 8-bit protocol number. */
#define MAX_IP_NAT_PROTO 256
static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
52 52
53 static inline struct nf_nat_protocol * 53 static inline struct nf_nat_protocol *
54 __nf_nat_proto_find(u_int8_t protonum) 54 __nf_nat_proto_find(u_int8_t protonum)
55 { 55 {
56 return rcu_dereference(nf_nat_protos[protonum]); 56 return rcu_dereference(nf_nat_protos[protonum]);
57 } 57 }
58 58
59 struct nf_nat_protocol * 59 struct nf_nat_protocol *
60 nf_nat_proto_find_get(u_int8_t protonum) 60 nf_nat_proto_find_get(u_int8_t protonum)
61 { 61 {
62 struct nf_nat_protocol *p; 62 struct nf_nat_protocol *p;
63 63
64 rcu_read_lock(); 64 rcu_read_lock();
65 p = __nf_nat_proto_find(protonum); 65 p = __nf_nat_proto_find(protonum);
66 if (!try_module_get(p->me)) 66 if (!try_module_get(p->me))
67 p = &nf_nat_unknown_protocol; 67 p = &nf_nat_unknown_protocol;
68 rcu_read_unlock(); 68 rcu_read_unlock();
69 69
70 return p; 70 return p;
71 } 71 }
72 EXPORT_SYMBOL_GPL(nf_nat_proto_find_get); 72 EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
73 73
74 void 74 void
75 nf_nat_proto_put(struct nf_nat_protocol *p) 75 nf_nat_proto_put(struct nf_nat_protocol *p)
76 { 76 {
77 module_put(p->me); 77 module_put(p->me);
78 } 78 }
79 EXPORT_SYMBOL_GPL(nf_nat_proto_put); 79 EXPORT_SYMBOL_GPL(nf_nat_proto_put);
80 80
81 /* We keep an extra hash for each conntrack, for fast searching. */ 81 /* We keep an extra hash for each conntrack, for fast searching. */
82 static inline unsigned int 82 static inline unsigned int
83 hash_by_src(const struct nf_conntrack_tuple *tuple) 83 hash_by_src(const struct nf_conntrack_tuple *tuple)
84 { 84 {
85 /* Original src, to ensure we map it consistently if poss. */ 85 /* Original src, to ensure we map it consistently if poss. */
86 return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all, 86 return jhash_3words((__force u32)tuple->src.u3.ip, tuple->src.u.all,
87 tuple->dst.protonum, 0) % nf_nat_htable_size; 87 tuple->dst.protonum, 0) % nf_nat_htable_size;
88 } 88 }
89 89
90 /* Noone using conntrack by the time this called. */ 90 /* Noone using conntrack by the time this called. */
91 static void nf_nat_cleanup_conntrack(struct nf_conn *conn) 91 static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
92 { 92 {
93 struct nf_conn_nat *nat; 93 struct nf_conn_nat *nat;
94 if (!(conn->status & IPS_NAT_DONE_MASK)) 94 if (!(conn->status & IPS_NAT_DONE_MASK))
95 return; 95 return;
96 96
97 nat = nfct_nat(conn); 97 nat = nfct_nat(conn);
98 write_lock_bh(&nf_nat_lock); 98 write_lock_bh(&nf_nat_lock);
99 list_del(&nat->info.bysource); 99 list_del(&nat->info.bysource);
100 nat->info.ct = NULL; 100 nat->info.ct = NULL;
101 write_unlock_bh(&nf_nat_lock); 101 write_unlock_bh(&nf_nat_lock);
102 } 102 }
103 103
104 /* Is this tuple already taken? (not by us) */ 104 /* Is this tuple already taken? (not by us) */
105 int 105 int
106 nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, 106 nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
107 const struct nf_conn *ignored_conntrack) 107 const struct nf_conn *ignored_conntrack)
108 { 108 {
109 /* Conntrack tracking doesn't keep track of outgoing tuples; only 109 /* Conntrack tracking doesn't keep track of outgoing tuples; only
110 incoming ones. NAT means they don't have a fixed mapping, 110 incoming ones. NAT means they don't have a fixed mapping,
111 so we invert the tuple and look for the incoming reply. 111 so we invert the tuple and look for the incoming reply.
112 112
113 We could keep a separate hash if this proves too slow. */ 113 We could keep a separate hash if this proves too slow. */
114 struct nf_conntrack_tuple reply; 114 struct nf_conntrack_tuple reply;
115 115
116 nf_ct_invert_tuplepr(&reply, tuple); 116 nf_ct_invert_tuplepr(&reply, tuple);
117 return nf_conntrack_tuple_taken(&reply, ignored_conntrack); 117 return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
118 } 118 }
119 EXPORT_SYMBOL(nf_nat_used_tuple); 119 EXPORT_SYMBOL(nf_nat_used_tuple);
120 120
121 /* If we source map this tuple so reply looks like reply_tuple, will 121 /* If we source map this tuple so reply looks like reply_tuple, will
122 * that meet the constraints of range. */ 122 * that meet the constraints of range. */
123 static int 123 static int
124 in_range(const struct nf_conntrack_tuple *tuple, 124 in_range(const struct nf_conntrack_tuple *tuple,
125 const struct nf_nat_range *range) 125 const struct nf_nat_range *range)
126 { 126 {
127 struct nf_nat_protocol *proto; 127 struct nf_nat_protocol *proto;
128 int ret = 0; 128 int ret = 0;
129 129
130 /* If we are supposed to map IPs, then we must be in the 130 /* If we are supposed to map IPs, then we must be in the
131 range specified, otherwise let this drag us onto a new src IP. */ 131 range specified, otherwise let this drag us onto a new src IP. */
132 if (range->flags & IP_NAT_RANGE_MAP_IPS) { 132 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
133 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || 133 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
134 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) 134 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
135 return 0; 135 return 0;
136 } 136 }
137 137
138 rcu_read_lock(); 138 rcu_read_lock();
139 proto = __nf_nat_proto_find(tuple->dst.protonum); 139 proto = __nf_nat_proto_find(tuple->dst.protonum);
140 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 140 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
141 proto->in_range(tuple, IP_NAT_MANIP_SRC, 141 proto->in_range(tuple, IP_NAT_MANIP_SRC,
142 &range->min, &range->max)) 142 &range->min, &range->max))
143 ret = 1; 143 ret = 1;
144 rcu_read_unlock(); 144 rcu_read_unlock();
145 145
146 return ret; 146 return ret;
147 } 147 }
148 148
149 static inline int 149 static inline int
150 same_src(const struct nf_conn *ct, 150 same_src(const struct nf_conn *ct,
151 const struct nf_conntrack_tuple *tuple) 151 const struct nf_conntrack_tuple *tuple)
152 { 152 {
153 const struct nf_conntrack_tuple *t; 153 const struct nf_conntrack_tuple *t;
154 154
155 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 155 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
156 return (t->dst.protonum == tuple->dst.protonum && 156 return (t->dst.protonum == tuple->dst.protonum &&
157 t->src.u3.ip == tuple->src.u3.ip && 157 t->src.u3.ip == tuple->src.u3.ip &&
158 t->src.u.all == tuple->src.u.all); 158 t->src.u.all == tuple->src.u.all);
159 } 159 }
160 160
161 /* Only called for SRC manip */ 161 /* Only called for SRC manip */
162 static int 162 static int
163 find_appropriate_src(const struct nf_conntrack_tuple *tuple, 163 find_appropriate_src(const struct nf_conntrack_tuple *tuple,
164 struct nf_conntrack_tuple *result, 164 struct nf_conntrack_tuple *result,
165 const struct nf_nat_range *range) 165 const struct nf_nat_range *range)
166 { 166 {
167 unsigned int h = hash_by_src(tuple); 167 unsigned int h = hash_by_src(tuple);
168 struct nf_conn_nat *nat; 168 struct nf_conn_nat *nat;
169 struct nf_conn *ct; 169 struct nf_conn *ct;
170 170
171 read_lock_bh(&nf_nat_lock); 171 read_lock_bh(&nf_nat_lock);
172 list_for_each_entry(nat, &bysource[h], info.bysource) { 172 list_for_each_entry(nat, &bysource[h], info.bysource) {
173 ct = nat->info.ct; 173 ct = nat->info.ct;
174 if (same_src(ct, tuple)) { 174 if (same_src(ct, tuple)) {
175 /* Copy source part from reply tuple. */ 175 /* Copy source part from reply tuple. */
176 nf_ct_invert_tuplepr(result, 176 nf_ct_invert_tuplepr(result,
177 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 177 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
178 result->dst = tuple->dst; 178 result->dst = tuple->dst;
179 179
180 if (in_range(result, range)) { 180 if (in_range(result, range)) {
181 read_unlock_bh(&nf_nat_lock); 181 read_unlock_bh(&nf_nat_lock);
182 return 1; 182 return 1;
183 } 183 }
184 } 184 }
185 } 185 }
186 read_unlock_bh(&nf_nat_lock); 186 read_unlock_bh(&nf_nat_lock);
187 return 0; 187 return 0;
188 } 188 }
189 189
190 /* For [FUTURE] fragmentation handling, we want the least-used 190 /* For [FUTURE] fragmentation handling, we want the least-used
191 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus 191 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
192 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports 192 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
193 1-65535, we don't do pro-rata allocation based on ports; we choose 193 1-65535, we don't do pro-rata allocation based on ports; we choose
194 the ip with the lowest src-ip/dst-ip/proto usage. 194 the ip with the lowest src-ip/dst-ip/proto usage.
195 */ 195 */
196 static void 196 static void
197 find_best_ips_proto(struct nf_conntrack_tuple *tuple, 197 find_best_ips_proto(struct nf_conntrack_tuple *tuple,
198 const struct nf_nat_range *range, 198 const struct nf_nat_range *range,
199 const struct nf_conn *ct, 199 const struct nf_conn *ct,
200 enum nf_nat_manip_type maniptype) 200 enum nf_nat_manip_type maniptype)
201 { 201 {
202 __be32 *var_ipp; 202 __be32 *var_ipp;
203 /* Host order */ 203 /* Host order */
204 u_int32_t minip, maxip, j; 204 u_int32_t minip, maxip, j;
205 205
206 /* No IP mapping? Do nothing. */ 206 /* No IP mapping? Do nothing. */
207 if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) 207 if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
208 return; 208 return;
209 209
210 if (maniptype == IP_NAT_MANIP_SRC) 210 if (maniptype == IP_NAT_MANIP_SRC)
211 var_ipp = &tuple->src.u3.ip; 211 var_ipp = &tuple->src.u3.ip;
212 else 212 else
213 var_ipp = &tuple->dst.u3.ip; 213 var_ipp = &tuple->dst.u3.ip;
214 214
215 /* Fast path: only one choice. */ 215 /* Fast path: only one choice. */
216 if (range->min_ip == range->max_ip) { 216 if (range->min_ip == range->max_ip) {
217 *var_ipp = range->min_ip; 217 *var_ipp = range->min_ip;
218 return; 218 return;
219 } 219 }
220 220
221 /* Hashing source and destination IPs gives a fairly even 221 /* Hashing source and destination IPs gives a fairly even
222 * spread in practice (if there are a small number of IPs 222 * spread in practice (if there are a small number of IPs
223 * involved, there usually aren't that many connections 223 * involved, there usually aren't that many connections
224 * anyway). The consistency means that servers see the same 224 * anyway). The consistency means that servers see the same
225 * client coming from the same IP (some Internet Banking sites 225 * client coming from the same IP (some Internet Banking sites
226 * like this), even across reboots. */ 226 * like this), even across reboots. */
227 minip = ntohl(range->min_ip); 227 minip = ntohl(range->min_ip);
228 maxip = ntohl(range->max_ip); 228 maxip = ntohl(range->max_ip);
229 j = jhash_2words((__force u32)tuple->src.u3.ip, 229 j = jhash_2words((__force u32)tuple->src.u3.ip,
230 (__force u32)tuple->dst.u3.ip, 0); 230 (__force u32)tuple->dst.u3.ip, 0);
231 *var_ipp = htonl(minip + j % (maxip - minip + 1)); 231 *var_ipp = htonl(minip + j % (maxip - minip + 1));
232 } 232 }
233 233
234 /* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING, 234 /* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
235 * we change the source to map into the range. For NF_IP_PRE_ROUTING 235 * we change the source to map into the range. For NF_IP_PRE_ROUTING
236 * and NF_IP_LOCAL_OUT, we change the destination to map into the 236 * and NF_IP_LOCAL_OUT, we change the destination to map into the
237 * range. It might not be possible to get a unique tuple, but we try. 237 * range. It might not be possible to get a unique tuple, but we try.
238 * At worst (or if we race), we will end up with a final duplicate in 238 * At worst (or if we race), we will end up with a final duplicate in
239 * __ip_conntrack_confirm and drop the packet. */ 239 * __ip_conntrack_confirm and drop the packet. */
240 static void 240 static void
241 get_unique_tuple(struct nf_conntrack_tuple *tuple, 241 get_unique_tuple(struct nf_conntrack_tuple *tuple,
242 const struct nf_conntrack_tuple *orig_tuple, 242 const struct nf_conntrack_tuple *orig_tuple,
243 const struct nf_nat_range *range, 243 const struct nf_nat_range *range,
244 struct nf_conn *ct, 244 struct nf_conn *ct,
245 enum nf_nat_manip_type maniptype) 245 enum nf_nat_manip_type maniptype)
246 { 246 {
247 struct nf_nat_protocol *proto; 247 struct nf_nat_protocol *proto;
248 248
249 /* 1) If this srcip/proto/src-proto-part is currently mapped, 249 /* 1) If this srcip/proto/src-proto-part is currently mapped,
250 and that same mapping gives a unique tuple within the given 250 and that same mapping gives a unique tuple within the given
251 range, use that. 251 range, use that.
252 252
253 This is only required for source (ie. NAT/masq) mappings. 253 This is only required for source (ie. NAT/masq) mappings.
254 So far, we don't do local source mappings, so multiple 254 So far, we don't do local source mappings, so multiple
255 manips not an issue. */ 255 manips not an issue. */
256 if (maniptype == IP_NAT_MANIP_SRC) { 256 if (maniptype == IP_NAT_MANIP_SRC) {
257 if (find_appropriate_src(orig_tuple, tuple, range)) { 257 if (find_appropriate_src(orig_tuple, tuple, range)) {
258 DEBUGP("get_unique_tuple: Found current src map\n"); 258 DEBUGP("get_unique_tuple: Found current src map\n");
259 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) 259 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
260 if (!nf_nat_used_tuple(tuple, ct)) 260 if (!nf_nat_used_tuple(tuple, ct))
261 return; 261 return;
262 } 262 }
263 } 263 }
264 264
265 /* 2) Select the least-used IP/proto combination in the given 265 /* 2) Select the least-used IP/proto combination in the given
266 range. */ 266 range. */
267 *tuple = *orig_tuple; 267 *tuple = *orig_tuple;
268 find_best_ips_proto(tuple, range, ct, maniptype); 268 find_best_ips_proto(tuple, range, ct, maniptype);
269 269
270 /* 3) The per-protocol part of the manip is made to map into 270 /* 3) The per-protocol part of the manip is made to map into
271 the range to make a unique tuple. */ 271 the range to make a unique tuple. */
272 272
273 rcu_read_lock(); 273 rcu_read_lock();
274 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 274 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
275 275
276 /* Change protocol info to have some randomization */ 276 /* Change protocol info to have some randomization */
277 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { 277 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
278 proto->unique_tuple(tuple, range, maniptype, ct); 278 proto->unique_tuple(tuple, range, maniptype, ct);
279 goto out; 279 goto out;
280 } 280 }
281 281
282 /* Only bother mapping if it's not already in range and unique */ 282 /* Only bother mapping if it's not already in range and unique */
283 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 283 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
284 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 284 proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
285 !nf_nat_used_tuple(tuple, ct)) 285 !nf_nat_used_tuple(tuple, ct))
286 goto out; 286 goto out;
287 287
288 /* Last change: get protocol to try to obtain unique tuple. */ 288 /* Last change: get protocol to try to obtain unique tuple. */
289 proto->unique_tuple(tuple, range, maniptype, ct); 289 proto->unique_tuple(tuple, range, maniptype, ct);
290 out: 290 out:
291 rcu_read_unlock(); 291 rcu_read_unlock();
292 } 292 }
293 293
294 unsigned int 294 unsigned int
295 nf_nat_setup_info(struct nf_conn *ct, 295 nf_nat_setup_info(struct nf_conn *ct,
296 const struct nf_nat_range *range, 296 const struct nf_nat_range *range,
297 unsigned int hooknum) 297 unsigned int hooknum)
298 { 298 {
299 struct nf_conntrack_tuple curr_tuple, new_tuple; 299 struct nf_conntrack_tuple curr_tuple, new_tuple;
300 struct nf_conn_nat *nat = nfct_nat(ct); 300 struct nf_conn_nat *nat;
301 struct nf_nat_info *info = &nat->info; 301 struct nf_nat_info *info;
302 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); 302 int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
303 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 303 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
304 304
305 /* nat helper or nfctnetlink also setup binding */
306 nat = nfct_nat(ct);
307 if (!nat) {
308 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
309 if (nat == NULL) {
310 DEBUGP("failed to add NAT extension\n");
311 return NF_ACCEPT;
312 }
313 }
314
305 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || 315 NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
306 hooknum == NF_IP_POST_ROUTING || 316 hooknum == NF_IP_POST_ROUTING ||
307 hooknum == NF_IP_LOCAL_IN || 317 hooknum == NF_IP_LOCAL_IN ||
308 hooknum == NF_IP_LOCAL_OUT); 318 hooknum == NF_IP_LOCAL_OUT);
309 BUG_ON(nf_nat_initialized(ct, maniptype)); 319 BUG_ON(nf_nat_initialized(ct, maniptype));
310 320
311 /* What we've got will look like inverse of reply. Normally 321 /* What we've got will look like inverse of reply. Normally
312 this is what is in the conntrack, except for prior 322 this is what is in the conntrack, except for prior
313 manipulations (future optimization: if num_manips == 0, 323 manipulations (future optimization: if num_manips == 0,
314 orig_tp = 324 orig_tp =
315 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ 325 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
316 nf_ct_invert_tuplepr(&curr_tuple, 326 nf_ct_invert_tuplepr(&curr_tuple,
317 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 327 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
318 328
319 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); 329 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
320 330
321 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { 331 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
322 struct nf_conntrack_tuple reply; 332 struct nf_conntrack_tuple reply;
323 333
324 /* Alter conntrack table so will recognize replies. */ 334 /* Alter conntrack table so will recognize replies. */
325 nf_ct_invert_tuplepr(&reply, &new_tuple); 335 nf_ct_invert_tuplepr(&reply, &new_tuple);
326 nf_conntrack_alter_reply(ct, &reply); 336 nf_conntrack_alter_reply(ct, &reply);
327 337
328 /* Non-atomic: we own this at the moment. */ 338 /* Non-atomic: we own this at the moment. */
329 if (maniptype == IP_NAT_MANIP_SRC) 339 if (maniptype == IP_NAT_MANIP_SRC)
330 ct->status |= IPS_SRC_NAT; 340 ct->status |= IPS_SRC_NAT;
331 else 341 else
332 ct->status |= IPS_DST_NAT; 342 ct->status |= IPS_DST_NAT;
333 } 343 }
334 344
335 /* Place in source hash if this is the first time. */ 345 /* Place in source hash if this is the first time. */
336 if (have_to_hash) { 346 if (have_to_hash) {
337 unsigned int srchash; 347 unsigned int srchash;
338 348
339 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 349 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
340 write_lock_bh(&nf_nat_lock); 350 write_lock_bh(&nf_nat_lock);
351 /* nf_conntrack_alter_reply might re-allocate exntension aera */
352 info = &nfct_nat(ct)->info;
341 info->ct = ct; 353 info->ct = ct;
342 list_add(&info->bysource, &bysource[srchash]); 354 list_add(&info->bysource, &bysource[srchash]);
343 write_unlock_bh(&nf_nat_lock); 355 write_unlock_bh(&nf_nat_lock);
344 } 356 }
345 357
346 /* It's done. */ 358 /* It's done. */
347 if (maniptype == IP_NAT_MANIP_DST) 359 if (maniptype == IP_NAT_MANIP_DST)
348 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 360 set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
349 else 361 else
350 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 362 set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
351 363
352 return NF_ACCEPT; 364 return NF_ACCEPT;
353 } 365 }
354 EXPORT_SYMBOL(nf_nat_setup_info); 366 EXPORT_SYMBOL(nf_nat_setup_info);
355 367
356 /* Returns true if succeeded. */ 368 /* Returns true if succeeded. */
357 static int 369 static int
358 manip_pkt(u_int16_t proto, 370 manip_pkt(u_int16_t proto,
359 struct sk_buff **pskb, 371 struct sk_buff **pskb,
360 unsigned int iphdroff, 372 unsigned int iphdroff,
361 const struct nf_conntrack_tuple *target, 373 const struct nf_conntrack_tuple *target,
362 enum nf_nat_manip_type maniptype) 374 enum nf_nat_manip_type maniptype)
363 { 375 {
364 struct iphdr *iph; 376 struct iphdr *iph;
365 struct nf_nat_protocol *p; 377 struct nf_nat_protocol *p;
366 378
367 if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) 379 if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
368 return 0; 380 return 0;
369 381
370 iph = (void *)(*pskb)->data + iphdroff; 382 iph = (void *)(*pskb)->data + iphdroff;
371 383
372 /* Manipulate protcol part. */ 384 /* Manipulate protcol part. */
373 385
374 /* rcu_read_lock()ed by nf_hook_slow */ 386 /* rcu_read_lock()ed by nf_hook_slow */
375 p = __nf_nat_proto_find(proto); 387 p = __nf_nat_proto_find(proto);
376 if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) 388 if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
377 return 0; 389 return 0;
378 390
379 iph = (void *)(*pskb)->data + iphdroff; 391 iph = (void *)(*pskb)->data + iphdroff;
380 392
381 if (maniptype == IP_NAT_MANIP_SRC) { 393 if (maniptype == IP_NAT_MANIP_SRC) {
382 nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 394 nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
383 iph->saddr = target->src.u3.ip; 395 iph->saddr = target->src.u3.ip;
384 } else { 396 } else {
385 nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 397 nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
386 iph->daddr = target->dst.u3.ip; 398 iph->daddr = target->dst.u3.ip;
387 } 399 }
388 return 1; 400 return 1;
389 } 401 }
390 402
391 /* Do packet manipulations according to nf_nat_setup_info. */ 403 /* Do packet manipulations according to nf_nat_setup_info. */
392 unsigned int nf_nat_packet(struct nf_conn *ct, 404 unsigned int nf_nat_packet(struct nf_conn *ct,
393 enum ip_conntrack_info ctinfo, 405 enum ip_conntrack_info ctinfo,
394 unsigned int hooknum, 406 unsigned int hooknum,
395 struct sk_buff **pskb) 407 struct sk_buff **pskb)
396 { 408 {
397 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 409 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
398 unsigned long statusbit; 410 unsigned long statusbit;
399 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); 411 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
400 412
401 if (mtype == IP_NAT_MANIP_SRC) 413 if (mtype == IP_NAT_MANIP_SRC)
402 statusbit = IPS_SRC_NAT; 414 statusbit = IPS_SRC_NAT;
403 else 415 else
404 statusbit = IPS_DST_NAT; 416 statusbit = IPS_DST_NAT;
405 417
406 /* Invert if this is reply dir. */ 418 /* Invert if this is reply dir. */
407 if (dir == IP_CT_DIR_REPLY) 419 if (dir == IP_CT_DIR_REPLY)
408 statusbit ^= IPS_NAT_MASK; 420 statusbit ^= IPS_NAT_MASK;
409 421
410 /* Non-atomic: these bits don't change. */ 422 /* Non-atomic: these bits don't change. */
411 if (ct->status & statusbit) { 423 if (ct->status & statusbit) {
412 struct nf_conntrack_tuple target; 424 struct nf_conntrack_tuple target;
413 425
414 /* We are aiming to look like inverse of other direction. */ 426 /* We are aiming to look like inverse of other direction. */
415 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); 427 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
416 428
417 if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) 429 if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
418 return NF_DROP; 430 return NF_DROP;
419 } 431 }
420 return NF_ACCEPT; 432 return NF_ACCEPT;
421 } 433 }
422 EXPORT_SYMBOL_GPL(nf_nat_packet); 434 EXPORT_SYMBOL_GPL(nf_nat_packet);
423 435
424 /* Dir is direction ICMP is coming from (opposite to packet it contains) */ 436 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
425 int nf_nat_icmp_reply_translation(struct nf_conn *ct, 437 int nf_nat_icmp_reply_translation(struct nf_conn *ct,
426 enum ip_conntrack_info ctinfo, 438 enum ip_conntrack_info ctinfo,
427 unsigned int hooknum, 439 unsigned int hooknum,
428 struct sk_buff **pskb) 440 struct sk_buff **pskb)
429 { 441 {
430 struct { 442 struct {
431 struct icmphdr icmp; 443 struct icmphdr icmp;
432 struct iphdr ip; 444 struct iphdr ip;
433 } *inside; 445 } *inside;
434 struct nf_conntrack_l4proto *l4proto; 446 struct nf_conntrack_l4proto *l4proto;
435 struct nf_conntrack_tuple inner, target; 447 struct nf_conntrack_tuple inner, target;
436 int hdrlen = ip_hdrlen(*pskb); 448 int hdrlen = ip_hdrlen(*pskb);
437 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 449 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
438 unsigned long statusbit; 450 unsigned long statusbit;
439 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); 451 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
440 452
441 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) 453 if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
442 return 0; 454 return 0;
443 455
444 inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); 456 inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
445 457
446 /* We're actually going to mangle it beyond trivial checksum 458 /* We're actually going to mangle it beyond trivial checksum
447 adjustment, so make sure the current checksum is correct. */ 459 adjustment, so make sure the current checksum is correct. */
448 if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) 460 if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
449 return 0; 461 return 0;
450 462
451 /* Must be RELATED */ 463 /* Must be RELATED */
452 NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || 464 NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
453 (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); 465 (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
454 466
455 /* Redirects on non-null nats must be dropped, else they'll 467 /* Redirects on non-null nats must be dropped, else they'll
456 start talking to each other without our translation, and be 468 start talking to each other without our translation, and be
457 confused... --RR */ 469 confused... --RR */
458 if (inside->icmp.type == ICMP_REDIRECT) { 470 if (inside->icmp.type == ICMP_REDIRECT) {
459 /* If NAT isn't finished, assume it and drop. */ 471 /* If NAT isn't finished, assume it and drop. */
460 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) 472 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
461 return 0; 473 return 0;
462 474
463 if (ct->status & IPS_NAT_MASK) 475 if (ct->status & IPS_NAT_MASK)
464 return 0; 476 return 0;
465 } 477 }
466 478
467 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", 479 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
468 *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); 480 *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
469 481
470 /* rcu_read_lock()ed by nf_hook_slow */ 482 /* rcu_read_lock()ed by nf_hook_slow */
471 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); 483 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
472 484
473 if (!nf_ct_get_tuple(*pskb, 485 if (!nf_ct_get_tuple(*pskb,
474 ip_hdrlen(*pskb) + sizeof(struct icmphdr), 486 ip_hdrlen(*pskb) + sizeof(struct icmphdr),
475 (ip_hdrlen(*pskb) + 487 (ip_hdrlen(*pskb) +
476 sizeof(struct icmphdr) + inside->ip.ihl * 4), 488 sizeof(struct icmphdr) + inside->ip.ihl * 4),
477 (u_int16_t)AF_INET, 489 (u_int16_t)AF_INET,
478 inside->ip.protocol, 490 inside->ip.protocol,
479 &inner, l3proto, l4proto)) 491 &inner, l3proto, l4proto))
480 return 0; 492 return 0;
481 493
482 /* Change inner back to look like incoming packet. We do the 494 /* Change inner back to look like incoming packet. We do the
483 opposite manip on this hook to normal, because it might not 495 opposite manip on this hook to normal, because it might not
484 pass all hooks (locally-generated ICMP). Consider incoming 496 pass all hooks (locally-generated ICMP). Consider incoming
485 packet: PREROUTING (DST manip), routing produces ICMP, goes 497 packet: PREROUTING (DST manip), routing produces ICMP, goes
486 through POSTROUTING (which must correct the DST manip). */ 498 through POSTROUTING (which must correct the DST manip). */
487 if (!manip_pkt(inside->ip.protocol, pskb, 499 if (!manip_pkt(inside->ip.protocol, pskb,
488 ip_hdrlen(*pskb) + sizeof(inside->icmp), 500 ip_hdrlen(*pskb) + sizeof(inside->icmp),
489 &ct->tuplehash[!dir].tuple, 501 &ct->tuplehash[!dir].tuple,
490 !manip)) 502 !manip))
491 return 0; 503 return 0;
492 504
493 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { 505 if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
494 /* Reloading "inside" here since manip_pkt inner. */ 506 /* Reloading "inside" here since manip_pkt inner. */
495 inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); 507 inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
496 inside->icmp.checksum = 0; 508 inside->icmp.checksum = 0;
497 inside->icmp.checksum = 509 inside->icmp.checksum =
498 csum_fold(skb_checksum(*pskb, hdrlen, 510 csum_fold(skb_checksum(*pskb, hdrlen,
499 (*pskb)->len - hdrlen, 0)); 511 (*pskb)->len - hdrlen, 0));
500 } 512 }
501 513
502 /* Change outer to look the reply to an incoming packet 514 /* Change outer to look the reply to an incoming packet
503 * (proto 0 means don't invert per-proto part). */ 515 * (proto 0 means don't invert per-proto part). */
504 if (manip == IP_NAT_MANIP_SRC) 516 if (manip == IP_NAT_MANIP_SRC)
505 statusbit = IPS_SRC_NAT; 517 statusbit = IPS_SRC_NAT;
506 else 518 else
507 statusbit = IPS_DST_NAT; 519 statusbit = IPS_DST_NAT;
508 520
509 /* Invert if this is reply dir. */ 521 /* Invert if this is reply dir. */
510 if (dir == IP_CT_DIR_REPLY) 522 if (dir == IP_CT_DIR_REPLY)
511 statusbit ^= IPS_NAT_MASK; 523 statusbit ^= IPS_NAT_MASK;
512 524
513 if (ct->status & statusbit) { 525 if (ct->status & statusbit) {
514 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); 526 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
515 if (!manip_pkt(0, pskb, 0, &target, manip)) 527 if (!manip_pkt(0, pskb, 0, &target, manip))
516 return 0; 528 return 0;
517 } 529 }
518 530
519 return 1; 531 return 1;
520 } 532 }
521 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 533 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
522 534
523 /* Protocol registration. */ 535 /* Protocol registration. */
524 int nf_nat_protocol_register(struct nf_nat_protocol *proto) 536 int nf_nat_protocol_register(struct nf_nat_protocol *proto)
525 { 537 {
526 int ret = 0; 538 int ret = 0;
527 539
528 write_lock_bh(&nf_nat_lock); 540 write_lock_bh(&nf_nat_lock);
529 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 541 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
530 ret = -EBUSY; 542 ret = -EBUSY;
531 goto out; 543 goto out;
532 } 544 }
533 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); 545 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
534 out: 546 out:
535 write_unlock_bh(&nf_nat_lock); 547 write_unlock_bh(&nf_nat_lock);
536 return ret; 548 return ret;
537 } 549 }
538 EXPORT_SYMBOL(nf_nat_protocol_register); 550 EXPORT_SYMBOL(nf_nat_protocol_register);
539 551
540 /* Noone stores the protocol anywhere; simply delete it. */ 552 /* Noone stores the protocol anywhere; simply delete it. */
541 void nf_nat_protocol_unregister(struct nf_nat_protocol *proto) 553 void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
542 { 554 {
543 write_lock_bh(&nf_nat_lock); 555 write_lock_bh(&nf_nat_lock);
544 rcu_assign_pointer(nf_nat_protos[proto->protonum], 556 rcu_assign_pointer(nf_nat_protos[proto->protonum],
545 &nf_nat_unknown_protocol); 557 &nf_nat_unknown_protocol);
546 write_unlock_bh(&nf_nat_lock); 558 write_unlock_bh(&nf_nat_lock);
547 synchronize_rcu(); 559 synchronize_rcu();
548 } 560 }
549 EXPORT_SYMBOL(nf_nat_protocol_unregister); 561 EXPORT_SYMBOL(nf_nat_protocol_unregister);
550 562
551 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 563 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
552 int 564 int
553 nf_nat_port_range_to_nfattr(struct sk_buff *skb, 565 nf_nat_port_range_to_nfattr(struct sk_buff *skb,
554 const struct nf_nat_range *range) 566 const struct nf_nat_range *range)
555 { 567 {
556 NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), 568 NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
557 &range->min.tcp.port); 569 &range->min.tcp.port);
558 NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), 570 NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
559 &range->max.tcp.port); 571 &range->max.tcp.port);
560 572
561 return 0; 573 return 0;
562 574
563 nfattr_failure: 575 nfattr_failure:
564 return -1; 576 return -1;
565 } 577 }
566 EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range); 578 EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range);
567 579
568 int 580 int
569 nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range) 581 nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
570 { 582 {
571 int ret = 0; 583 int ret = 0;
572 584
573 /* we have to return whether we actually parsed something or not */ 585 /* we have to return whether we actually parsed something or not */
574 586
575 if (tb[CTA_PROTONAT_PORT_MIN-1]) { 587 if (tb[CTA_PROTONAT_PORT_MIN-1]) {
576 ret = 1; 588 ret = 1;
577 range->min.tcp.port = 589 range->min.tcp.port =
578 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); 590 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
579 } 591 }
580 592
581 if (!tb[CTA_PROTONAT_PORT_MAX-1]) { 593 if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
582 if (ret) 594 if (ret)
583 range->max.tcp.port = range->min.tcp.port; 595 range->max.tcp.port = range->min.tcp.port;
584 } else { 596 } else {
585 ret = 1; 597 ret = 1;
586 range->max.tcp.port = 598 range->max.tcp.port =
587 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); 599 *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
588 } 600 }
589 601
590 return ret; 602 return ret;
591 } 603 }
592 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr); 604 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);
593 #endif 605 #endif
594 606
607 static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
608 {
609 struct nf_conn_nat *new_nat = nf_ct_ext_find(conntrack, NF_CT_EXT_NAT);
610 struct nf_conn_nat *old_nat = (struct nf_conn_nat *)old;
611 struct nf_conn *ct = old_nat->info.ct;
612 unsigned int srchash;
613
614 if (!(ct->status & IPS_NAT_DONE_MASK))
615 return;
616
617 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
618
619 write_lock_bh(&nf_nat_lock);
620 list_replace(&old_nat->info.bysource, &new_nat->info.bysource);
621 new_nat->info.ct = ct;
622 write_unlock_bh(&nf_nat_lock);
623 }
624
625 struct nf_ct_ext_type nat_extend = {
626 .len = sizeof(struct nf_conn_nat),
627 .align = __alignof__(struct nf_conn_nat),
628 .move = nf_nat_move_storage,
629 .id = NF_CT_EXT_NAT,
630 .flags = NF_CT_EXT_F_PREALLOC,
631 };
632
595 static int __init nf_nat_init(void) 633 static int __init nf_nat_init(void)
596 { 634 {
597 size_t i; 635 size_t i;
636 int ret;
598 637
638 ret = nf_ct_extend_register(&nat_extend);
639 if (ret < 0) {
640 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
641 return ret;
642 }
643
599 /* Leave them the same for the moment. */ 644 /* Leave them the same for the moment. */
600 nf_nat_htable_size = nf_conntrack_htable_size; 645 nf_nat_htable_size = nf_conntrack_htable_size;
601 646
602 /* One vmalloc for both hash tables */ 647 /* One vmalloc for both hash tables */
603 bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size); 648 bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
604 if (!bysource) 649 if (!bysource) {
605 return -ENOMEM; 650 ret = -ENOMEM;
651 goto cleanup_extend;
652 }
606 653
607 /* Sew in builtin protocols. */ 654 /* Sew in builtin protocols. */
608 write_lock_bh(&nf_nat_lock); 655 write_lock_bh(&nf_nat_lock);
609 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 656 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
610 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); 657 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
611 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); 658 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
612 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); 659 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
613 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); 660 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
614 write_unlock_bh(&nf_nat_lock); 661 write_unlock_bh(&nf_nat_lock);
615 662
616 for (i = 0; i < nf_nat_htable_size; i++) { 663 for (i = 0; i < nf_nat_htable_size; i++) {
617 INIT_LIST_HEAD(&bysource[i]); 664 INIT_LIST_HEAD(&bysource[i]);
618 } 665 }
619 666
620 /* FIXME: Man, this is a hack. <SIGH> */ 667 /* FIXME: Man, this is a hack. <SIGH> */
621 NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL); 668 NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
622 rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack); 669 rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);
623 670
624 /* Initialize fake conntrack so that NAT will skip it */ 671 /* Initialize fake conntrack so that NAT will skip it */
625 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 672 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
626 673
627 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 674 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
628 return 0; 675 return 0;
676
677 cleanup_extend:
678 nf_ct_extend_unregister(&nat_extend);
679 return ret;
629 } 680 }
630 681
631 /* Clear NAT section of all conntracks, in case we're loaded again. */ 682 /* Clear NAT section of all conntracks, in case we're loaded again. */
632 static int clean_nat(struct nf_conn *i, void *data) 683 static int clean_nat(struct nf_conn *i, void *data)
633 { 684 {
634 struct nf_conn_nat *nat = nfct_nat(i); 685 struct nf_conn_nat *nat = nfct_nat(i);
635 686
636 if (!nat) 687 if (!nat)
637 return 0; 688 return 0;
638 memset(nat, 0, sizeof(nat)); 689 memset(nat, 0, sizeof(nat));
639 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); 690 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
640 return 0; 691 return 0;
641 } 692 }
642 693
643 static void __exit nf_nat_cleanup(void) 694 static void __exit nf_nat_cleanup(void)
644 { 695 {
645 nf_ct_iterate_cleanup(&clean_nat, NULL); 696 nf_ct_iterate_cleanup(&clean_nat, NULL);
646 rcu_assign_pointer(nf_conntrack_destroyed, NULL); 697 rcu_assign_pointer(nf_conntrack_destroyed, NULL);
647 synchronize_rcu(); 698 synchronize_rcu();
648 vfree(bysource); 699 vfree(bysource);
649 nf_ct_l3proto_put(l3proto); 700 nf_ct_l3proto_put(l3proto);
701 nf_ct_extend_unregister(&nat_extend);
650 } 702 }
651 703
652 MODULE_LICENSE("GPL"); 704 MODULE_LICENSE("GPL");
653 705
654 module_init(nf_nat_init); 706 module_init(nf_nat_init);
655 module_exit(nf_nat_cleanup); 707 module_exit(nf_nat_cleanup);
656 708
net/ipv4/netfilter/nf_nat_standalone.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8 #include <linux/types.h> 8 #include <linux/types.h>
9 #include <linux/icmp.h> 9 #include <linux/icmp.h>
10 #include <linux/ip.h> 10 #include <linux/ip.h>
11 #include <linux/netfilter.h> 11 #include <linux/netfilter.h>
12 #include <linux/netfilter_ipv4.h> 12 #include <linux/netfilter_ipv4.h>
13 #include <linux/module.h> 13 #include <linux/module.h>
14 #include <linux/skbuff.h> 14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h> 15 #include <linux/proc_fs.h>
16 #include <net/ip.h> 16 #include <net/ip.h>
17 #include <net/checksum.h> 17 #include <net/checksum.h>
18 #include <linux/spinlock.h> 18 #include <linux/spinlock.h>
19 19
20 #include <net/netfilter/nf_conntrack.h> 20 #include <net/netfilter/nf_conntrack.h>
21 #include <net/netfilter/nf_conntrack_core.h> 21 #include <net/netfilter/nf_conntrack_core.h>
22 #include <net/netfilter/nf_conntrack_extend.h>
22 #include <net/netfilter/nf_nat.h> 23 #include <net/netfilter/nf_nat.h>
23 #include <net/netfilter/nf_nat_rule.h> 24 #include <net/netfilter/nf_nat_rule.h>
24 #include <net/netfilter/nf_nat_protocol.h> 25 #include <net/netfilter/nf_nat_protocol.h>
25 #include <net/netfilter/nf_nat_core.h> 26 #include <net/netfilter/nf_nat_core.h>
26 #include <net/netfilter/nf_nat_helper.h> 27 #include <net/netfilter/nf_nat_helper.h>
27 #include <linux/netfilter_ipv4/ip_tables.h> 28 #include <linux/netfilter_ipv4/ip_tables.h>
28 29
29 #if 0 30 #if 0
30 #define DEBUGP printk 31 #define DEBUGP printk
31 #else 32 #else
32 #define DEBUGP(format, args...) 33 #define DEBUGP(format, args...)
33 #endif 34 #endif
34 35
35 #ifdef CONFIG_XFRM 36 #ifdef CONFIG_XFRM
36 static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) 37 static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
37 { 38 {
38 struct nf_conn *ct; 39 struct nf_conn *ct;
39 struct nf_conntrack_tuple *t; 40 struct nf_conntrack_tuple *t;
40 enum ip_conntrack_info ctinfo; 41 enum ip_conntrack_info ctinfo;
41 enum ip_conntrack_dir dir; 42 enum ip_conntrack_dir dir;
42 unsigned long statusbit; 43 unsigned long statusbit;
43 44
44 ct = nf_ct_get(skb, &ctinfo); 45 ct = nf_ct_get(skb, &ctinfo);
45 if (ct == NULL) 46 if (ct == NULL)
46 return; 47 return;
47 dir = CTINFO2DIR(ctinfo); 48 dir = CTINFO2DIR(ctinfo);
48 t = &ct->tuplehash[dir].tuple; 49 t = &ct->tuplehash[dir].tuple;
49 50
50 if (dir == IP_CT_DIR_ORIGINAL) 51 if (dir == IP_CT_DIR_ORIGINAL)
51 statusbit = IPS_DST_NAT; 52 statusbit = IPS_DST_NAT;
52 else 53 else
53 statusbit = IPS_SRC_NAT; 54 statusbit = IPS_SRC_NAT;
54 55
55 if (ct->status & statusbit) { 56 if (ct->status & statusbit) {
56 fl->fl4_dst = t->dst.u3.ip; 57 fl->fl4_dst = t->dst.u3.ip;
57 if (t->dst.protonum == IPPROTO_TCP || 58 if (t->dst.protonum == IPPROTO_TCP ||
58 t->dst.protonum == IPPROTO_UDP) 59 t->dst.protonum == IPPROTO_UDP)
59 fl->fl_ip_dport = t->dst.u.tcp.port; 60 fl->fl_ip_dport = t->dst.u.tcp.port;
60 } 61 }
61 62
62 statusbit ^= IPS_NAT_MASK; 63 statusbit ^= IPS_NAT_MASK;
63 64
64 if (ct->status & statusbit) { 65 if (ct->status & statusbit) {
65 fl->fl4_src = t->src.u3.ip; 66 fl->fl4_src = t->src.u3.ip;
66 if (t->dst.protonum == IPPROTO_TCP || 67 if (t->dst.protonum == IPPROTO_TCP ||
67 t->dst.protonum == IPPROTO_UDP) 68 t->dst.protonum == IPPROTO_UDP)
68 fl->fl_ip_sport = t->src.u.tcp.port; 69 fl->fl_ip_sport = t->src.u.tcp.port;
69 } 70 }
70 } 71 }
71 #endif 72 #endif
72 73
73 static unsigned int 74 static unsigned int
74 nf_nat_fn(unsigned int hooknum, 75 nf_nat_fn(unsigned int hooknum,
75 struct sk_buff **pskb, 76 struct sk_buff **pskb,
76 const struct net_device *in, 77 const struct net_device *in,
77 const struct net_device *out, 78 const struct net_device *out,
78 int (*okfn)(struct sk_buff *)) 79 int (*okfn)(struct sk_buff *))
79 { 80 {
80 struct nf_conn *ct; 81 struct nf_conn *ct;
81 enum ip_conntrack_info ctinfo; 82 enum ip_conntrack_info ctinfo;
82 struct nf_conn_nat *nat; 83 struct nf_conn_nat *nat;
83 /* maniptype == SRC for postrouting. */ 84 /* maniptype == SRC for postrouting. */
84 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 85 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
85 86
86 /* We never see fragments: conntrack defrags on pre-routing 87 /* We never see fragments: conntrack defrags on pre-routing
87 and local-out, and nf_nat_out protects post-routing. */ 88 and local-out, and nf_nat_out protects post-routing. */
88 NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); 89 NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
89 90
90 ct = nf_ct_get(*pskb, &ctinfo); 91 ct = nf_ct_get(*pskb, &ctinfo);
91 /* Can't track? It's not due to stress, or conntrack would 92 /* Can't track? It's not due to stress, or conntrack would
92 have dropped it. Hence it's the user's responsibilty to 93 have dropped it. Hence it's the user's responsibilty to
93 packet filter it out, or implement conntrack/NAT for that 94 packet filter it out, or implement conntrack/NAT for that
94 protocol. 8) --RR */ 95 protocol. 8) --RR */
95 if (!ct) { 96 if (!ct) {
96 /* Exception: ICMP redirect to new connection (not in 97 /* Exception: ICMP redirect to new connection (not in
97 hash table yet). We must not let this through, in 98 hash table yet). We must not let this through, in
98 case we're doing NAT to the same network. */ 99 case we're doing NAT to the same network. */
99 if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { 100 if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
100 struct icmphdr _hdr, *hp; 101 struct icmphdr _hdr, *hp;
101 102
102 hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), 103 hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
103 sizeof(_hdr), &_hdr); 104 sizeof(_hdr), &_hdr);
104 if (hp != NULL && 105 if (hp != NULL &&
105 hp->type == ICMP_REDIRECT) 106 hp->type == ICMP_REDIRECT)
106 return NF_DROP; 107 return NF_DROP;
107 } 108 }
108 return NF_ACCEPT; 109 return NF_ACCEPT;
109 } 110 }
110 111
111 /* Don't try to NAT if this packet is not conntracked */ 112 /* Don't try to NAT if this packet is not conntracked */
112 if (ct == &nf_conntrack_untracked) 113 if (ct == &nf_conntrack_untracked)
113 return NF_ACCEPT; 114 return NF_ACCEPT;
114 115
115 nat = nfct_nat(ct); 116 nat = nfct_nat(ct);
116 if (!nat) 117 if (!nat) {
117 return NF_ACCEPT; 118 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
119 if (nat == NULL) {
120 DEBUGP("failed to add NAT extension\n");
121 return NF_ACCEPT;
122 }
123 }
118 124
119 switch (ctinfo) { 125 switch (ctinfo) {
120 case IP_CT_RELATED: 126 case IP_CT_RELATED:
121 case IP_CT_RELATED+IP_CT_IS_REPLY: 127 case IP_CT_RELATED+IP_CT_IS_REPLY:
122 if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { 128 if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
123 if (!nf_nat_icmp_reply_translation(ct, ctinfo, 129 if (!nf_nat_icmp_reply_translation(ct, ctinfo,
124 hooknum, pskb)) 130 hooknum, pskb))
125 return NF_DROP; 131 return NF_DROP;
126 else 132 else
127 return NF_ACCEPT; 133 return NF_ACCEPT;
128 } 134 }
129 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 135 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
130 case IP_CT_NEW: 136 case IP_CT_NEW:
131 137
132 /* Seen it before? This can happen for loopback, retrans, 138 /* Seen it before? This can happen for loopback, retrans,
133 or local packets.. */ 139 or local packets.. */
134 if (!nf_nat_initialized(ct, maniptype)) { 140 if (!nf_nat_initialized(ct, maniptype)) {
135 unsigned int ret; 141 unsigned int ret;
136 142
137 if (unlikely(nf_ct_is_confirmed(ct))) 143 if (unlikely(nf_ct_is_confirmed(ct)))
138 /* NAT module was loaded late */ 144 /* NAT module was loaded late */
139 ret = alloc_null_binding_confirmed(ct, hooknum); 145 ret = alloc_null_binding_confirmed(ct, hooknum);
140 else if (hooknum == NF_IP_LOCAL_IN) 146 else if (hooknum == NF_IP_LOCAL_IN)
141 /* LOCAL_IN hook doesn't have a chain! */ 147 /* LOCAL_IN hook doesn't have a chain! */
142 ret = alloc_null_binding(ct, hooknum); 148 ret = alloc_null_binding(ct, hooknum);
143 else 149 else
144 ret = nf_nat_rule_find(pskb, hooknum, in, out, 150 ret = nf_nat_rule_find(pskb, hooknum, in, out,
145 ct); 151 ct);
146 152
147 if (ret != NF_ACCEPT) { 153 if (ret != NF_ACCEPT) {
148 return ret; 154 return ret;
149 } 155 }
150 } else 156 } else
151 DEBUGP("Already setup manip %s for ct %p\n", 157 DEBUGP("Already setup manip %s for ct %p\n",
152 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", 158 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
153 ct); 159 ct);
154 break; 160 break;
155 161
156 default: 162 default:
157 /* ESTABLISHED */ 163 /* ESTABLISHED */
158 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 164 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
159 ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); 165 ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
160 } 166 }
161 167
162 return nf_nat_packet(ct, ctinfo, hooknum, pskb); 168 return nf_nat_packet(ct, ctinfo, hooknum, pskb);
163 } 169 }
164 170
165 static unsigned int 171 static unsigned int
166 nf_nat_in(unsigned int hooknum, 172 nf_nat_in(unsigned int hooknum,
167 struct sk_buff **pskb, 173 struct sk_buff **pskb,
168 const struct net_device *in, 174 const struct net_device *in,
169 const struct net_device *out, 175 const struct net_device *out,
170 int (*okfn)(struct sk_buff *)) 176 int (*okfn)(struct sk_buff *))
171 { 177 {
172 unsigned int ret; 178 unsigned int ret;
173 __be32 daddr = ip_hdr(*pskb)->daddr; 179 __be32 daddr = ip_hdr(*pskb)->daddr;
174 180
175 ret = nf_nat_fn(hooknum, pskb, in, out, okfn); 181 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
176 if (ret != NF_DROP && ret != NF_STOLEN && 182 if (ret != NF_DROP && ret != NF_STOLEN &&
177 daddr != ip_hdr(*pskb)->daddr) { 183 daddr != ip_hdr(*pskb)->daddr) {
178 dst_release((*pskb)->dst); 184 dst_release((*pskb)->dst);
179 (*pskb)->dst = NULL; 185 (*pskb)->dst = NULL;
180 } 186 }
181 return ret; 187 return ret;
182 } 188 }
183 189
184 static unsigned int 190 static unsigned int
185 nf_nat_out(unsigned int hooknum, 191 nf_nat_out(unsigned int hooknum,
186 struct sk_buff **pskb, 192 struct sk_buff **pskb,
187 const struct net_device *in, 193 const struct net_device *in,
188 const struct net_device *out, 194 const struct net_device *out,
189 int (*okfn)(struct sk_buff *)) 195 int (*okfn)(struct sk_buff *))
190 { 196 {
191 #ifdef CONFIG_XFRM 197 #ifdef CONFIG_XFRM
192 struct nf_conn *ct; 198 struct nf_conn *ct;
193 enum ip_conntrack_info ctinfo; 199 enum ip_conntrack_info ctinfo;
194 #endif 200 #endif
195 unsigned int ret; 201 unsigned int ret;
196 202
197 /* root is playing with raw sockets. */ 203 /* root is playing with raw sockets. */
198 if ((*pskb)->len < sizeof(struct iphdr) || 204 if ((*pskb)->len < sizeof(struct iphdr) ||
199 ip_hdrlen(*pskb) < sizeof(struct iphdr)) 205 ip_hdrlen(*pskb) < sizeof(struct iphdr))
200 return NF_ACCEPT; 206 return NF_ACCEPT;
201 207
202 ret = nf_nat_fn(hooknum, pskb, in, out, okfn); 208 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
203 #ifdef CONFIG_XFRM 209 #ifdef CONFIG_XFRM
204 if (ret != NF_DROP && ret != NF_STOLEN && 210 if (ret != NF_DROP && ret != NF_STOLEN &&
205 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { 211 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
206 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 212 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
207 213
208 if (ct->tuplehash[dir].tuple.src.u3.ip != 214 if (ct->tuplehash[dir].tuple.src.u3.ip !=
209 ct->tuplehash[!dir].tuple.dst.u3.ip 215 ct->tuplehash[!dir].tuple.dst.u3.ip
210 || ct->tuplehash[dir].tuple.src.u.all != 216 || ct->tuplehash[dir].tuple.src.u.all !=
211 ct->tuplehash[!dir].tuple.dst.u.all 217 ct->tuplehash[!dir].tuple.dst.u.all
212 ) 218 )
213 return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; 219 return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
214 } 220 }
215 #endif 221 #endif
216 return ret; 222 return ret;
217 } 223 }
218 224
219 static unsigned int 225 static unsigned int
220 nf_nat_local_fn(unsigned int hooknum, 226 nf_nat_local_fn(unsigned int hooknum,
221 struct sk_buff **pskb, 227 struct sk_buff **pskb,
222 const struct net_device *in, 228 const struct net_device *in,
223 const struct net_device *out, 229 const struct net_device *out,
224 int (*okfn)(struct sk_buff *)) 230 int (*okfn)(struct sk_buff *))
225 { 231 {
226 struct nf_conn *ct; 232 struct nf_conn *ct;
227 enum ip_conntrack_info ctinfo; 233 enum ip_conntrack_info ctinfo;
228 unsigned int ret; 234 unsigned int ret;
229 235
230 /* root is playing with raw sockets. */ 236 /* root is playing with raw sockets. */
231 if ((*pskb)->len < sizeof(struct iphdr) || 237 if ((*pskb)->len < sizeof(struct iphdr) ||
232 ip_hdrlen(*pskb) < sizeof(struct iphdr)) 238 ip_hdrlen(*pskb) < sizeof(struct iphdr))
233 return NF_ACCEPT; 239 return NF_ACCEPT;
234 240
235 ret = nf_nat_fn(hooknum, pskb, in, out, okfn); 241 ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
236 if (ret != NF_DROP && ret != NF_STOLEN && 242 if (ret != NF_DROP && ret != NF_STOLEN &&
237 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { 243 (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
238 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 244 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
239 245
240 if (ct->tuplehash[dir].tuple.dst.u3.ip != 246 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
241 ct->tuplehash[!dir].tuple.src.u3.ip) { 247 ct->tuplehash[!dir].tuple.src.u3.ip) {
242 if (ip_route_me_harder(pskb, RTN_UNSPEC)) 248 if (ip_route_me_harder(pskb, RTN_UNSPEC))
243 ret = NF_DROP; 249 ret = NF_DROP;
244 } 250 }
245 #ifdef CONFIG_XFRM 251 #ifdef CONFIG_XFRM
246 else if (ct->tuplehash[dir].tuple.dst.u.all != 252 else if (ct->tuplehash[dir].tuple.dst.u.all !=
247 ct->tuplehash[!dir].tuple.src.u.all) 253 ct->tuplehash[!dir].tuple.src.u.all)
248 if (ip_xfrm_me_harder(pskb)) 254 if (ip_xfrm_me_harder(pskb))
249 ret = NF_DROP; 255 ret = NF_DROP;
250 #endif 256 #endif
251 } 257 }
252 return ret; 258 return ret;
253 } 259 }
254 260
255 static unsigned int 261 static unsigned int
256 nf_nat_adjust(unsigned int hooknum, 262 nf_nat_adjust(unsigned int hooknum,
257 struct sk_buff **pskb, 263 struct sk_buff **pskb,
258 const struct net_device *in, 264 const struct net_device *in,
259 const struct net_device *out, 265 const struct net_device *out,
260 int (*okfn)(struct sk_buff *)) 266 int (*okfn)(struct sk_buff *))
261 { 267 {
262 struct nf_conn *ct; 268 struct nf_conn *ct;
263 enum ip_conntrack_info ctinfo; 269 enum ip_conntrack_info ctinfo;
264 270
265 ct = nf_ct_get(*pskb, &ctinfo); 271 ct = nf_ct_get(*pskb, &ctinfo);
266 if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { 272 if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
267 DEBUGP("nf_nat_standalone: adjusting sequence number\n"); 273 DEBUGP("nf_nat_standalone: adjusting sequence number\n");
268 if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) 274 if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
269 return NF_DROP; 275 return NF_DROP;
270 } 276 }
271 return NF_ACCEPT; 277 return NF_ACCEPT;
272 } 278 }
273 279
274 /* We must be after connection tracking and before packet filtering. */ 280 /* We must be after connection tracking and before packet filtering. */
275 281
276 static struct nf_hook_ops nf_nat_ops[] = { 282 static struct nf_hook_ops nf_nat_ops[] = {
277 /* Before packet filtering, change destination */ 283 /* Before packet filtering, change destination */
278 { 284 {
279 .hook = nf_nat_in, 285 .hook = nf_nat_in,
280 .owner = THIS_MODULE, 286 .owner = THIS_MODULE,
281 .pf = PF_INET, 287 .pf = PF_INET,
282 .hooknum = NF_IP_PRE_ROUTING, 288 .hooknum = NF_IP_PRE_ROUTING,
283 .priority = NF_IP_PRI_NAT_DST, 289 .priority = NF_IP_PRI_NAT_DST,
284 }, 290 },
285 /* After packet filtering, change source */ 291 /* After packet filtering, change source */
286 { 292 {
287 .hook = nf_nat_out, 293 .hook = nf_nat_out,
288 .owner = THIS_MODULE, 294 .owner = THIS_MODULE,
289 .pf = PF_INET, 295 .pf = PF_INET,
290 .hooknum = NF_IP_POST_ROUTING, 296 .hooknum = NF_IP_POST_ROUTING,
291 .priority = NF_IP_PRI_NAT_SRC, 297 .priority = NF_IP_PRI_NAT_SRC,
292 }, 298 },
293 /* After conntrack, adjust sequence number */ 299 /* After conntrack, adjust sequence number */
294 { 300 {
295 .hook = nf_nat_adjust, 301 .hook = nf_nat_adjust,
296 .owner = THIS_MODULE, 302 .owner = THIS_MODULE,
297 .pf = PF_INET, 303 .pf = PF_INET,
298 .hooknum = NF_IP_POST_ROUTING, 304 .hooknum = NF_IP_POST_ROUTING,
299 .priority = NF_IP_PRI_NAT_SEQ_ADJUST, 305 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
300 }, 306 },
301 /* Before packet filtering, change destination */ 307 /* Before packet filtering, change destination */
302 { 308 {
303 .hook = nf_nat_local_fn, 309 .hook = nf_nat_local_fn,
304 .owner = THIS_MODULE, 310 .owner = THIS_MODULE,
305 .pf = PF_INET, 311 .pf = PF_INET,
306 .hooknum = NF_IP_LOCAL_OUT, 312 .hooknum = NF_IP_LOCAL_OUT,
307 .priority = NF_IP_PRI_NAT_DST, 313 .priority = NF_IP_PRI_NAT_DST,
308 }, 314 },
309 /* After packet filtering, change source */ 315 /* After packet filtering, change source */
310 { 316 {
311 .hook = nf_nat_fn, 317 .hook = nf_nat_fn,
312 .owner = THIS_MODULE, 318 .owner = THIS_MODULE,
313 .pf = PF_INET, 319 .pf = PF_INET,
314 .hooknum = NF_IP_LOCAL_IN, 320 .hooknum = NF_IP_LOCAL_IN,
315 .priority = NF_IP_PRI_NAT_SRC, 321 .priority = NF_IP_PRI_NAT_SRC,
316 }, 322 },
317 /* After conntrack, adjust sequence number */ 323 /* After conntrack, adjust sequence number */
318 { 324 {
319 .hook = nf_nat_adjust, 325 .hook = nf_nat_adjust,
320 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
321 .pf = PF_INET, 327 .pf = PF_INET,
322 .hooknum = NF_IP_LOCAL_IN, 328 .hooknum = NF_IP_LOCAL_IN,
323 .priority = NF_IP_PRI_NAT_SEQ_ADJUST, 329 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
324 }, 330 },
325 }; 331 };
326 332
327 static int __init nf_nat_standalone_init(void) 333 static int __init nf_nat_standalone_init(void)
328 { 334 {
329 int size, ret = 0; 335 int ret = 0;
330 336
331 need_conntrack(); 337 need_conntrack();
332 338
333 size = ALIGN(sizeof(struct nf_conn), __alignof__(struct nf_conn_nat)) +
334 sizeof(struct nf_conn_nat);
335 ret = nf_conntrack_register_cache(NF_CT_F_NAT, "nf_nat:base", size);
336 if (ret < 0) {
337 printk(KERN_ERR "nf_nat_init: Unable to create slab cache\n");
338 return ret;
339 }
340
341 #ifdef CONFIG_XFRM 339 #ifdef CONFIG_XFRM
342 BUG_ON(ip_nat_decode_session != NULL); 340 BUG_ON(ip_nat_decode_session != NULL);
343 ip_nat_decode_session = nat_decode_session; 341 ip_nat_decode_session = nat_decode_session;
344 #endif 342 #endif
345 ret = nf_nat_rule_init(); 343 ret = nf_nat_rule_init();
346 if (ret < 0) { 344 if (ret < 0) {
347 printk("nf_nat_init: can't setup rules.\n"); 345 printk("nf_nat_init: can't setup rules.\n");
348 goto cleanup_decode_session; 346 goto cleanup_decode_session;
349 } 347 }
350 ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); 348 ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
351 if (ret < 0) { 349 if (ret < 0) {
352 printk("nf_nat_init: can't register hooks.\n"); 350 printk("nf_nat_init: can't register hooks.\n");
353 goto cleanup_rule_init; 351 goto cleanup_rule_init;
354 } 352 }
355 nf_nat_module_is_loaded = 1; 353 nf_nat_module_is_loaded = 1;
356 return ret; 354 return ret;
357 355
358 cleanup_rule_init: 356 cleanup_rule_init:
359 nf_nat_rule_cleanup(); 357 nf_nat_rule_cleanup();
360 cleanup_decode_session: 358 cleanup_decode_session:
361 #ifdef CONFIG_XFRM 359 #ifdef CONFIG_XFRM
362 ip_nat_decode_session = NULL; 360 ip_nat_decode_session = NULL;
363 synchronize_net(); 361 synchronize_net();
364 #endif 362 #endif
365 nf_conntrack_unregister_cache(NF_CT_F_NAT);
366 return ret; 363 return ret;
367 } 364 }
368 365
369 static void __exit nf_nat_standalone_fini(void) 366 static void __exit nf_nat_standalone_fini(void)
370 { 367 {
371 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); 368 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
372 nf_nat_rule_cleanup(); 369 nf_nat_rule_cleanup();
373 nf_nat_module_is_loaded = 0; 370 nf_nat_module_is_loaded = 0;
374 #ifdef CONFIG_XFRM 371 #ifdef CONFIG_XFRM
375 ip_nat_decode_session = NULL; 372 ip_nat_decode_session = NULL;
376 synchronize_net(); 373 synchronize_net();
377 #endif 374 #endif
378 /* Conntrack caches are unregistered in nf_conntrack_cleanup */ 375 /* Conntrack caches are unregistered in nf_conntrack_cleanup */
379 } 376 }
380 377