Blame view
net/ipv4/netfilter/ipt_CLUSTERIP.c
19.6 KB
e905a9eda
|
1 |
/* Cluster IP hashmark target |
1da177e4c
|
2 3 4 5 6 7 8 9 10 11 |
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * based on ideas of Fabio Olive Leite <olive@unixforge.org> * * Development of this code funded by SuSE Linux AG, http://www.suse.com/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * */ |
ff67e4e42
|
12 |
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
1da177e4c
|
13 |
#include <linux/module.h> |
1da177e4c
|
14 15 |
#include <linux/proc_fs.h> #include <linux/jhash.h> |
136e92bbe
|
16 |
#include <linux/bitops.h> |
1da177e4c
|
17 |
#include <linux/skbuff.h> |
5a0e3ad6a
|
18 |
#include <linux/slab.h> |
1da177e4c
|
19 20 21 22 23 |
#include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <linux/if_arp.h> |
1da177e4c
|
24 |
#include <linux/seq_file.h> |
b54ab92b8
|
25 |
#include <linux/refcount.h> |
1da177e4c
|
26 |
#include <linux/netfilter_arp.h> |
6709dbbb1
|
27 |
#include <linux/netfilter/x_tables.h> |
1da177e4c
|
28 29 |
#include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> |
587aa6416
|
30 |
#include <net/netfilter/nf_conntrack.h> |
457c4cbc5
|
31 |
#include <net/net_namespace.h> |
ce4ff76c1
|
32 |
#include <net/netns/generic.h> |
587aa6416
|
33 |
#include <net/checksum.h> |
3d04ebb6a
|
34 |
#include <net/ip.h> |
1da177e4c
|
35 |
|
136e92bbe
|
36 |
#define CLUSTERIP_VERSION "0.8" |
1da177e4c
|
37 |
|
1da177e4c
|
38 39 |
MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); |
2ae15b64e
|
40 |
MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); |
1da177e4c
|
41 42 43 |
struct clusterip_config { struct list_head list; /* list of all configs */ |
b54ab92b8
|
44 45 |
refcount_t refcount; /* reference count */ refcount_t entries; /* number of entries/rules |
445136244
|
46 |
* referencing us */ |
1da177e4c
|
47 |
|
6a19d6147
|
48 |
__be32 clusterip; /* the IP address */ |
1da177e4c
|
49 50 51 |
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ |
136e92bbe
|
52 |
unsigned long local_nodes; /* node number array */ |
1da177e4c
|
53 54 55 56 57 58 |
#ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ |
d73f33b16
|
59 |
struct rcu_head rcu; |
1da177e4c
|
60 |
}; |
1da177e4c
|
61 |
#ifdef CONFIG_PROC_FS |
9a32144e9
|
62 |
static const struct file_operations clusterip_proc_fops; |
1da177e4c
|
63 |
#endif |
1da177e4c
|
64 |
|
c7d03a00b
|
65 |
static unsigned int clusterip_net_id __read_mostly; |
ce4ff76c1
|
66 67 |
struct clusterip_net { |
26a89e435
|
68 |
struct list_head configs; |
f1e8077f4
|
69 70 |
/* lock protects the configs list */ spinlock_t lock; |
1da177e4c
|
71 72 |
#ifdef CONFIG_PROC_FS |
ce4ff76c1
|
73 |
struct proc_dir_entry *procdir; |
1da177e4c
|
74 |
#endif |
ce4ff76c1
|
75 |
}; |
1da177e4c
|
76 77 |
static inline void |
445136244
|
78 79 |
clusterip_config_get(struct clusterip_config *c) { |
b54ab92b8
|
80 |
refcount_inc(&c->refcount); |
1da177e4c
|
81 |
} |
d73f33b16
|
82 83 84 85 86 |
/* RCU callback: free the config that embeds @head */
static void clusterip_config_rcu_free(struct rcu_head *head)
{
	struct clusterip_config *cfg;

	cfg = container_of(head, struct clusterip_config, rcu);
	kfree(cfg);
}
1da177e4c
|
87 |
static inline void |
445136244
|
88 89 |
clusterip_config_put(struct clusterip_config *c) { |
b54ab92b8
|
90 |
if (refcount_dec_and_test(&c->refcount)) |
d73f33b16
|
91 |
call_rcu_bh(&c->rcu, clusterip_config_rcu_free); |
445136244
|
92 |
} |
445136244
|
93 94 95 96 97 98 |
/* decrease the count of entries using/referencing this config. If last * entry(rule) is removed, remove the config from lists, but don't free it * yet, since proc-files could still be holding references */ static inline void clusterip_config_entry_put(struct clusterip_config *c) { |
d86946d2c
|
99 100 |
struct net *net = dev_net(c->dev); struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
f1e8077f4
|
101 |
|
d73f33b16
|
102 |
local_bh_disable(); |
b54ab92b8
|
103 |
if (refcount_dec_and_lock(&c->entries, &cn->lock)) { |
d73f33b16
|
104 |
list_del_rcu(&c->list); |
f1e8077f4
|
105 |
spin_unlock(&cn->lock); |
d73f33b16
|
106 |
local_bh_enable(); |
445136244
|
107 |
|
22bedad3c
|
108 |
dev_mc_del(c->dev, c->clustermac); |
1da177e4c
|
109 |
dev_put(c->dev); |
445136244
|
110 111 112 113 114 |
/* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS |
a8ca16ea7
|
115 |
proc_remove(c->pde); |
445136244
|
116 |
#endif |
4dee95972
|
117 |
return; |
1da177e4c
|
118 |
} |
d73f33b16
|
119 |
local_bh_enable(); |
1da177e4c
|
120 |
} |
1da177e4c
|
121 |
static struct clusterip_config * |
b5ef0f85b
|
122 |
__clusterip_config_find(struct net *net, __be32 clusterip) |
1da177e4c
|
123 |
{ |
4c6109795
|
124 |
struct clusterip_config *c; |
b5ef0f85b
|
125 |
struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
1da177e4c
|
126 |
|
26a89e435
|
127 |
list_for_each_entry_rcu(c, &cn->configs, list) { |
7c4e36bc1
|
128 |
if (c->clusterip == clusterip) |
1da177e4c
|
129 |
return c; |
1da177e4c
|
130 131 132 133 134 135 |
} return NULL; } static inline struct clusterip_config * |
b5ef0f85b
|
136 |
clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) |
1da177e4c
|
137 138 |
{ struct clusterip_config *c; |
d73f33b16
|
139 |
rcu_read_lock_bh(); |
b5ef0f85b
|
140 |
c = __clusterip_config_find(net, clusterip); |
d73f33b16
|
141 |
if (c) { |
3fd0b634d
|
142 143 144 145 146 |
#ifdef CONFIG_PROC_FS if (!c->pde) c = NULL; else #endif |
b54ab92b8
|
147 |
if (unlikely(!refcount_inc_not_zero(&c->refcount))) |
d73f33b16
|
148 149 |
c = NULL; else if (entry) |
b54ab92b8
|
150 |
refcount_inc(&c->entries); |
1da177e4c
|
151 |
} |
d73f33b16
|
152 |
rcu_read_unlock_bh(); |
1da177e4c
|
153 154 155 |
return c; } |
136e92bbe
|
156 157 158 159 160 |
static void clusterip_config_init_nodelist(struct clusterip_config *c, const struct ipt_clusterip_tgt_info *i) { int n; |
7c4e36bc1
|
161 |
for (n = 0; n < i->num_local_nodes; n++) |
136e92bbe
|
162 |
set_bit(i->local_nodes[n] - 1, &c->local_nodes); |
136e92bbe
|
163 |
} |
1da177e4c
|
164 |
static struct clusterip_config * |
3cf93c96a
|
165 |
clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, |
6c5d5cfbe
|
166 |
struct net_device *dev) |
1da177e4c
|
167 |
{ |
6c5d5cfbe
|
168 |
struct net *net = dev_net(dev); |
1da177e4c
|
169 |
struct clusterip_config *c; |
6c5d5cfbe
|
170 |
struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
1da177e4c
|
171 |
|
0da974f4f
|
172 |
c = kzalloc(sizeof(*c), GFP_ATOMIC); |
1da177e4c
|
173 |
if (!c) |
6c5d5cfbe
|
174 |
return ERR_PTR(-ENOMEM); |
1da177e4c
|
175 |
|
1da177e4c
|
176 177 178 179 |
c->dev = dev; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; |
136e92bbe
|
180 |
clusterip_config_init_nodelist(c, i); |
1da177e4c
|
181 182 |
c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; |
b54ab92b8
|
183 184 |
refcount_set(&c->refcount, 1); refcount_set(&c->entries, 1); |
1da177e4c
|
185 |
|
6c5d5cfbe
|
186 187 188 189 190 191 192 193 194 195 |
spin_lock_bh(&cn->lock); if (__clusterip_config_find(net, ip)) { spin_unlock_bh(&cn->lock); kfree(c); return ERR_PTR(-EBUSY); } list_add_rcu(&c->list, &cn->configs); spin_unlock_bh(&cn->lock); |
1da177e4c
|
196 |
#ifdef CONFIG_PROC_FS |
76592584b
|
197 198 199 200 |
{ char buffer[16]; /* create proc dir entry */ |
cffee385d
|
201 |
sprintf(buffer, "%pI4", &ip); |
6e79d85d9
|
202 |
c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, |
ce4ff76c1
|
203 |
cn->procdir, |
6e79d85d9
|
204 |
&clusterip_proc_fops, c); |
76592584b
|
205 |
if (!c->pde) { |
6c5d5cfbe
|
206 207 208 |
spin_lock_bh(&cn->lock); list_del_rcu(&c->list); spin_unlock_bh(&cn->lock); |
76592584b
|
209 |
kfree(c); |
6c5d5cfbe
|
210 211 |
return ERR_PTR(-ENOMEM); |
76592584b
|
212 |
} |
1da177e4c
|
213 |
} |
1da177e4c
|
214 |
#endif |
1da177e4c
|
215 216 |
return c; } |
76592584b
|
217 |
#ifdef CONFIG_PROC_FS |
1da177e4c
|
218 219 220 |
/*
 * Mark node @nodenum as local in @c.
 * Returns 0 on success, 1 if @nodenum is outside 1..num_total_nodes or the
 * node was already marked.
 */
static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
	if (nodenum < 1 || nodenum > c->num_total_nodes)
		return 1;

	/* a bit that was already set means the node was added before */
	return test_and_set_bit(nodenum - 1, &c->local_nodes) ? 1 : 0;
}
e1931b784
|
232 |
static bool |
1da177e4c
|
233 234 |
clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { |
136e92bbe
|
235 236 |
if (nodenum == 0 || nodenum > c->num_total_nodes) |
e1931b784
|
237 |
return true; |
e905a9eda
|
238 |
|
136e92bbe
|
239 |
if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
e1931b784
|
240 |
return false; |
1da177e4c
|
241 |
|
e1931b784
|
242 |
return true; |
1da177e4c
|
243 |
} |
76592584b
|
244 |
#endif |
1da177e4c
|
245 246 |
static inline u_int32_t |
a47362a22
|
247 248 |
clusterip_hashfn(const struct sk_buff *skb, const struct clusterip_config *config) |
1da177e4c
|
249 |
{ |
a47362a22
|
250 |
const struct iphdr *iph = ip_hdr(skb); |
1da177e4c
|
251 |
unsigned long hashval; |
3d04ebb6a
|
252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
u_int16_t sport = 0, dport = 0; int poff; poff = proto_ports_offset(iph->protocol); if (poff >= 0) { const u_int16_t *ports; u16 _ports[2]; ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); if (ports) { sport = ports[0]; dport = ports[1]; } } else { |
e87cc4728
|
266 267 |
net_info_ratelimited("unknown protocol %u ", iph->protocol); |
1da177e4c
|
268 269 270 271 272 273 274 275 |
} switch (config->hash_mode) { case CLUSTERIP_HASHMODE_SIP: hashval = jhash_1word(ntohl(iph->saddr), config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT: |
e905a9eda
|
276 |
hashval = jhash_2words(ntohl(iph->saddr), sport, |
1da177e4c
|
277 278 279 280 281 282 283 284 285 286 287 |
config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT_DPT: hashval = jhash_3words(ntohl(iph->saddr), sport, dport, config->hash_initval); break; default: /* to make gcc happy */ hashval = 0; /* This cannot happen, unless the check function wasn't called * at rule load time */ |
ff67e4e42
|
288 289 |
pr_info("unknown mode %u ", config->hash_mode); |
1da177e4c
|
290 291 292 293 294 |
BUG(); break; } /* node numbers are 1..n, not 0..n */ |
8fc54f689
|
295 |
return reciprocal_scale(hashval, config->num_total_nodes) + 1; |
1da177e4c
|
296 297 298 |
} static inline int |
a47362a22
|
299 |
clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) |
1da177e4c
|
300 |
{ |
136e92bbe
|
301 |
return test_bit(hash - 1, &config->local_nodes); |
1da177e4c
|
302 |
} |
e905a9eda
|
303 304 |
/*********************************************************************** * IPTABLES TARGET |
1da177e4c
|
305 306 307 |
***********************************************************************/ static unsigned int |
4b560b447
|
308 |
clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) |
1da177e4c
|
309 |
{ |
7eb355865
|
310 |
const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
587aa6416
|
311 |
struct nf_conn *ct; |
1da177e4c
|
312 |
enum ip_conntrack_info ctinfo; |
587aa6416
|
313 |
u_int32_t hash; |
1da177e4c
|
314 315 316 317 |
/* don't need to clusterip_config_get() here, since refcount * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ |
3db05fea5
|
318 |
ct = nf_ct_get(skb, &ctinfo); |
94d117a1c
|
319 |
if (ct == NULL) |
1da177e4c
|
320 |
return NF_DROP; |
1da177e4c
|
321 322 323 |
/* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ |
3666ed1c4
|
324 325 |
if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || |
fb0488337
|
326 |
ctinfo == IP_CT_RELATED_REPLY)) |
6709dbbb1
|
327 |
return XT_CONTINUE; |
1da177e4c
|
328 |
|
e905a9eda
|
329 |
/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
1da177e4c
|
330 331 |
* TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ |
3db05fea5
|
332 |
hash = clusterip_hashfn(skb, cipinfo->config); |
1da177e4c
|
333 334 |
switch (ctinfo) { |
181b1e9ce
|
335 336 337 338 339 340 341 342 343 344 345 346 347 |
case IP_CT_NEW: ct->mark = hash; break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the moment. * They can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: case IP_CT_ESTABLISHED_REPLY: break; default: /* Prevent gcc warnings */ break; |
1da177e4c
|
348 |
} |
0d53778e8
|
349 |
#ifdef DEBUG |
3c9fba656
|
350 |
nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
1da177e4c
|
351 |
#endif |
0d53778e8
|
352 |
pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
1da177e4c
|
353 |
if (!clusterip_responsible(cipinfo->config, hash)) { |
0d53778e8
|
354 355 |
pr_debug("not responsible "); |
1da177e4c
|
356 357 |
return NF_DROP; } |
0d53778e8
|
358 359 |
pr_debug("responsible "); |
1da177e4c
|
360 361 362 |
/* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ |
3db05fea5
|
363 |
skb->pkt_type = PACKET_HOST; |
1da177e4c
|
364 |
|
6709dbbb1
|
365 |
return XT_CONTINUE; |
1da177e4c
|
366 |
} |
135367b8f
|
367 |
static int clusterip_tg_check(const struct xt_tgchk_param *par) |
1da177e4c
|
368 |
{ |
af5d6dc20
|
369 370 |
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; |
1da177e4c
|
371 |
struct clusterip_config *config; |
4a5a5c73b
|
372 |
int ret; |
1da177e4c
|
373 |
|
55917a21d
|
374 375 376 377 378 |
if (par->nft_compat) { pr_err("cannot use CLUSTERIP target from nftables compat "); return -EOPNOTSUPP; } |
1da177e4c
|
379 380 381 |
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { |
ff67e4e42
|
382 383 |
pr_info("unknown mode %u ", cipinfo->hash_mode); |
d6b00a534
|
384 |
return -EINVAL; |
1da177e4c
|
385 386 |
} |
3666ed1c4
|
387 388 |
if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { |
ff67e4e42
|
389 390 |
pr_info("Please specify destination IP "); |
d6b00a534
|
391 |
return -EINVAL; |
1da177e4c
|
392 393 394 |
} /* FIXME: further sanity checks */ |
d86946d2c
|
395 |
config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); |
d3c3f4243
|
396 |
if (!config) { |
1da177e4c
|
397 |
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
ff67e4e42
|
398 399 400 |
pr_info("no config found for %pI4, need 'new' ", &e->ip.dst.s_addr); |
d6b00a534
|
401 |
return -EINVAL; |
1da177e4c
|
402 403 404 405 |
} else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { |
ff67e4e42
|
406 407 |
pr_info("Please specify an interface name "); |
d6b00a534
|
408 |
return -EINVAL; |
1da177e4c
|
409 |
} |
d86946d2c
|
410 |
dev = dev_get_by_name(par->net, e->ip.iniface); |
1da177e4c
|
411 |
if (!dev) { |
ff67e4e42
|
412 413 414 |
pr_info("no such interface %s ", e->ip.iniface); |
4a5a5c73b
|
415 |
return -ENOENT; |
1da177e4c
|
416 |
} |
e905a9eda
|
417 |
config = clusterip_config_init(cipinfo, |
1da177e4c
|
418 |
e->ip.dst.s_addr, dev); |
6c5d5cfbe
|
419 |
if (IS_ERR(config)) { |
1da177e4c
|
420 |
dev_put(dev); |
6c5d5cfbe
|
421 |
return PTR_ERR(config); |
1da177e4c
|
422 |
} |
22bedad3c
|
423 |
dev_mc_add(config->dev, config->clustermac); |
1da177e4c
|
424 425 |
} } |
d3c3f4243
|
426 |
cipinfo->config = config; |
1da177e4c
|
427 |
|
ecb2421b5
|
428 |
ret = nf_ct_netns_get(par->net, par->family); |
f95c74e33
|
429 |
if (ret < 0) |
ff67e4e42
|
430 431 432 |
pr_info("cannot load conntrack support for proto=%u ", par->family); |
43270b1bc
|
433 434 435 436 437 438 439 |
if (!par->net->xt.clusterip_deprecated_warning) { pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " "use xt_cluster instead "); par->net->xt.clusterip_deprecated_warning = true; } |
f95c74e33
|
440 |
return ret; |
1da177e4c
|
441 442 443 |
} /* drop reference count of cluster config when rule is deleted */ |
a2df1648b
|
444 |
static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) |
1da177e4c
|
445 |
{ |
a2df1648b
|
446 |
const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
1da177e4c
|
447 |
|
445136244
|
448 449 450 |
/* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ clusterip_config_entry_put(cipinfo->config); |
1da177e4c
|
451 |
clusterip_config_put(cipinfo->config); |
11078c371
|
452 |
|
fe50543c1
|
453 |
nf_ct_netns_put(par->net, par->family); |
1da177e4c
|
454 |
} |
d3c3f4243
|
455 456 457 458 459 460 461 462 463 464 465 466 467 |
#ifdef CONFIG_COMPAT
/* variant of the target info whose config member is a compat_uptr_t; its
 * size is what the target registers as .compatsize */
struct compat_ipt_clusterip_tgt_info
{
	u_int32_t	flags;
	u_int8_t	clustermac[6];
	u_int16_t	num_total_nodes;
	u_int16_t	num_local_nodes;
	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t	hash_mode;
	u_int32_t	hash_initval;
	compat_uptr_t	config;
};
#endif /* CONFIG_COMPAT */
d3c5ee6d5
|
468 |
/* xtables registration of the CLUSTERIP target (IPv4 only) */
static struct xt_target clusterip_tg_reg __read_mostly = {
	.name		= "CLUSTERIP",
	.family		= NFPROTO_IPV4,
	.target		= clusterip_tg,
	.checkentry	= clusterip_tg_check,
	.destroy	= clusterip_tg_destroy,
	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
	/* everything before the config member */
	.usersize	= offsetof(struct ipt_clusterip_tgt_info, config),
#ifdef CONFIG_COMPAT
	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
#endif /* CONFIG_COMPAT */
	.me		= THIS_MODULE
};
e905a9eda
|
481 482 |
/*********************************************************************** * ARP MANGLING CODE |
1da177e4c
|
483 484 485 486 487 |
***********************************************************************/ /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; |
6a19d6147
|
488 |
__be32 src_ip; |
1da177e4c
|
489 |
u_int8_t dst_hw[ETH_ALEN]; |
6a19d6147
|
490 |
__be32 dst_ip; |
3f30fc157
|
491 |
} __packed; |
1da177e4c
|
492 |
|
0d53778e8
|
493 |
#ifdef DEBUG |
e905a9eda
|
494 |
static void arp_print(struct arp_payload *payload) |
1da177e4c
|
495 496 497 |
{ #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; |
c8d71d08a
|
498 |
int j, k; |
1da177e4c
|
499 |
|
c8d71d08a
|
500 |
for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) { |
6a8341b68
|
501 502 |
hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); |
c8d71d08a
|
503 |
hbuffer[k++] = ':'; |
1da177e4c
|
504 |
} |
c8d71d08a
|
505 |
hbuffer[--k] = '\0'; |
1da177e4c
|
506 |
|
ff67e4e42
|
507 508 509 |
pr_debug("src %pI4@%s, dst %pI4 ", &payload->src_ip, hbuffer, &payload->dst_ip); |
1da177e4c
|
510 511 512 513 |
} #endif static unsigned int |
06198b34a
|
514 |
arp_mangle(void *priv, |
3db05fea5
|
515 |
struct sk_buff *skb, |
238e54c9c
|
516 |
const struct nf_hook_state *state) |
1da177e4c
|
517 |
{ |
3db05fea5
|
518 |
struct arphdr *arp = arp_hdr(skb); |
1da177e4c
|
519 520 |
struct arp_payload *payload; struct clusterip_config *c; |
9dff2c966
|
521 |
struct net *net = state->net; |
1da177e4c
|
522 523 |
/* we don't care about non-ethernet and non-ipv4 ARP */ |
3666ed1c4
|
524 525 526 |
if (arp->ar_hrd != htons(ARPHRD_ETHER) || arp->ar_pro != htons(ETH_P_IP) || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) |
1da177e4c
|
527 |
return NF_ACCEPT; |
4095ebf1e
|
528 |
/* we only want to mangle arp requests and replies */ |
3666ed1c4
|
529 530 |
if (arp->ar_op != htons(ARPOP_REPLY) && arp->ar_op != htons(ARPOP_REQUEST)) |
1da177e4c
|
531 532 533 |
return NF_ACCEPT; payload = (void *)(arp+1); |
e905a9eda
|
534 |
/* if there is no clusterip configuration for the arp reply's |
1da177e4c
|
535 |
* source ip, we don't want to mangle it */ |
d86946d2c
|
536 |
c = clusterip_config_find_get(net, payload->src_ip, 0); |
1da177e4c
|
537 538 |
if (!c) return NF_ACCEPT; |
e905a9eda
|
539 |
/* normally the linux kernel always replies to arp queries of |
1da177e4c
|
540 541 542 |
* addresses on different interfacs. However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ |
238e54c9c
|
543 |
if (c->dev != state->out) { |
ff67e4e42
|
544 |
pr_debug("not mangling arp reply on different " |
0d53778e8
|
545 546 |
"interface: cip'%s'-skb'%s' ", |
238e54c9c
|
547 |
c->dev->name, state->out->name); |
1da177e4c
|
548 549 550 551 552 553 |
clusterip_config_put(c); return NF_ACCEPT; } /* mangle reply hardware address */ memcpy(payload->src_hw, c->clustermac, arp->ar_hln); |
0d53778e8
|
554 |
#ifdef DEBUG |
ff67e4e42
|
555 |
pr_debug("mangled arp reply: "); |
1da177e4c
|
556 557 558 559 560 561 562 |
arp_print(payload); #endif clusterip_config_put(c); return NF_ACCEPT; } |
1999414a4
|
563 |
/* hook arp_mangle() into the ARP output path */
static struct nf_hook_ops cip_arp_ops __read_mostly = {
	.hook		= arp_mangle,
	.pf		= NFPROTO_ARP,
	.hooknum	= NF_ARP_OUT,
	.priority	= -1
};
e905a9eda
|
569 570 |
/*********************************************************************** * PROC DIR HANDLING |
1da177e4c
|
571 572 573 |
***********************************************************************/ #ifdef CONFIG_PROC_FS |
136e92bbe
|
574 575 576 577 578 579 |
/* iterator state of the seq_file that lists the local nodes of a config */
struct clusterip_seq_position {
	unsigned int pos;	/* position */
	unsigned int weight;	/* number of bits set == size */
	unsigned int bit;	/* current bit */
	unsigned long val;	/* current value */
};
1da177e4c
|
580 581 |
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { |
477781477
|
582 |
struct clusterip_config *c = s->private; |
136e92bbe
|
583 584 585 586 587 588 589 590 |
unsigned int weight; u_int32_t local_nodes; struct clusterip_seq_position *idx; /* FIXME: possible race */ local_nodes = c->local_nodes; weight = hweight32(local_nodes); if (*pos >= weight) |
1da177e4c
|
591 |
return NULL; |
136e92bbe
|
592 593 |
idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); if (!idx) |
1da177e4c
|
594 |
return ERR_PTR(-ENOMEM); |
136e92bbe
|
595 596 597 598 599 600 601 |
idx->pos = *pos; idx->weight = weight; idx->bit = ffs(local_nodes); idx->val = local_nodes; clear_bit(idx->bit - 1, &idx->val); return idx; |
1da177e4c
|
602 603 604 605 |
} static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { |
3cf93c96a
|
606 |
struct clusterip_seq_position *idx = v; |
1da177e4c
|
607 |
|
136e92bbe
|
608 609 |
*pos = ++idx->pos; if (*pos >= idx->weight) { |
1da177e4c
|
610 611 612 |
kfree(v); return NULL; } |
136e92bbe
|
613 614 615 |
idx->bit = ffs(idx->val); clear_bit(idx->bit - 1, &idx->val); return idx; |
1da177e4c
|
616 617 618 619 |
} static void clusterip_seq_stop(struct seq_file *s, void *v) { |
902a3dd5e
|
620 621 |
if (!IS_ERR(v)) kfree(v); |
1da177e4c
|
622 623 624 625 |
} static int clusterip_seq_show(struct seq_file *s, void *v) { |
3cf93c96a
|
626 |
struct clusterip_seq_position *idx = v; |
1da177e4c
|
627 |
|
e905a9eda
|
628 |
if (idx->pos != 0) |
1da177e4c
|
629 |
seq_putc(s, ','); |
1da177e4c
|
630 |
|
136e92bbe
|
631 632 633 |
seq_printf(s, "%u", idx->bit); if (idx->pos == idx->weight - 1) |
1da177e4c
|
634 635 636 637 638 |
seq_putc(s, ' '); return 0; } |
56b3d975b
|
639 |
static const struct seq_operations clusterip_seq_ops = { |
1da177e4c
|
640 641 642 643 644 645 646 647 648 649 650 651 |
.start = clusterip_seq_start, .next = clusterip_seq_next, .stop = clusterip_seq_stop, .show = clusterip_seq_show, }; static int clusterip_proc_open(struct inode *inode, struct file *file) { int ret = seq_open(file, &clusterip_seq_ops); if (!ret) { struct seq_file *sf = file->private_data; |
d9dda78ba
|
652 |
struct clusterip_config *c = PDE_DATA(inode); |
1da177e4c
|
653 |
|
477781477
|
654 |
sf->private = c; |
1da177e4c
|
655 656 657 658 659 660 661 662 663 |
clusterip_config_get(c); } return ret; } static int clusterip_proc_release(struct inode *inode, struct file *file) { |
d9dda78ba
|
664 |
struct clusterip_config *c = PDE_DATA(inode); |
1da177e4c
|
665 666 667 668 669 670 671 672 673 674 675 676 677 |
int ret; ret = seq_release(inode, file); if (!ret) clusterip_config_put(c); return ret; } static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { |
d9dda78ba
|
678 |
struct clusterip_config *c = PDE_DATA(file_inode(file)); |
1da177e4c
|
679 680 |
#define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; |
1da177e4c
|
681 |
unsigned long nodenum; |
4b5511ebc
|
682 |
int rc; |
1da177e4c
|
683 |
|
961ed183a
|
684 685 686 |
if (size > PROC_WRITELEN) return -EIO; if (copy_from_user(buffer, input, size)) |
1da177e4c
|
687 |
return -EFAULT; |
961ed183a
|
688 |
buffer[size] = 0; |
1da177e4c
|
689 690 |
if (*buffer == '+') { |
4b5511ebc
|
691 692 693 |
rc = kstrtoul(buffer+1, 10, &nodenum); if (rc) return rc; |
1da177e4c
|
694 695 696 |
if (clusterip_add_node(c, nodenum)) return -ENOMEM; } else if (*buffer == '-') { |
4b5511ebc
|
697 698 699 |
rc = kstrtoul(buffer+1, 10, &nodenum); if (rc) return rc; |
1da177e4c
|
700 701 702 703 704 705 706 |
if (clusterip_del_node(c, nodenum)) return -ENOENT; } else return -EIO; return size; } |
9a32144e9
|
707 |
static const struct file_operations clusterip_proc_fops = { |
1da177e4c
|
708 709 710 711 712 713 714 715 716 |
.owner = THIS_MODULE, .open = clusterip_proc_open, .read = seq_read, .write = clusterip_proc_write, .llseek = seq_lseek, .release = clusterip_proc_release, }; #endif /* CONFIG_PROC_FS */ |
ce4ff76c1
|
717 718 |
static int clusterip_net_init(struct net *net) { |
ce4ff76c1
|
719 |
struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
26a89e435
|
720 |
INIT_LIST_HEAD(&cn->configs); |
f1e8077f4
|
721 |
spin_lock_init(&cn->lock); |
26a89e435
|
722 |
#ifdef CONFIG_PROC_FS |
ce4ff76c1
|
723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 |
cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); if (!cn->procdir) { pr_err("Unable to proc dir entry "); return -ENOMEM; } #endif /* CONFIG_PROC_FS */ return 0; } static void clusterip_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); #endif } static struct pernet_operations clusterip_net_ops = { .init = clusterip_net_init, .exit = clusterip_net_exit, .id = &clusterip_net_id, .size = sizeof(struct clusterip_net), }; |
d3c5ee6d5
|
748 |
/* module init: register the pernet operations, the xtables target and the
 * ARP hook, undoing the earlier steps if a later one fails */
static int __init clusterip_tg_init(void)
{
	int ret;

	ret = register_pernet_subsys(&clusterip_net_ops);
	if (ret < 0)
		return ret;

	ret = xt_register_target(&clusterip_tg_reg);
	if (ret < 0)
		goto cleanup_subsys;

	ret = nf_register_hook(&cip_arp_ops);
	if (ret < 0)
		goto cleanup_target;

	pr_info("ClusterIP Version %s loaded successfully\n",
		CLUSTERIP_VERSION);

	return 0;

cleanup_target:
	xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
	unregister_pernet_subsys(&clusterip_net_ops);
	return ret;
}
d3c5ee6d5
|
773 |
/* module exit: unregister everything in reverse order of registration */
static void __exit clusterip_tg_exit(void)
{
	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);

	nf_unregister_hook(&cip_arp_ops);
	xt_unregister_target(&clusterip_tg_reg);
	unregister_pernet_subsys(&clusterip_net_ops);

	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
	rcu_barrier_bh();
}

module_init(clusterip_tg_init);
module_exit(clusterip_tg_exit);