Commit 6c027889696a7a694b0e2f6e3cabadefec7553b6

Authored by Jozsef Kadlecsik
Committed by Patrick McHardy
1 parent 543261907d

netfilter: ipset: hash:ip set type support

The module implements the hash:ip type support in four flavours:
for IPv4 or IPv6, both without and with timeout support.

All the hash types are based on the "array hash" or ahash structure
and functions as a good compromise between minimal memory footprint
and speed. The hashing uses arrays to resolve clashes. The hash table
is resized (doubled) when searching becomes too long. Resizing can be
triggered by userspace add commands only and those are serialized by
the nfnl mutex. During resizing the set is read-locked, so the only
possible concurrent operations are the kernel side readers. Those are
protected by RCU locking.

Because of the four flavours and the other hash types, the functions
are implemented in general forms in the ip_set_ahash.h header file
and the real functions are generated before compiling by macro expansion.
Thus the dereferencing of low-level functions and void pointer arguments
could be avoided: the low-level functions are inlined, the function
arguments are pointers of type-specific structures.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>

Showing 5 changed files with 1580 additions and 0 deletions Side-by-side Diff

include/linux/netfilter/ipset/ip_set_ahash.h
Changes suppressed. Click to show
  1 +#ifndef _IP_SET_AHASH_H
  2 +#define _IP_SET_AHASH_H
  3 +
  4 +#include <linux/rcupdate.h>
  5 +#include <linux/jhash.h>
  6 +#include <linux/netfilter/ipset/ip_set_timeout.h>
  7 +
  8 +/* Hashing which uses arrays to resolve clashing. The hash table is resized
  9 + * (doubled) when searching becomes too long.
  10 + * Internally jhash is used with the assumption that the size of the
  11 + * stored data is a multiple of sizeof(u32). If storage supports timeout,
  12 + * the timeout field must be the last one in the data structure - that field
  13 + * is ignored when computing the hash key.
  14 + *
  15 + * Readers and resizing
  16 + *
  17 + * Resizing can be triggered by userspace command only, and those
  18 + * are serialized by the nfnl mutex. During resizing the set is
  19 + * read-locked, so the only possible concurrent operations are
  20 + * the kernel side readers. Those must be protected by proper RCU locking.
  21 + */
  22 +
  23 +/* Number of elements to store in an initial array block */
  24 +#define AHASH_INIT_SIZE 4
  25 +/* Max number of elements to store in an array block */
  26 +#define AHASH_MAX_SIZE (3*4)
  27 +
  28 +/* A hash bucket */
  29 +struct hbucket {
  30 + void *value; /* the array of the values */
  31 + u8 size; /* size of the array */
  32 + u8 pos; /* position of the first free entry */
  33 +};
  34 +
  35 +/* The hash table: the table size stored here in order to make resizing easy */
  36 +struct htable {
  37 + u8 htable_bits; /* size of hash table == 2^htable_bits */
  38 + struct hbucket bucket[0]; /* hashtable buckets */
  39 +};
  40 +
  41 +#define hbucket(h, i) &((h)->bucket[i])
  42 +
  43 +/* Book-keeping of the prefixes added to the set */
  44 +struct ip_set_hash_nets {
  45 + u8 cidr; /* the different cidr values in the set */
  46 + u32 nets; /* number of elements per cidr */
  47 +};
  48 +
  49 +/* The generic ip_set hash structure */
  50 +struct ip_set_hash {
  51 + struct htable *table; /* the hash table */
  52 + u32 maxelem; /* max elements in the hash */
  53 + u32 elements; /* current element (vs timeout) */
  54 + u32 initval; /* random jhash init value */
  55 + u32 timeout; /* timeout value, if enabled */
  56 + struct timer_list gc; /* garbage collection when timeout enabled */
  57 +#ifdef IP_SET_HASH_WITH_NETMASK
  58 + u8 netmask; /* netmask value for subnets to store */
  59 +#endif
  60 +#ifdef IP_SET_HASH_WITH_NETS
  61 + struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
  62 +#endif
  63 +};
  64 +
  65 +/* Compute htable_bits from the user input parameter hashsize */
  66 +static u8
  67 +htable_bits(u32 hashsize)
  68 +{
  69 + /* Assume that hashsize == 2^htable_bits */
  70 + u8 bits = fls(hashsize - 1);
  71 + if (jhash_size(bits) != hashsize)
  72 + /* Round up to the first 2^n value */
  73 + bits = fls(hashsize);
  74 +
  75 + return bits;
  76 +}
  77 +
  78 +#ifdef IP_SET_HASH_WITH_NETS
  79 +
  80 +#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
  81 +
  82 +/* Network cidr size book keeping when the hash stores different
  83 + * sized networks */
  84 +static void
  85 +add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
  86 +{
  87 + u8 i;
  88 +
  89 + ++h->nets[cidr-1].nets;
  90 +
  91 + pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);
  92 +
  93 + if (h->nets[cidr-1].nets > 1)
  94 + return;
  95 +
  96 + /* New cidr size */
  97 + for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
  98 + /* Add in increasing prefix order, so larger cidr first */
  99 + if (h->nets[i].cidr < cidr)
  100 + swap(h->nets[i].cidr, cidr);
  101 + }
  102 + if (i < host_mask)
  103 + h->nets[i].cidr = cidr;
  104 +}
  105 +
  106 +static void
  107 +del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
  108 +{
  109 + u8 i;
  110 +
  111 + --h->nets[cidr-1].nets;
  112 +
  113 + pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);
  114 +
  115 + if (h->nets[cidr-1].nets != 0)
  116 + return;
  117 +
  118 + /* All entries with this cidr size deleted, so cleanup h->cidr[] */
  119 + for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
  120 + if (h->nets[i].cidr == cidr)
  121 + h->nets[i].cidr = cidr = h->nets[i+1].cidr;
  122 + }
  123 + h->nets[i - 1].cidr = 0;
  124 +}
  125 +#endif
  126 +
  127 +/* Destroy the hashtable part of the set */
  128 +static void
  129 +ahash_destroy(struct htable *t)
  130 +{
  131 + struct hbucket *n;
  132 + u32 i;
  133 +
  134 + for (i = 0; i < jhash_size(t->htable_bits); i++) {
  135 + n = hbucket(t, i);
  136 + if (n->size)
  137 + /* FIXME: use slab cache */
  138 + kfree(n->value);
  139 + }
  140 +
  141 + ip_set_free(t);
  142 +}
  143 +
  144 +/* Calculate the actual memory size of the set data */
  145 +static size_t
  146 +ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
  147 +{
  148 + u32 i;
  149 + struct htable *t = h->table;
  150 + size_t memsize = sizeof(*h)
  151 + + sizeof(*t)
  152 +#ifdef IP_SET_HASH_WITH_NETS
  153 + + sizeof(struct ip_set_hash_nets) * host_mask
  154 +#endif
  155 + + jhash_size(t->htable_bits) * sizeof(struct hbucket);
  156 +
  157 + for (i = 0; i < jhash_size(t->htable_bits); i++)
  158 + memsize += t->bucket[i].size * dsize;
  159 +
  160 + return memsize;
  161 +}
  162 +
  163 +/* Flush a hash type of set: destroy all elements */
  164 +static void
  165 +ip_set_hash_flush(struct ip_set *set)
  166 +{
  167 + struct ip_set_hash *h = set->data;
  168 + struct htable *t = h->table;
  169 + struct hbucket *n;
  170 + u32 i;
  171 +
  172 + for (i = 0; i < jhash_size(t->htable_bits); i++) {
  173 + n = hbucket(t, i);
  174 + if (n->size) {
  175 + n->size = n->pos = 0;
  176 + /* FIXME: use slab cache */
  177 + kfree(n->value);
  178 + }
  179 + }
  180 +#ifdef IP_SET_HASH_WITH_NETS
  181 + memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
  182 + * SET_HOST_MASK(set->family));
  183 +#endif
  184 + h->elements = 0;
  185 +}
  186 +
  187 +/* Destroy a hash type of set */
  188 +static void
  189 +ip_set_hash_destroy(struct ip_set *set)
  190 +{
  191 + struct ip_set_hash *h = set->data;
  192 +
  193 + if (with_timeout(h->timeout))
  194 + del_timer_sync(&h->gc);
  195 +
  196 + ahash_destroy(h->table);
  197 + kfree(h);
  198 +
  199 + set->data = NULL;
  200 +}
  201 +
  202 +#define HKEY(data, initval, htable_bits) \
  203 +(jhash2((u32 *)(data), sizeof(struct type_pf_elem)/sizeof(u32), initval) \
  204 + & jhash_mask(htable_bits))
  205 +
  206 +#endif /* _IP_SET_AHASH_H */
  207 +
  208 +#define CONCAT(a, b, c) a##b##c
  209 +#define TOKEN(a, b, c) CONCAT(a, b, c)
  210 +
  211 +/* Type/family dependent function prototypes */
  212 +
  213 +#define type_pf_data_equal TOKEN(TYPE, PF, _data_equal)
  214 +#define type_pf_data_isnull TOKEN(TYPE, PF, _data_isnull)
  215 +#define type_pf_data_copy TOKEN(TYPE, PF, _data_copy)
  216 +#define type_pf_data_zero_out TOKEN(TYPE, PF, _data_zero_out)
  217 +#define type_pf_data_netmask TOKEN(TYPE, PF, _data_netmask)
  218 +#define type_pf_data_list TOKEN(TYPE, PF, _data_list)
  219 +#define type_pf_data_tlist TOKEN(TYPE, PF, _data_tlist)
  220 +
  221 +#define type_pf_elem TOKEN(TYPE, PF, _elem)
  222 +#define type_pf_telem TOKEN(TYPE, PF, _telem)
  223 +#define type_pf_data_timeout TOKEN(TYPE, PF, _data_timeout)
  224 +#define type_pf_data_expired TOKEN(TYPE, PF, _data_expired)
  225 +#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)
  226 +
  227 +#define type_pf_elem_add TOKEN(TYPE, PF, _elem_add)
  228 +#define type_pf_add TOKEN(TYPE, PF, _add)
  229 +#define type_pf_del TOKEN(TYPE, PF, _del)
  230 +#define type_pf_test_cidrs TOKEN(TYPE, PF, _test_cidrs)
  231 +#define type_pf_test TOKEN(TYPE, PF, _test)
  232 +
  233 +#define type_pf_elem_tadd TOKEN(TYPE, PF, _elem_tadd)
  234 +#define type_pf_del_telem TOKEN(TYPE, PF, _ahash_del_telem)
  235 +#define type_pf_expire TOKEN(TYPE, PF, _expire)
  236 +#define type_pf_tadd TOKEN(TYPE, PF, _tadd)
  237 +#define type_pf_tdel TOKEN(TYPE, PF, _tdel)
  238 +#define type_pf_ttest_cidrs TOKEN(TYPE, PF, _ahash_ttest_cidrs)
  239 +#define type_pf_ttest TOKEN(TYPE, PF, _ahash_ttest)
  240 +
  241 +#define type_pf_resize TOKEN(TYPE, PF, _resize)
  242 +#define type_pf_tresize TOKEN(TYPE, PF, _tresize)
  243 +#define type_pf_flush ip_set_hash_flush
  244 +#define type_pf_destroy ip_set_hash_destroy
  245 +#define type_pf_head TOKEN(TYPE, PF, _head)
  246 +#define type_pf_list TOKEN(TYPE, PF, _list)
  247 +#define type_pf_tlist TOKEN(TYPE, PF, _tlist)
  248 +#define type_pf_same_set TOKEN(TYPE, PF, _same_set)
  249 +#define type_pf_kadt TOKEN(TYPE, PF, _kadt)
  250 +#define type_pf_uadt TOKEN(TYPE, PF, _uadt)
  251 +#define type_pf_gc TOKEN(TYPE, PF, _gc)
  252 +#define type_pf_gc_init TOKEN(TYPE, PF, _gc_init)
  253 +#define type_pf_variant TOKEN(TYPE, PF, _variant)
  254 +#define type_pf_tvariant TOKEN(TYPE, PF, _tvariant)
  255 +
  256 +/* Flavour without timeout */
  257 +
  258 +/* Get the ith element from the array block n */
  259 +#define ahash_data(n, i) \
  260 + ((struct type_pf_elem *)((n)->value) + (i))
  261 +
  262 +/* Add an element to the hash table when resizing the set:
  263 + * we spare the maintenance of the internal counters. */
  264 +static int
  265 +type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value)
  266 +{
  267 + if (n->pos >= n->size) {
  268 + void *tmp;
  269 +
  270 + if (n->size >= AHASH_MAX_SIZE)
  271 + /* Trigger rehashing */
  272 + return -EAGAIN;
  273 +
  274 + tmp = kzalloc((n->size + AHASH_INIT_SIZE)
  275 + * sizeof(struct type_pf_elem),
  276 + GFP_ATOMIC);
  277 + if (!tmp)
  278 + return -ENOMEM;
  279 + if (n->size) {
  280 + memcpy(tmp, n->value,
  281 + sizeof(struct type_pf_elem) * n->size);
  282 + kfree(n->value);
  283 + }
  284 + n->value = tmp;
  285 + n->size += AHASH_INIT_SIZE;
  286 + }
  287 + type_pf_data_copy(ahash_data(n, n->pos++), value);
  288 + return 0;
  289 +}
  290 +
  291 +/* Resize a hash: create a new hash table with doubling the hashsize
  292 + * and inserting the elements to it. Repeat until we succeed or
  293 + * fail due to memory pressures. */
  294 +static int
  295 +type_pf_resize(struct ip_set *set, bool retried)
  296 +{
  297 + struct ip_set_hash *h = set->data;
  298 + struct htable *t, *orig = h->table;
  299 + u8 htable_bits = orig->htable_bits;
  300 + const struct type_pf_elem *data;
  301 + struct hbucket *n, *m;
  302 + u32 i, j;
  303 + int ret;
  304 +
  305 +retry:
  306 + ret = 0;
  307 + htable_bits++;
  308 + pr_debug("attempt to resize set %s from %u to %u, t %p\n",
  309 + set->name, orig->htable_bits, htable_bits, orig);
  310 + if (!htable_bits)
  311 + /* In case we have plenty of memory :-) */
  312 + return -IPSET_ERR_HASH_FULL;
  313 + t = ip_set_alloc(sizeof(*t)
  314 + + jhash_size(htable_bits) * sizeof(struct hbucket));
  315 + if (!t)
  316 + return -ENOMEM;
  317 + t->htable_bits = htable_bits;
  318 +
  319 + read_lock_bh(&set->lock);
  320 + for (i = 0; i < jhash_size(orig->htable_bits); i++) {
  321 + n = hbucket(orig, i);
  322 + for (j = 0; j < n->pos; j++) {
  323 + data = ahash_data(n, j);
  324 + m = hbucket(t, HKEY(data, h->initval, htable_bits));
  325 + ret = type_pf_elem_add(m, data);
  326 + if (ret < 0) {
  327 + read_unlock_bh(&set->lock);
  328 + ahash_destroy(t);
  329 + if (ret == -EAGAIN)
  330 + goto retry;
  331 + return ret;
  332 + }
  333 + }
  334 + }
  335 +
  336 + rcu_assign_pointer(h->table, t);
  337 + read_unlock_bh(&set->lock);
  338 +
  339 + /* Give time to other readers of the set */
  340 + synchronize_rcu_bh();
  341 +
  342 + pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
  343 + orig->htable_bits, orig, t->htable_bits, t);
  344 + ahash_destroy(orig);
  345 +
  346 + return 0;
  347 +}
  348 +
  349 +/* Add an element to a hash and update the internal counters when succeeded,
  350 + * otherwise report the proper error code. */
  351 +static int
  352 +type_pf_add(struct ip_set *set, void *value, u32 timeout)
  353 +{
  354 + struct ip_set_hash *h = set->data;
  355 + struct htable *t;
  356 + const struct type_pf_elem *d = value;
  357 + struct hbucket *n;
  358 + int i, ret = 0;
  359 + u32 key;
  360 +
  361 + if (h->elements >= h->maxelem)
  362 + return -IPSET_ERR_HASH_FULL;
  363 +
  364 + rcu_read_lock_bh();
  365 + t = rcu_dereference_bh(h->table);
  366 + key = HKEY(value, h->initval, t->htable_bits);
  367 + n = hbucket(t, key);
  368 + for (i = 0; i < n->pos; i++)
  369 + if (type_pf_data_equal(ahash_data(n, i), d)) {
  370 + ret = -IPSET_ERR_EXIST;
  371 + goto out;
  372 + }
  373 +
  374 + ret = type_pf_elem_add(n, value);
  375 + if (ret != 0)
  376 + goto out;
  377 +
  378 +#ifdef IP_SET_HASH_WITH_NETS
  379 + add_cidr(h, d->cidr, HOST_MASK);
  380 +#endif
  381 + h->elements++;
  382 +out:
  383 + rcu_read_unlock_bh();
  384 + return ret;
  385 +}
  386 +
  387 +/* Delete an element from the hash: swap it with the last element
  388 + * and free up space if possible.
  389 + */
  390 +static int
  391 +type_pf_del(struct ip_set *set, void *value, u32 timeout)
  392 +{
  393 + struct ip_set_hash *h = set->data;
  394 + struct htable *t = h->table;
  395 + const struct type_pf_elem *d = value;
  396 + struct hbucket *n;
  397 + int i;
  398 + struct type_pf_elem *data;
  399 + u32 key;
  400 +
  401 + key = HKEY(value, h->initval, t->htable_bits);
  402 + n = hbucket(t, key);
  403 + for (i = 0; i < n->pos; i++) {
  404 + data = ahash_data(n, i);
  405 + if (!type_pf_data_equal(data, d))
  406 + continue;
  407 + if (i != n->pos - 1)
  408 + /* Not last one */
  409 + type_pf_data_copy(data, ahash_data(n, n->pos - 1));
  410 +
  411 + n->pos--;
  412 + h->elements--;
  413 +#ifdef IP_SET_HASH_WITH_NETS
  414 + del_cidr(h, d->cidr, HOST_MASK);
  415 +#endif
  416 + if (n->pos + AHASH_INIT_SIZE < n->size) {
  417 + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
  418 + * sizeof(struct type_pf_elem),
  419 + GFP_ATOMIC);
  420 + if (!tmp)
  421 + return 0;
  422 + n->size -= AHASH_INIT_SIZE;
  423 + memcpy(tmp, n->value,
  424 + n->size * sizeof(struct type_pf_elem));
  425 + kfree(n->value);
  426 + n->value = tmp;
  427 + }
  428 + return 0;
  429 + }
  430 +
  431 + return -IPSET_ERR_EXIST;
  432 +}
  433 +
  434 +#ifdef IP_SET_HASH_WITH_NETS
  435 +
  436 +/* Special test function which takes into account the different network
  437 + * sizes added to the set */
  438 +static int
  439 +type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
  440 +{
  441 + struct ip_set_hash *h = set->data;
  442 + struct htable *t = h->table;
  443 + struct hbucket *n;
  444 + const struct type_pf_elem *data;
  445 + int i, j = 0;
  446 + u32 key;
  447 + u8 host_mask = SET_HOST_MASK(set->family);
  448 +
  449 + pr_debug("test by nets\n");
  450 + for (; j < host_mask && h->nets[j].cidr; j++) {
  451 + type_pf_data_netmask(d, h->nets[j].cidr);
  452 + key = HKEY(d, h->initval, t->htable_bits);
  453 + n = hbucket(t, key);
  454 + for (i = 0; i < n->pos; i++) {
  455 + data = ahash_data(n, i);
  456 + if (type_pf_data_equal(data, d))
  457 + return 1;
  458 + }
  459 + }
  460 + return 0;
  461 +}
  462 +#endif
  463 +
  464 +/* Test whether the element is added to the set */
  465 +static int
  466 +type_pf_test(struct ip_set *set, void *value, u32 timeout)
  467 +{
  468 + struct ip_set_hash *h = set->data;
  469 + struct htable *t = h->table;
  470 + struct type_pf_elem *d = value;
  471 + struct hbucket *n;
  472 + const struct type_pf_elem *data;
  473 + int i;
  474 + u32 key;
  475 +
  476 +#ifdef IP_SET_HASH_WITH_NETS
  477 + /* If we test an IP address and not a network address,
  478 + * try all possible network sizes */
  479 + if (d->cidr == SET_HOST_MASK(set->family))
  480 + return type_pf_test_cidrs(set, d, timeout);
  481 +#endif
  482 +
  483 + key = HKEY(d, h->initval, t->htable_bits);
  484 + n = hbucket(t, key);
  485 + for (i = 0; i < n->pos; i++) {
  486 + data = ahash_data(n, i);
  487 + if (type_pf_data_equal(data, d))
  488 + return 1;
  489 + }
  490 + return 0;
  491 +}
  492 +
  493 +/* Reply a HEADER request: fill out the header part of the set */
  494 +static int
  495 +type_pf_head(struct ip_set *set, struct sk_buff *skb)
  496 +{
  497 + const struct ip_set_hash *h = set->data;
  498 + struct nlattr *nested;
  499 + size_t memsize;
  500 +
  501 + read_lock_bh(&set->lock);
  502 + memsize = ahash_memsize(h, with_timeout(h->timeout)
  503 + ? sizeof(struct type_pf_telem)
  504 + : sizeof(struct type_pf_elem),
  505 + set->family == AF_INET ? 32 : 128);
  506 + read_unlock_bh(&set->lock);
  507 +
  508 + nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
  509 + if (!nested)
  510 + goto nla_put_failure;
  511 + NLA_PUT_NET32(skb, IPSET_ATTR_HASHSIZE,
  512 + htonl(jhash_size(h->table->htable_bits)));
  513 + NLA_PUT_NET32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem));
  514 +#ifdef IP_SET_HASH_WITH_NETMASK
  515 + if (h->netmask != HOST_MASK)
  516 + NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, h->netmask);
  517 +#endif
  518 + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
  519 + htonl(atomic_read(&set->ref) - 1));
  520 + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize));
  521 + if (with_timeout(h->timeout))
  522 + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout));
  523 + ipset_nest_end(skb, nested);
  524 +
  525 + return 0;
  526 +nla_put_failure:
  527 + return -EMSGSIZE;
  528 +}
  529 +
  530 +/* Reply a LIST/SAVE request: dump the elements of the specified set */
  531 +static int
  532 +type_pf_list(const struct ip_set *set,
  533 + struct sk_buff *skb, struct netlink_callback *cb)
  534 +{
  535 + const struct ip_set_hash *h = set->data;
  536 + const struct htable *t = h->table;
  537 + struct nlattr *atd, *nested;
  538 + const struct hbucket *n;
  539 + const struct type_pf_elem *data;
  540 + u32 first = cb->args[2];
  541 + /* We assume that one hash bucket fills into one page */
  542 + void *incomplete;
  543 + int i;
  544 +
  545 + atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
  546 + if (!atd)
  547 + return -EMSGSIZE;
  548 + pr_debug("list hash set %s\n", set->name);
  549 + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
  550 + incomplete = skb_tail_pointer(skb);
  551 + n = hbucket(t, cb->args[2]);
  552 + pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
  553 + for (i = 0; i < n->pos; i++) {
  554 + data = ahash_data(n, i);
  555 + pr_debug("list hash %lu hbucket %p i %u, data %p\n",
  556 + cb->args[2], n, i, data);
  557 + nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
  558 + if (!nested) {
  559 + if (cb->args[2] == first) {
  560 + nla_nest_cancel(skb, atd);
  561 + return -EMSGSIZE;
  562 + } else
  563 + goto nla_put_failure;
  564 + }
  565 + if (type_pf_data_list(skb, data))
  566 + goto nla_put_failure;
  567 + ipset_nest_end(skb, nested);
  568 + }
  569 + }
  570 + ipset_nest_end(skb, atd);
  571 + /* Set listing finished */
  572 + cb->args[2] = 0;
  573 +
  574 + return 0;
  575 +
  576 +nla_put_failure:
  577 + nlmsg_trim(skb, incomplete);
  578 + ipset_nest_end(skb, atd);
  579 + if (unlikely(first == cb->args[2])) {
  580 + pr_warning("Can't list set %s: one bucket does not fit into "
  581 + "a message. Please report it!\n", set->name);
  582 + cb->args[2] = 0;
  583 + return -EMSGSIZE;
  584 + }
  585 + return 0;
  586 +}
  587 +
  588 +static int
  589 +type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
  590 + enum ipset_adt adt, u8 pf, u8 dim, u8 flags);
  591 +static int
  592 +type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
  593 + enum ipset_adt adt, u32 *lineno, u32 flags);
  594 +
  595 +static const struct ip_set_type_variant type_pf_variant = {
  596 + .kadt = type_pf_kadt,
  597 + .uadt = type_pf_uadt,
  598 + .adt = {
  599 + [IPSET_ADD] = type_pf_add,
  600 + [IPSET_DEL] = type_pf_del,
  601 + [IPSET_TEST] = type_pf_test,
  602 + },
  603 + .destroy = type_pf_destroy,
  604 + .flush = type_pf_flush,
  605 + .head = type_pf_head,
  606 + .list = type_pf_list,
  607 + .resize = type_pf_resize,
  608 + .same_set = type_pf_same_set,
  609 +};
  610 +
  611 +/* Flavour with timeout support */
  612 +
  613 +#define ahash_tdata(n, i) \
  614 + (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
  615 +
  616 +static inline u32
  617 +type_pf_data_timeout(const struct type_pf_elem *data)
  618 +{
  619 + const struct type_pf_telem *tdata =
  620 + (const struct type_pf_telem *) data;
  621 +
  622 + return tdata->timeout;
  623 +}
  624 +
  625 +static inline bool
  626 +type_pf_data_expired(const struct type_pf_elem *data)
  627 +{
  628 + const struct type_pf_telem *tdata =
  629 + (const struct type_pf_telem *) data;
  630 +
  631 + return ip_set_timeout_expired(tdata->timeout);
  632 +}
  633 +
  634 +static inline void
  635 +type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
  636 +{
  637 + struct type_pf_telem *tdata = (struct type_pf_telem *) data;
  638 +
  639 + tdata->timeout = ip_set_timeout_set(timeout);
  640 +}
  641 +
  642 +static int
  643 +type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
  644 + u32 timeout)
  645 +{
  646 + struct type_pf_elem *data;
  647 +
  648 + if (n->pos >= n->size) {
  649 + void *tmp;
  650 +
  651 + if (n->size >= AHASH_MAX_SIZE)
  652 + /* Trigger rehashing */
  653 + return -EAGAIN;
  654 +
  655 + tmp = kzalloc((n->size + AHASH_INIT_SIZE)
  656 + * sizeof(struct type_pf_telem),
  657 + GFP_ATOMIC);
  658 + if (!tmp)
  659 + return -ENOMEM;
  660 + if (n->size) {
  661 + memcpy(tmp, n->value,
  662 + sizeof(struct type_pf_telem) * n->size);
  663 + kfree(n->value);
  664 + }
  665 + n->value = tmp;
  666 + n->size += AHASH_INIT_SIZE;
  667 + }
  668 + data = ahash_tdata(n, n->pos++);
  669 + type_pf_data_copy(data, value);
  670 + type_pf_data_timeout_set(data, timeout);
  671 + return 0;
  672 +}
  673 +
  674 +/* Delete expired elements from the hashtable */
  675 +static void
  676 +type_pf_expire(struct ip_set_hash *h)
  677 +{
  678 + struct htable *t = h->table;
  679 + struct hbucket *n;
  680 + struct type_pf_elem *data;
  681 + u32 i;
  682 + int j;
  683 +
  684 + for (i = 0; i < jhash_size(t->htable_bits); i++) {
  685 + n = hbucket(t, i);
  686 + for (j = 0; j < n->pos; j++) {
  687 + data = ahash_tdata(n, j);
  688 + if (type_pf_data_expired(data)) {
  689 + pr_debug("expired %u/%u\n", i, j);
  690 +#ifdef IP_SET_HASH_WITH_NETS
  691 + del_cidr(h, data->cidr, HOST_MASK);
  692 +#endif
  693 + if (j != n->pos - 1)
  694 + /* Not last one */
  695 + type_pf_data_copy(data,
  696 + ahash_tdata(n, n->pos - 1));
  697 + n->pos--;
  698 + h->elements--;
  699 + }
  700 + }
  701 + if (n->pos + AHASH_INIT_SIZE < n->size) {
  702 + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
  703 + * sizeof(struct type_pf_telem),
  704 + GFP_ATOMIC);
  705 + if (!tmp)
  706 + /* Still try to delete expired elements */
  707 + continue;
  708 + n->size -= AHASH_INIT_SIZE;
  709 + memcpy(tmp, n->value,
  710 + n->size * sizeof(struct type_pf_telem));
  711 + kfree(n->value);
  712 + n->value = tmp;
  713 + }
  714 + }
  715 +}
  716 +
  717 +static int
  718 +type_pf_tresize(struct ip_set *set, bool retried)
  719 +{
  720 + struct ip_set_hash *h = set->data;
  721 + struct htable *t, *orig = h->table;
  722 + u8 htable_bits = orig->htable_bits;
  723 + const struct type_pf_elem *data;
  724 + struct hbucket *n, *m;
  725 + u32 i, j;
  726 + int ret;
  727 +
  728 + /* Try to cleanup once */
  729 + if (!retried) {
  730 + i = h->elements;
  731 + write_lock_bh(&set->lock);
  732 + type_pf_expire(set->data);
  733 + write_unlock_bh(&set->lock);
  734 + if (h->elements < i)
  735 + return 0;
  736 + }
  737 +
  738 +retry:
  739 + ret = 0;
  740 + htable_bits++;
  741 + if (!htable_bits)
  742 + /* In case we have plenty of memory :-) */
  743 + return -IPSET_ERR_HASH_FULL;
  744 + t = ip_set_alloc(sizeof(*t)
  745 + + jhash_size(htable_bits) * sizeof(struct hbucket));
  746 + if (!t)
  747 + return -ENOMEM;
  748 + t->htable_bits = htable_bits;
  749 +
  750 + read_lock_bh(&set->lock);
  751 + for (i = 0; i < jhash_size(orig->htable_bits); i++) {
  752 + n = hbucket(orig, i);
  753 + for (j = 0; j < n->pos; j++) {
  754 + data = ahash_tdata(n, j);
  755 + m = hbucket(t, HKEY(data, h->initval, htable_bits));
  756 + ret = type_pf_elem_tadd(m, data,
  757 + type_pf_data_timeout(data));
  758 + if (ret < 0) {
  759 + read_unlock_bh(&set->lock);
  760 + ahash_destroy(t);
  761 + if (ret == -EAGAIN)
  762 + goto retry;
  763 + return ret;
  764 + }
  765 + }
  766 + }
  767 +
  768 + rcu_assign_pointer(h->table, t);
  769 + read_unlock_bh(&set->lock);
  770 +
  771 + /* Give time to other readers of the set */
  772 + synchronize_rcu_bh();
  773 +
  774 + ahash_destroy(orig);
  775 +
  776 + return 0;
  777 +}
  778 +
  779 +static int
  780 +type_pf_tadd(struct ip_set *set, void *value, u32 timeout)
  781 +{
  782 + struct ip_set_hash *h = set->data;
  783 + struct htable *t = h->table;
  784 + const struct type_pf_elem *d = value;
  785 + struct hbucket *n;
  786 + struct type_pf_elem *data;
  787 + int ret = 0, i, j = AHASH_MAX_SIZE + 1;
  788 + u32 key;
  789 +
  790 + if (h->elements >= h->maxelem)
  791 + /* FIXME: when set is full, we slow down here */
  792 + type_pf_expire(h);
  793 + if (h->elements >= h->maxelem)
  794 + return -IPSET_ERR_HASH_FULL;
  795 +
  796 + rcu_read_lock_bh();
  797 + t = rcu_dereference_bh(h->table);
  798 + key = HKEY(d, h->initval, t->htable_bits);
  799 + n = hbucket(t, key);
  800 + for (i = 0; i < n->pos; i++) {
  801 + data = ahash_tdata(n, i);
  802 + if (type_pf_data_equal(data, d)) {
  803 + if (type_pf_data_expired(data))
  804 + j = i;
  805 + else {
  806 + ret = -IPSET_ERR_EXIST;
  807 + goto out;
  808 + }
  809 + } else if (j == AHASH_MAX_SIZE + 1 &&
  810 + type_pf_data_expired(data))
  811 + j = i;
  812 + }
  813 + if (j != AHASH_MAX_SIZE + 1) {
  814 + data = ahash_tdata(n, j);
  815 +#ifdef IP_SET_HASH_WITH_NETS
  816 + del_cidr(h, data->cidr, HOST_MASK);
  817 + add_cidr(h, d->cidr, HOST_MASK);
  818 +#endif
  819 + type_pf_data_copy(data, d);
  820 + type_pf_data_timeout_set(data, timeout);
  821 + goto out;
  822 + }
  823 + ret = type_pf_elem_tadd(n, d, timeout);
  824 + if (ret != 0)
  825 + goto out;
  826 +
  827 +#ifdef IP_SET_HASH_WITH_NETS
  828 + add_cidr(h, d->cidr, HOST_MASK);
  829 +#endif
  830 + h->elements++;
  831 +out:
  832 + rcu_read_unlock_bh();
  833 + return ret;
  834 +}
  835 +
  836 +static int
  837 +type_pf_tdel(struct ip_set *set, void *value, u32 timeout)
  838 +{
  839 + struct ip_set_hash *h = set->data;
  840 + struct htable *t = h->table;
  841 + const struct type_pf_elem *d = value;
  842 + struct hbucket *n;
  843 + int i, ret = 0;
  844 + struct type_pf_elem *data;
  845 + u32 key;
  846 +
  847 + key = HKEY(value, h->initval, t->htable_bits);
  848 + n = hbucket(t, key);
  849 + for (i = 0; i < n->pos; i++) {
  850 + data = ahash_tdata(n, i);
  851 + if (!type_pf_data_equal(data, d))
  852 + continue;
  853 + if (type_pf_data_expired(data))
  854 + ret = -IPSET_ERR_EXIST;
  855 + if (i != n->pos - 1)
  856 + /* Not last one */
  857 + type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));
  858 +
  859 + n->pos--;
  860 + h->elements--;
  861 +#ifdef IP_SET_HASH_WITH_NETS
  862 + del_cidr(h, d->cidr, HOST_MASK);
  863 +#endif
  864 + if (n->pos + AHASH_INIT_SIZE < n->size) {
  865 + void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
  866 + * sizeof(struct type_pf_telem),
  867 + GFP_ATOMIC);
  868 + if (!tmp)
  869 + return 0;
  870 + n->size -= AHASH_INIT_SIZE;
  871 + memcpy(tmp, n->value,
  872 + n->size * sizeof(struct type_pf_telem));
  873 + kfree(n->value);
  874 + n->value = tmp;
  875 + }
  876 + return 0;
  877 + }
  878 +
  879 + return -IPSET_ERR_EXIST;
  880 +}
  881 +
  882 +#ifdef IP_SET_HASH_WITH_NETS
  883 +static int
  884 +type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
  885 +{
  886 + struct ip_set_hash *h = set->data;
  887 + struct htable *t = h->table;
  888 + struct type_pf_elem *data;
  889 + struct hbucket *n;
  890 + int i, j = 0;
  891 + u32 key;
  892 + u8 host_mask = SET_HOST_MASK(set->family);
  893 +
  894 + for (; j < host_mask && h->nets[j].cidr; j++) {
  895 + type_pf_data_netmask(d, h->nets[j].cidr);
  896 + key = HKEY(d, h->initval, t->htable_bits);
  897 + n = hbucket(t, key);
  898 + for (i = 0; i < n->pos; i++) {
  899 + data = ahash_tdata(n, i);
  900 + if (type_pf_data_equal(data, d))
  901 + return !type_pf_data_expired(data);
  902 + }
  903 + }
  904 + return 0;
  905 +}
  906 +#endif
  907 +
  908 +static int
  909 +type_pf_ttest(struct ip_set *set, void *value, u32 timeout)
  910 +{
  911 + struct ip_set_hash *h = set->data;
  912 + struct htable *t = h->table;
  913 + struct type_pf_elem *data, *d = value;
  914 + struct hbucket *n;
  915 + int i;
  916 + u32 key;
  917 +
  918 +#ifdef IP_SET_HASH_WITH_NETS
  919 + if (d->cidr == SET_HOST_MASK(set->family))
  920 + return type_pf_ttest_cidrs(set, d, timeout);
  921 +#endif
  922 + key = HKEY(d, h->initval, t->htable_bits);
  923 + n = hbucket(t, key);
  924 + for (i = 0; i < n->pos; i++) {
  925 + data = ahash_tdata(n, i);
  926 + if (type_pf_data_equal(data, d))
  927 + return !type_pf_data_expired(data);
  928 + }
  929 + return 0;
  930 +}
  931 +
  932 +static int
  933 +type_pf_tlist(const struct ip_set *set,
  934 + struct sk_buff *skb, struct netlink_callback *cb)
  935 +{
  936 + const struct ip_set_hash *h = set->data;
  937 + const struct htable *t = h->table;
  938 + struct nlattr *atd, *nested;
  939 + const struct hbucket *n;
  940 + const struct type_pf_elem *data;
  941 + u32 first = cb->args[2];
  942 + /* We assume that one hash bucket fills into one page */
  943 + void *incomplete;
  944 + int i;
  945 +
  946 + atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
  947 + if (!atd)
  948 + return -EMSGSIZE;
  949 + for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
  950 + incomplete = skb_tail_pointer(skb);
  951 + n = hbucket(t, cb->args[2]);
  952 + for (i = 0; i < n->pos; i++) {
  953 + data = ahash_tdata(n, i);
  954 + pr_debug("list %p %u\n", n, i);
  955 + if (type_pf_data_expired(data))
  956 + continue;
  957 + pr_debug("do list %p %u\n", n, i);
  958 + nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
  959 + if (!nested) {
  960 + if (cb->args[2] == first) {
  961 + nla_nest_cancel(skb, atd);
  962 + return -EMSGSIZE;
  963 + } else
  964 + goto nla_put_failure;
  965 + }
  966 + if (type_pf_data_tlist(skb, data))
  967 + goto nla_put_failure;
  968 + ipset_nest_end(skb, nested);
  969 + }
  970 + }
  971 + ipset_nest_end(skb, atd);
  972 + /* Set listing finished */
  973 + cb->args[2] = 0;
  974 +
  975 + return 0;
  976 +
  977 +nla_put_failure:
  978 + nlmsg_trim(skb, incomplete);
  979 + ipset_nest_end(skb, atd);
  980 + if (unlikely(first == cb->args[2])) {
  981 + pr_warning("Can't list set %s: one bucket does not fit into "
  982 + "a message. Please report it!\n", set->name);
  983 + cb->args[2] = 0;
  984 + return -EMSGSIZE;
  985 + }
  986 + return 0;
  987 +}
  988 +
  989 +static const struct ip_set_type_variant type_pf_tvariant = {
  990 + .kadt = type_pf_kadt,
  991 + .uadt = type_pf_uadt,
  992 + .adt = {
  993 + [IPSET_ADD] = type_pf_tadd,
  994 + [IPSET_DEL] = type_pf_tdel,
  995 + [IPSET_TEST] = type_pf_ttest,
  996 + },
  997 + .destroy = type_pf_destroy,
  998 + .flush = type_pf_flush,
  999 + .head = type_pf_head,
  1000 + .list = type_pf_tlist,
  1001 + .resize = type_pf_tresize,
  1002 + .same_set = type_pf_same_set,
  1003 +};
  1004 +
  1005 +static void
  1006 +type_pf_gc(unsigned long ul_set)
  1007 +{
  1008 + struct ip_set *set = (struct ip_set *) ul_set;
  1009 + struct ip_set_hash *h = set->data;
  1010 +
  1011 + pr_debug("called\n");
  1012 + write_lock_bh(&set->lock);
  1013 + type_pf_expire(h);
  1014 + write_unlock_bh(&set->lock);
  1015 +
  1016 + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
  1017 + add_timer(&h->gc);
  1018 +}
  1019 +
  1020 +static void
  1021 +type_pf_gc_init(struct ip_set *set)
  1022 +{
  1023 + struct ip_set_hash *h = set->data;
  1024 +
  1025 + init_timer(&h->gc);
  1026 + h->gc.data = (unsigned long) set;
  1027 + h->gc.function = type_pf_gc;
  1028 + h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
  1029 + add_timer(&h->gc);
  1030 + pr_debug("gc initialized, run in every %u\n",
  1031 + IPSET_GC_PERIOD(h->timeout));
  1032 +}
  1033 +
  1034 +#undef type_pf_data_equal
  1035 +#undef type_pf_data_isnull
  1036 +#undef type_pf_data_copy
  1037 +#undef type_pf_data_zero_out
  1038 +#undef type_pf_data_list
  1039 +#undef type_pf_data_tlist
  1040 +
  1041 +#undef type_pf_elem
  1042 +#undef type_pf_telem
  1043 +#undef type_pf_data_timeout
  1044 +#undef type_pf_data_expired
  1045 +#undef type_pf_data_netmask
  1046 +#undef type_pf_data_timeout_set
  1047 +
  1048 +#undef type_pf_elem_add
  1049 +#undef type_pf_add
  1050 +#undef type_pf_del
  1051 +#undef type_pf_test_cidrs
  1052 +#undef type_pf_test
  1053 +
  1054 +#undef type_pf_elem_tadd
  1055 +#undef type_pf_expire
  1056 +#undef type_pf_tadd
  1057 +#undef type_pf_tdel
  1058 +#undef type_pf_ttest_cidrs
  1059 +#undef type_pf_ttest
  1060 +
  1061 +#undef type_pf_resize
  1062 +#undef type_pf_tresize
  1063 +#undef type_pf_flush
  1064 +#undef type_pf_destroy
  1065 +#undef type_pf_head
  1066 +#undef type_pf_list
  1067 +#undef type_pf_tlist
  1068 +#undef type_pf_same_set
  1069 +#undef type_pf_kadt
  1070 +#undef type_pf_uadt
  1071 +#undef type_pf_gc
  1072 +#undef type_pf_gc_init
  1073 +#undef type_pf_variant
  1074 +#undef type_pf_tvariant
include/linux/netfilter/ipset/ip_set_hash.h
  1 +#ifndef __IP_SET_HASH_H
  2 +#define __IP_SET_HASH_H
  3 +
  4 +/* Hash type specific error codes */
  5 +enum {
  6 + /* Hash is full */
  7 + IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC,
  8 + /* Null-valued element */
  9 + IPSET_ERR_HASH_ELEM,
  10 + /* Invalid protocol */
  11 + IPSET_ERR_INVALID_PROTO,
  12 + /* Protocol missing but must be specified */
  13 + IPSET_ERR_MISSING_PROTO,
  14 +};
  15 +
  16 +#ifdef __KERNEL__
  17 +
  18 +#define IPSET_DEFAULT_HASHSIZE 1024
  19 +#define IPSET_MIMINAL_HASHSIZE 64
  20 +#define IPSET_DEFAULT_MAXELEM 65536
  21 +#define IPSET_DEFAULT_PROBES 4
  22 +#define IPSET_DEFAULT_RESIZE 100
  23 +
  24 +#endif /* __KERNEL__ */
  25 +
  26 +#endif /* __IP_SET_HASH_H */
net/netfilter/ipset/Kconfig
... ... @@ -50,5 +50,15 @@
50 50  
51 51 To compile it as a module, choose M here. If unsure, say N.
52 52  
  53 +config IP_SET_HASH_IP
  54 + tristate "hash:ip set support"
  55 + depends on IP_SET
  56 + help
  57 + This option adds the hash:ip set type support, by which one
  58 + can store arbitrary IPv4 or IPv6 addresses (or network addresses)
  59 + in a set.
  60 +
  61 + To compile it as a module, choose M here. If unsure, say N.
  62 +
53 63 endif # IP_SET
net/netfilter/ipset/Makefile
... ... @@ -11,4 +11,7 @@
11 11 obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
12 12 obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
13 13 obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
  14 +
  15 +# hash types
  16 +obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
net/netfilter/ipset/ip_set_hash_ip.c
  1 +/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  2 + *
  3 + * This program is free software; you can redistribute it and/or modify
  4 + * it under the terms of the GNU General Public License version 2 as
  5 + * published by the Free Software Foundation.
  6 + */
  7 +
  8 +/* Kernel module implementing an IP set type: the hash:ip type */
  9 +
  10 +#include <linux/jhash.h>
  11 +#include <linux/module.h>
  12 +#include <linux/ip.h>
  13 +#include <linux/skbuff.h>
  14 +#include <linux/errno.h>
  15 +#include <linux/uaccess.h>
  16 +#include <linux/bitops.h>
  17 +#include <linux/spinlock.h>
  18 +#include <linux/random.h>
  19 +#include <net/ip.h>
  20 +#include <net/ipv6.h>
  21 +#include <net/netlink.h>
  22 +#include <net/tcp.h>
  23 +
  24 +#include <linux/netfilter.h>
  25 +#include <linux/netfilter/ipset/pfxlen.h>
  26 +#include <linux/netfilter/ipset/ip_set.h>
  27 +#include <linux/netfilter/ipset/ip_set_timeout.h>
  28 +#include <linux/netfilter/ipset/ip_set_hash.h>
  29 +
  30 +MODULE_LICENSE("GPL");
  31 +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
  32 +MODULE_DESCRIPTION("hash:ip type of IP sets");
  33 +MODULE_ALIAS("ip_set_hash:ip");
  34 +
  35 +/* Type specific function prefix */
  36 +#define TYPE hash_ip
  37 +
  38 +static bool
  39 +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
  40 +
  41 +#define hash_ip4_same_set hash_ip_same_set
  42 +#define hash_ip6_same_set hash_ip_same_set
  43 +
  44 +/* The type variant functions: IPv4 */
  45 +
  46 +/* Member elements without timeout */
  47 +struct hash_ip4_elem {
  48 + __be32 ip;
  49 +};
  50 +
  51 +/* Member elements with timeout support */
  52 +struct hash_ip4_telem {
  53 + __be32 ip;
  54 + unsigned long timeout;
  55 +};
  56 +
  57 +static inline bool
  58 +hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
  59 + const struct hash_ip4_elem *ip2)
  60 +{
  61 + return ip1->ip == ip2->ip;
  62 +}
  63 +
  64 +static inline bool
  65 +hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
  66 +{
  67 + return elem->ip == 0;
  68 +}
  69 +
  70 +static inline void
  71 +hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
  72 +{
  73 + dst->ip = src->ip;
  74 +}
  75 +
  76 +/* Zero valued IP addresses cannot be stored */
  77 +static inline void
  78 +hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
  79 +{
  80 + elem->ip = 0;
  81 +}
  82 +
  83 +static inline bool
  84 +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
  85 +{
  86 + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip);
  87 + return 0;
  88 +
  89 +nla_put_failure:
  90 + return 1;
  91 +}
  92 +
  93 +static bool
  94 +hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
  95 +{
  96 + const struct hash_ip4_telem *tdata =
  97 + (const struct hash_ip4_telem *)data;
  98 +
  99 + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip);
  100 + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
  101 + htonl(ip_set_timeout_get(tdata->timeout)));
  102 +
  103 + return 0;
  104 +
  105 +nla_put_failure:
  106 + return 1;
  107 +}
  108 +
  109 +#define IP_SET_HASH_WITH_NETMASK
  110 +#define PF 4
  111 +#define HOST_MASK 32
  112 +#include <linux/netfilter/ipset/ip_set_ahash.h>
  113 +
  114 +static int
  115 +hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
  116 + enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
  117 +{
  118 + const struct ip_set_hash *h = set->data;
  119 + ipset_adtfn adtfn = set->variant->adt[adt];
  120 + __be32 ip;
  121 +
  122 + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip);
  123 + ip &= ip_set_netmask(h->netmask);
  124 + if (ip == 0)
  125 + return -EINVAL;
  126 +
  127 + return adtfn(set, &ip, h->timeout);
  128 +}
  129 +
  130 +static int
  131 +hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
  132 + enum ipset_adt adt, u32 *lineno, u32 flags)
  133 +{
  134 + const struct ip_set_hash *h = set->data;
  135 + ipset_adtfn adtfn = set->variant->adt[adt];
  136 + u32 ip, ip_to, hosts, timeout = h->timeout;
  137 + __be32 nip;
  138 + int ret = 0;
  139 +
  140 + if (unlikely(!tb[IPSET_ATTR_IP] ||
  141 + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
  142 + return -IPSET_ERR_PROTOCOL;
  143 +
  144 + if (tb[IPSET_ATTR_LINENO])
  145 + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
  146 +
  147 + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
  148 + if (ret)
  149 + return ret;
  150 +
  151 + ip &= ip_set_hostmask(h->netmask);
  152 +
  153 + if (tb[IPSET_ATTR_TIMEOUT]) {
  154 + if (!with_timeout(h->timeout))
  155 + return -IPSET_ERR_TIMEOUT;
  156 + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
  157 + }
  158 +
  159 + if (adt == IPSET_TEST) {
  160 + nip = htonl(ip);
  161 + if (nip == 0)
  162 + return -IPSET_ERR_HASH_ELEM;
  163 + return adtfn(set, &nip, timeout);
  164 + }
  165 +
  166 + if (tb[IPSET_ATTR_IP_TO]) {
  167 + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
  168 + if (ret)
  169 + return ret;
  170 + if (ip > ip_to)
  171 + swap(ip, ip_to);
  172 + } else if (tb[IPSET_ATTR_CIDR]) {
  173 + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
  174 +
  175 + if (cidr > 32)
  176 + return -IPSET_ERR_INVALID_CIDR;
  177 + ip &= ip_set_hostmask(cidr);
  178 + ip_to = ip | ~ip_set_hostmask(cidr);
  179 + } else
  180 + ip_to = ip;
  181 +
  182 + hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
  183 +
  184 + for (; !before(ip_to, ip); ip += hosts) {
  185 + nip = htonl(ip);
  186 + if (nip == 0)
  187 + return -IPSET_ERR_HASH_ELEM;
  188 + ret = adtfn(set, &nip, timeout);
  189 +
  190 + if (ret && !ip_set_eexist(ret, flags))
  191 + return ret;
  192 + else
  193 + ret = 0;
  194 + }
  195 + return ret;
  196 +}
  197 +
  198 +static bool
  199 +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
  200 +{
  201 + const struct ip_set_hash *x = a->data;
  202 + const struct ip_set_hash *y = b->data;
  203 +
  204 + /* Resizing changes htable_bits, so we ignore it */
  205 + return x->maxelem == y->maxelem &&
  206 + x->timeout == y->timeout &&
  207 + x->netmask == y->netmask;
  208 +}
  209 +
  210 +/* The type variant functions: IPv6 */
  211 +
  212 +struct hash_ip6_elem {
  213 + union nf_inet_addr ip;
  214 +};
  215 +
  216 +struct hash_ip6_telem {
  217 + union nf_inet_addr ip;
  218 + unsigned long timeout;
  219 +};
  220 +
  221 +static inline bool
  222 +hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
  223 + const struct hash_ip6_elem *ip2)
  224 +{
  225 + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
  226 +}
  227 +
  228 +static inline bool
  229 +hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
  230 +{
  231 + return ipv6_addr_any(&elem->ip.in6);
  232 +}
  233 +
  234 +static inline void
  235 +hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
  236 +{
  237 + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
  238 +}
  239 +
  240 +static inline void
  241 +hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
  242 +{
  243 + ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
  244 +}
  245 +
  246 +static inline void
  247 +ip6_netmask(union nf_inet_addr *ip, u8 prefix)
  248 +{
  249 + ip->ip6[0] &= ip_set_netmask6(prefix)[0];
  250 + ip->ip6[1] &= ip_set_netmask6(prefix)[1];
  251 + ip->ip6[2] &= ip_set_netmask6(prefix)[2];
  252 + ip->ip6[3] &= ip_set_netmask6(prefix)[3];
  253 +}
  254 +
  255 +static bool
  256 +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
  257 +{
  258 + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip);
  259 + return 0;
  260 +
  261 +nla_put_failure:
  262 + return 1;
  263 +}
  264 +
  265 +static bool
  266 +hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
  267 +{
  268 + const struct hash_ip6_telem *e =
  269 + (const struct hash_ip6_telem *)data;
  270 +
  271 + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip);
  272 + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
  273 + htonl(ip_set_timeout_get(e->timeout)));
  274 + return 0;
  275 +
  276 +nla_put_failure:
  277 + return 1;
  278 +}
  279 +
  280 +#undef PF
  281 +#undef HOST_MASK
  282 +
  283 +#define PF 6
  284 +#define HOST_MASK 128
  285 +#include <linux/netfilter/ipset/ip_set_ahash.h>
  286 +
  287 +static int
  288 +hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
  289 + enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
  290 +{
  291 + const struct ip_set_hash *h = set->data;
  292 + ipset_adtfn adtfn = set->variant->adt[adt];
  293 + union nf_inet_addr ip;
  294 +
  295 + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6);
  296 + ip6_netmask(&ip, h->netmask);
  297 + if (ipv6_addr_any(&ip.in6))
  298 + return -EINVAL;
  299 +
  300 + return adtfn(set, &ip, h->timeout);
  301 +}
  302 +
  303 +static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
  304 + [IPSET_ATTR_IP] = { .type = NLA_NESTED },
  305 + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
  306 + [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
  307 +};
  308 +
  309 +static int
  310 +hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
  311 + enum ipset_adt adt, u32 *lineno, u32 flags)
  312 +{
  313 + const struct ip_set_hash *h = set->data;
  314 + ipset_adtfn adtfn = set->variant->adt[adt];
  315 + union nf_inet_addr ip;
  316 + u32 timeout = h->timeout;
  317 + int ret;
  318 +
  319 + if (unlikely(!tb[IPSET_ATTR_IP] ||
  320 + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
  321 + tb[IPSET_ATTR_IP_TO] ||
  322 + tb[IPSET_ATTR_CIDR]))
  323 + return -IPSET_ERR_PROTOCOL;
  324 +
  325 + if (tb[IPSET_ATTR_LINENO])
  326 + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
  327 +
  328 + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
  329 + if (ret)
  330 + return ret;
  331 +
  332 + ip6_netmask(&ip, h->netmask);
  333 + if (ipv6_addr_any(&ip.in6))
  334 + return -IPSET_ERR_HASH_ELEM;
  335 +
  336 + if (tb[IPSET_ATTR_TIMEOUT]) {
  337 + if (!with_timeout(h->timeout))
  338 + return -IPSET_ERR_TIMEOUT;
  339 + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
  340 + }
  341 +
  342 + ret = adtfn(set, &ip, timeout);
  343 +
  344 + return ip_set_eexist(ret, flags) ? 0 : ret;
  345 +}
  346 +
  347 +/* Create hash:ip type of sets */
  348 +
  349 +static int
  350 +hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
  351 +{
  352 + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
  353 + u8 netmask, hbits;
  354 + struct ip_set_hash *h;
  355 +
  356 + if (!(set->family == AF_INET || set->family == AF_INET6))
  357 + return -IPSET_ERR_INVALID_FAMILY;
  358 + netmask = set->family == AF_INET ? 32 : 128;
  359 + pr_debug("Create set %s with family %s\n",
  360 + set->name, set->family == AF_INET ? "inet" : "inet6");
  361 +
  362 + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
  363 + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
  364 + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
  365 + return -IPSET_ERR_PROTOCOL;
  366 +
  367 + if (tb[IPSET_ATTR_HASHSIZE]) {
  368 + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
  369 + if (hashsize < IPSET_MIMINAL_HASHSIZE)
  370 + hashsize = IPSET_MIMINAL_HASHSIZE;
  371 + }
  372 +
  373 + if (tb[IPSET_ATTR_MAXELEM])
  374 + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
  375 +
  376 + if (tb[IPSET_ATTR_NETMASK]) {
  377 + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
  378 +
  379 + if ((set->family == AF_INET && netmask > 32) ||
  380 + (set->family == AF_INET6 && netmask > 128) ||
  381 + netmask == 0)
  382 + return -IPSET_ERR_INVALID_NETMASK;
  383 + }
  384 +
  385 + h = kzalloc(sizeof(*h), GFP_KERNEL);
  386 + if (!h)
  387 + return -ENOMEM;
  388 +
  389 + h->maxelem = maxelem;
  390 + h->netmask = netmask;
  391 + get_random_bytes(&h->initval, sizeof(h->initval));
  392 + h->timeout = IPSET_NO_TIMEOUT;
  393 +
  394 + hbits = htable_bits(hashsize);
  395 + h->table = ip_set_alloc(
  396 + sizeof(struct htable)
  397 + + jhash_size(hbits) * sizeof(struct hbucket));
  398 + if (!h->table) {
  399 + kfree(h);
  400 + return -ENOMEM;
  401 + }
  402 + h->table->htable_bits = hbits;
  403 +
  404 + set->data = h;
  405 +
  406 + if (tb[IPSET_ATTR_TIMEOUT]) {
  407 + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
  408 +
  409 + set->variant = set->family == AF_INET
  410 + ? &hash_ip4_tvariant : &hash_ip6_tvariant;
  411 +
  412 + if (set->family == AF_INET)
  413 + hash_ip4_gc_init(set);
  414 + else
  415 + hash_ip6_gc_init(set);
  416 + } else {
  417 + set->variant = set->family == AF_INET
  418 + ? &hash_ip4_variant : &hash_ip6_variant;
  419 + }
  420 +
  421 + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
  422 + set->name, jhash_size(h->table->htable_bits),
  423 + h->table->htable_bits, h->maxelem, set->data, h->table);
  424 +
  425 + return 0;
  426 +}
  427 +
  428 +static struct ip_set_type hash_ip_type __read_mostly = {
  429 + .name = "hash:ip",
  430 + .protocol = IPSET_PROTOCOL,
  431 + .features = IPSET_TYPE_IP,
  432 + .dimension = IPSET_DIM_ONE,
  433 + .family = AF_UNSPEC,
  434 + .revision = 0,
  435 + .create = hash_ip_create,
  436 + .create_policy = {
  437 + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
  438 + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
  439 + [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
  440 + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
  441 + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
  442 + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
  443 + },
  444 + .adt_policy = {
  445 + [IPSET_ATTR_IP] = { .type = NLA_NESTED },
  446 + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
  447 + [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
  448 + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
  449 + [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
  450 + },
  451 + .me = THIS_MODULE,
  452 +};
  453 +
  454 +static int __init
  455 +hash_ip_init(void)
  456 +{
  457 + return ip_set_type_register(&hash_ip_type);
  458 +}
  459 +
  460 +static void __exit
  461 +hash_ip_fini(void)
  462 +{
  463 + ip_set_type_unregister(&hash_ip_type);
  464 +}
  465 +
  466 +module_init(hash_ip_init);
  467 +module_exit(hash_ip_fini);