Commit bbaffaca4810de1a25e32ecaf836eeaacc7a3d11

Authored by Eric Dumazet
Committed by David S. Miller
1 parent e8b2dfe9b4

rcu: Introduce hlist_nulls variant of hlist

hlist uses NULL value to finish a chain.

The hlist_nulls variant uses the low-order bit, set to 1, to signal an end-of-list marker.

This allows many different end markers to be stored, so that some RCU lockless
algorithms (used in the TCP/UDP stack, for example) can save some memory
barriers in fast paths.

Two new files are added :

include/linux/list_nulls.h
  - mimics the hlist part of include/linux/list.h, adapted to the hlist_nulls variant

include/linux/rculist_nulls.h
  - mimics the hlist part of include/linux/rculist.h, adapted to the hlist_nulls variant

   Only four helpers are declared for now:

     hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
     hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()

The prefetch() calls were removed, since the end of a list is no longer a NULL value:
prefetching a 'nulls' marker could trigger useless (and possibly dangerous) memory transactions.

Example of use (extracted from __udp4_lib_lookup())

	struct sock *sk, *result;
        struct hlist_nulls_node *node;
        unsigned short hnum = ntohs(dport);
        unsigned int hash = udp_hashfn(net, hnum);
        struct udp_hslot *hslot = &udptable->hash[hash];
        int score, badness;

        rcu_read_lock();
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != hash)
                goto begin;

        if (result) {
                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
                        result = NULL;
                else if (unlikely(compute_score(result, net, saddr, hnum, sport,
                                  daddr, dport, dif) < badness)) {
                        sock_put(result);
                        goto begin;
                }
        }
        rcu_read_unlock();
        return result;

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 204 additions and 0 deletions Side-by-side Diff

include/linux/list_nulls.h
  1 +#ifndef _LINUX_LIST_NULLS_H
  2 +#define _LINUX_LIST_NULLS_H
  3 +
  4 +/*
  5 + * Special version of lists, where end of list is not a NULL pointer,
  6 + * but a 'nulls' marker, which can have many different values.
  7 + * (up to 2^31 different values guaranteed on all platforms)
  8 + *
  9 + * In the standard hlist, termination of a list is the NULL pointer.
  10 + * In this special 'nulls' variant, we use the fact that objects stored in
  11 + * a list are aligned on a word (4 or 8 bytes alignment).
  12 + * We therefore use the least significant bit of 'ptr' :
  13 + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
  14 + * Set to 0 : This is a pointer to some object (ptr)
  15 + */
  16 +
  17 +struct hlist_nulls_head {
  18 + struct hlist_nulls_node *first; /* first node, or a 'nulls' marker when the list is empty */
  19 +};
  20 +
  21 +struct hlist_nulls_node {
  22 + struct hlist_nulls_node *next, **pprev; /* next: following node or 'nulls' end marker; pprev: address of the pointer that points at this node */
  23 +};
  24 +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
  25 + ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1)))
  26 +
  27 +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) /* object of @type that embeds the node @ptr as @member */
  28 +/**
  29 + * is_a_nulls - Test if a ptr is a nulls
  30 + * @ptr: ptr to be tested
  31 + * Returns 1 if the low-order bit of @ptr is set (a 'nulls' marker), else 0.
  32 + */
  33 +static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
  34 +{
  35 + return ((unsigned long)ptr & 1);
  36 +}
  37 +
  38 +/**
  39 + * get_nulls_value - Get the 'nulls' value of the end of chain
  40 + * @ptr: end of chain
  41 + * Returns @ptr shifted right by one bit, which drops the marker bit.
  42 + * Should be called only if is_a_nulls(ptr) is true.
  43 + */
  44 +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
  45 +{
  46 + return ((unsigned long)ptr) >> 1;
  47 +}
  48 +
  49 +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
  50 +{
  51 + return !h->pprev; /* true when the node's pprev back-pointer is NULL */
  52 +}
  53 +
  54 +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
  55 +{
  56 + return is_a_nulls(h->first); /* empty when the head holds a 'nulls' marker instead of a node */
  57 +}
  58 +
  59 +static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
  60 +{ /* unlink n from its chain; n's own next and pprev fields are not modified here */
  61 + struct hlist_nulls_node *next = n->next;
  62 + struct hlist_nulls_node **pprev = n->pprev;
  63 + *pprev = next; /* the pointer that referenced n now references n's successor */
  64 + if (!is_a_nulls(next)) /* a 'nulls' marker is not a node, so it has no pprev to update */
  65 + next->pprev = pprev;
  66 +}
  67 +
  68 +/**
  69 + * hlist_nulls_for_each_entry - iterate over list of given type
  70 + * @tpos: the type * to use as a loop cursor.
  71 + * @pos: the &struct hlist_nulls_node to use as a loop cursor.
  72 + * @head: the head for your list.
  73 + * @member: the name of the hlist_nulls_node within the struct.
  74 + * The loop stops when @pos is a 'nulls' marker; @pos still holds it afterwards.
  75 + */
  76 +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \
  77 + for (pos = (head)->first; \
  78 + (!is_a_nulls(pos)) && \
  79 + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
  80 + pos = pos->next)
  81 +
  82 +/**
  83 + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
  84 + * @tpos: the type * to use as a loop cursor.
  85 + * @pos: the &struct hlist_nulls_node to use as a loop cursor.
  86 + * @member: the name of the hlist_nulls_node within the struct.
  87 + * @pos is not initialized here: it must already hold the starting node or a 'nulls' marker.
  88 + */
  89 +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \
  90 + for (; (!is_a_nulls(pos)) && \
  91 + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
  92 + pos = pos->next)
  93 +
  94 +#endif
include/linux/rculist_nulls.h
  1 +#ifndef _LINUX_RCULIST_NULLS_H
  2 +#define _LINUX_RCULIST_NULLS_H
  3 +
  4 +#ifdef __KERNEL__
  5 +
  6 +/*
  7 + * RCU-protected list version
  8 + */
  9 +#include <linux/list_nulls.h>
  10 +#include <linux/rcupdate.h>
  11 +
  12 +/**
  13 + * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
  14 + * @n: the element to delete from the hash list.
  15 + *
  16 + * Note: hlist_nulls_unhashed() on the node returns true after this. It is
  17 + * useful for RCU based read lockfree traversal if the writer side
  18 + * must know if the list entry is still hashed or already unhashed.
  19 + *
  20 + * In particular, it means that we can not poison the forward pointers
  21 + * that may still be used for walking the hash list and we can only
  22 + * zero the pprev pointer so hlist_nulls_unhashed() will return true after
  23 + * this.
  24 + *
  25 + * The caller must take whatever precautions are necessary (such as
  26 + * holding appropriate locks) to avoid racing with another
  27 + * list-mutation primitive, such as hlist_nulls_add_head_rcu() or
  28 + * hlist_nulls_del_rcu(), running on this same list. However, it is
  29 + * perfectly legal to run concurrently with the _rcu list-traversal
  30 + * primitives, such as hlist_nulls_for_each_entry_rcu().
  31 + */
  32 +static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
  33 +{
  34 + if (!hlist_nulls_unhashed(n)) { /* an already-unhashed node is left alone */
  35 + __hlist_nulls_del(n);
  36 + n->pprev = NULL; /* mark as unhashed; n->next is deliberately left intact */
  37 + }
  38 +}
  39 +
  40 +/**
  41 + * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
  42 + * @n: the element to delete from the hash list.
  43 + *
  44 + * Note: hlist_nulls_unhashed() on entry does not return true after this,
  45 + * the entry is in an undefined state. It is useful for RCU based
  46 + * lockfree traversal.
  47 + *
  48 + * In particular, it means that we can not poison the forward
  49 + * pointers that may still be used for walking the hash list.
  50 + *
  51 + * The caller must take whatever precautions are necessary
  52 + * (such as holding appropriate locks) to avoid racing
  53 + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
  54 + * or hlist_nulls_del_rcu(), running on this same list.
  55 + * However, it is perfectly legal to run concurrently with
  56 + * the _rcu list-traversal primitives, such as
  57 + * hlist_nulls_for_each_entry_rcu().
  58 + */
  59 +static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
  60 +{
  61 + __hlist_nulls_del(n);
  62 + n->pprev = LIST_POISON2; /* only the back-pointer is poisoned; n->next is left intact. NOTE(review): LIST_POISON2 is not defined in either header shown here - presumably it arrives via <linux/rcupdate.h>; confirm */
  63 +}
  64 +
  65 +/**
  66 + * hlist_nulls_add_head_rcu - add an element at the head of a hlist_nulls
  67 + * @n: the element to add to the hash list.
  68 + * @h: the list to add to.
  69 + *
  70 + * Description:
  71 + * Adds the specified element to the specified hlist_nulls,
  72 + * while permitting racing traversals.
  73 + *
  74 + * The caller must take whatever precautions are necessary
  75 + * (such as holding appropriate locks) to avoid racing
  76 + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
  77 + * or hlist_nulls_del_rcu(), running on this same list.
  78 + * However, it is perfectly legal to run concurrently with
  79 + * the _rcu list-traversal primitives, such as
  80 + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
  81 + * problems on Alpha CPUs. Regardless of the type of CPU, the
  82 + * list-traversal primitive must be guarded by rcu_read_lock().
  83 + */
  84 +static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
  85 + struct hlist_nulls_head *h)
  86 +{
  87 + struct hlist_nulls_node *first = h->first;
  88 +
  89 + n->next = first;
  90 + n->pprev = &h->first;
  91 + rcu_assign_pointer(h->first, n); /* n becomes reachable from the head only after its next/pprev are set */
  92 + if (!is_a_nulls(first)) /* the old head may be a 'nulls' marker, which has no pprev */
  93 + first->pprev = &n->next;
  94 +}
  95 +/**
  96 + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
  97 + * @tpos: the type * to use as a loop cursor.
  98 + * @pos: the &struct hlist_nulls_node to use as a loop cursor.
  99 + * @head: the head for your list.
  100 + * @member: the name of the hlist_nulls_node within the struct.
  101 + * The loop stops when @pos is a 'nulls' marker; @pos still holds it afterwards.
  102 + */
  103 +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
  104 + for (pos = rcu_dereference((head)->first); \
  105 + (!is_a_nulls(pos)) && \
  106 + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
  107 + pos = rcu_dereference(pos->next))
  108 +
  109 +#endif
  110 +#endif