Commit bbaffaca4810de1a25e32ecaf836eeaacc7a3d11
Committed by
David S. Miller
1 parent
e8b2dfe9b4
Exists in
master
and in
39 other branches
rcu: Introduce hlist_nulls variant of hlist
hlist uses a NULL value to terminate a chain. The hlist_nulls variant uses the low-order bit set to 1 to signal an end-of-list marker. This allows storing many different end markers, so that some RCU lockless algos (used in the TCP/UDP stack, for example) can save some memory barriers in fast paths. Two new files are added: include/linux/list_nulls.h - mimics the hlist part of include/linux/list.h, adapted to the hlist_nulls variant; include/linux/rculist_nulls.h - mimics the hlist part of include/linux/rculist.h, adapted to the hlist_nulls variant. Only four helpers are declared for the moment: hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu(). The prefetch() calls were removed, since the end of a list is no longer a NULL value; a prefetch() on a nulls marker could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()): struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 204 additions and 0 deletions Side-by-side Diff
include/linux/list_nulls.h
1 | +#ifndef _LINUX_LIST_NULLS_H | |
2 | +#define _LINUX_LIST_NULLS_H | |
3 | + | |
4 | +/* | |
5 | + * Special version of lists, where end of list is not a NULL pointer, | |
6 | + * but a 'nulls' marker, which can have many different values. | |
7 | + * (up to 2^31 different values guaranteed on all platforms) | |
8 | + * | |
9 | + * In the standard hlist, termination of a list is the NULL pointer. | |
10 | + * In this special 'nulls' variant, we use the fact that objects stored in | |
11 | + * a list are aligned on a word (4 or 8 bytes alignment). | |
12 | + * We therefore use the least significant bit of 'ptr' : | |
13 | + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) | |
14 | + * Set to 0 : This is a pointer to some object (ptr) | |
15 | + */ | |
16 | + | |
17 | +struct hlist_nulls_head { | |
18 | + struct hlist_nulls_node *first; /* first node, or a 'nulls' marker when the list is empty */ | |
19 | +}; | |
20 | + | |
21 | +struct hlist_nulls_node { | |
22 | + struct hlist_nulls_node *next, **pprev; /* next: following node or the 'nulls' end marker; pprev: address of the pointer that points at this node */ | |
23 | +}; | |
24 | +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ | |
25 | + ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) /* start empty: 'first' holds the nulls value shifted left by one, with the low bit set; NOTE(review): 'nulls' is not parenthesized here, so pass a simple expression */ | |
26 | + | |
27 | +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) /* map a node pointer back to the 'type' object that embeds it as 'member' */
28 | +/** | |
29 | + * is_a_nulls - Test if a ptr is a nulls | |
30 | + * @ptr: ptr to be tested | |
31 | + * Returns 1 if the low-order bit of @ptr is set (end-of-list marker), 0 otherwise. | |
32 | + */ | |
33 | +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) | |
34 | +{ | |
35 | + return ((unsigned long)ptr & 1); /* bit 0 set: marker; bit 0 clear: pointer to a node */ | |
36 | +} | |
37 | + | |
38 | +/** | |
39 | + * get_nulls_value - Get the 'nulls' value of the end of chain | |
40 | + * @ptr: end of chain | |
41 | + * | |
42 | + * Should be called only if is_a_nulls(ptr); | |
43 | + */ | |
44 | +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) | |
45 | +{ | |
46 | + return ((unsigned long)ptr) >> 1; /* shift out the marker bit to recover the stored value */ | |
47 | +} | |
48 | + | |
49 | +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) | |
50 | +{ | |
51 | + return !h->pprev; /* nonzero when pprev is NULL, i.e. the node is not linked into a list */ | |
52 | +} | |
53 | + | |
54 | +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) | |
55 | +{ | |
56 | + return is_a_nulls(h->first); /* empty when the head holds only the end-of-list marker */ | |
57 | +} | |
58 | + | |
59 | +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) | |
60 | +{ /* Unlink n from its list; n's own next and pprev fields are left unchanged. */ | |
61 | + struct hlist_nulls_node *next = n->next; | |
62 | + struct hlist_nulls_node **pprev = n->pprev; | |
63 | + *pprev = next; /* whatever pointed at n now points at n's successor (or the marker) */ | |
64 | + if (!is_a_nulls(next)) /* a nulls marker is not a real node, so it has no pprev to fix up */ | |
65 | + next->pprev = pprev; | |
66 | +} | |
67 | + | |
68 | +/** | |
69 | + * hlist_nulls_for_each_entry - iterate over list of given type | |
70 | + * @tpos: the type * to use as a loop cursor. | |
71 | + * @pos: the &struct hlist_nulls_node to use as a loop cursor. | |
72 | + * @head: the head for your list. | |
73 | + * @member: the name of the hlist_nulls_node within the struct. | |
74 | + * The loop ends when @pos is a 'nulls' marker; @pos then holds that marker. | |
75 | + */ | |
76 | +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ | |
77 | + for (pos = (head)->first; \ | |
78 | + (!is_a_nulls(pos)) && \ | |
79 | + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ | |
80 | + pos = pos->next) | |
81 | + | |
82 | +/** | |
83 | + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point | |
84 | + * @tpos: the type * to use as a loop cursor. | |
85 | + * @pos: the &struct hlist_nulls_node to use as a loop cursor. | |
86 | + * @member: the name of the hlist_nulls_node within the struct. | |
87 | + * @pos must be set by the caller before the loop; iteration ends at a 'nulls' marker. | |
88 | + */ | |
89 | +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ | |
90 | + for (; (!is_a_nulls(pos)) && \ | |
91 | + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ | |
92 | + pos = pos->next) | |
93 | + | |
94 | +#endif |
include/linux/rculist_nulls.h
1 | +#ifndef _LINUX_RCULIST_NULLS_H | |
2 | +#define _LINUX_RCULIST_NULLS_H | |
3 | + | |
4 | +#ifdef __KERNEL__ | |
5 | + | |
6 | +/* | |
7 | + * RCU-protected list version | |
8 | + */ | |
9 | +#include <linux/list_nulls.h> | |
10 | +#include <linux/rcupdate.h> | |
11 | + | |
12 | +/** | |
13 | + * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization | |
14 | + * @n: the element to delete from the hash list. | |
15 | + * | |
16 | + * Note: hlist_nulls_unhashed() on the node returns true after this. It is | |
17 | + * useful for RCU based read lockfree traversal if the writer side | |
18 | + * must know if the list entry is still hashed or already unhashed. | |
19 | + * | |
20 | + * In particular, it means that we can not poison the forward pointers | |
21 | + * that may still be used for walking the hash list and we can only | |
22 | + * zero the pprev pointer so hlist_nulls_unhashed() will return true after | |
23 | + * this. | |
24 | + * | |
25 | + * The caller must take whatever precautions are necessary (such as | |
26 | + * holding appropriate locks) to avoid racing with another | |
27 | + * list-mutation primitive, such as hlist_nulls_add_head_rcu() or | |
28 | + * hlist_nulls_del_rcu(), running on this same list. However, it is | |
29 | + * perfectly legal to run concurrently with the _rcu list-traversal | |
30 | + * primitives, such as hlist_nulls_for_each_entry_rcu(). | |
31 | + */ | |
32 | +static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) | |
33 | +{ | |
34 | + if (!hlist_nulls_unhashed(n)) { /* nothing to do if the node is already unlinked */ | |
35 | + __hlist_nulls_del(n); | |
36 | + n->pprev = NULL; /* mark as unhashed; n->next is kept for readers still walking past n */ | |
37 | + } | |
38 | +} | |
39 | + | |
40 | +/** | |
41 | + * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization | |
42 | + * @n: the element to delete from the hash list. | |
43 | + * | |
44 | + * Note: hlist_nulls_unhashed() on entry does not return true after this, | |
45 | + * the entry is in an undefined state. It is useful for RCU based | |
46 | + * lockfree traversal. | |
47 | + * | |
48 | + * In particular, it means that we can not poison the forward | |
49 | + * pointers that may still be used for walking the hash list. | |
50 | + * | |
51 | + * The caller must take whatever precautions are necessary | |
52 | + * (such as holding appropriate locks) to avoid racing | |
53 | + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | |
54 | + * or hlist_nulls_del_rcu(), running on this same list. | |
55 | + * However, it is perfectly legal to run concurrently with | |
56 | + * the _rcu list-traversal primitives, such as | |
57 | + * hlist_nulls_for_each_entry_rcu(). | |
58 | + */ | |
59 | +static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) | |
60 | +{ | |
61 | + __hlist_nulls_del(n); | |
62 | + n->pprev = LIST_POISON2; /* only the back pointer is poisoned; NOTE(review): LIST_POISON2 is not defined by the headers included here, presumably it comes from <linux/poison.h> - confirm */ | |
63 | +} | |
64 | + | |
65 | +/** | |
66 | + * hlist_nulls_add_head_rcu - add an element at the head of a hlist_nulls | |
67 | + * @n: the element to add to the hash list. | |
68 | + * @h: the list to add to. | |
69 | + * | |
70 | + * Description: | |
71 | + * Adds the specified element to the specified hlist_nulls, | |
72 | + * while permitting racing traversals. | |
73 | + * | |
74 | + * The caller must take whatever precautions are necessary | |
75 | + * (such as holding appropriate locks) to avoid racing | |
76 | + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | |
77 | + * or hlist_nulls_del_rcu(), running on this same list. | |
78 | + * However, it is perfectly legal to run concurrently with | |
79 | + * the _rcu list-traversal primitives, such as | |
80 | + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency | |
81 | + * problems on Alpha CPUs. Regardless of the type of CPU, the | |
82 | + * list-traversal primitive must be guarded by rcu_read_lock(). | |
83 | + */ | |
84 | +static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, | |
85 | + struct hlist_nulls_head *h) | |
86 | +{ | |
87 | + struct hlist_nulls_node *first = h->first; | |
88 | + | |
89 | + n->next = first; /* fill in n's links before n becomes reachable from the head */ | |
90 | + n->pprev = &h->first; | |
91 | + rcu_assign_pointer(h->first, n); /* publish n as the new first element */ | |
92 | + if (!is_a_nulls(first)) /* the list was not empty: the old first node now hangs off n */ | |
93 | + first->pprev = &n->next; | |
94 | +} | |
95 | +/** | |
96 | + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type | |
97 | + * @tpos: the type * to use as a loop cursor. | |
98 | + * @pos: the &struct hlist_nulls_node to use as a loop cursor. | |
99 | + * @head: the head for your list. | |
100 | + * @member: the name of the hlist_nulls_node within the struct. | |
101 | + * The loop ends when @pos is a 'nulls' marker; @pos then holds that marker. | |
102 | + */ | |
103 | +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ | |
104 | + for (pos = rcu_dereference((head)->first); \ | |
105 | + (!is_a_nulls(pos)) && \ | |
106 | + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ | |
107 | + pos = rcu_dereference(pos->next)) | |
108 | + | |
109 | +#endif | |
110 | +#endif |