Commit b914c4ea929a4ba6fb97967800dc473c31552b98

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 7a2b03c517

inetpeer: __rcu annotations

Adds __rcu annotations to inetpeer
	(struct inet_peer)->avl_left
	(struct inet_peer)->avl_right

This is a tedious cleanup, but it removes one smp_wmb() from link_to_pool(),
since we now use the more self-documenting rcu_assign_pointer().

Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in
all cases where we don't need a memory barrier.
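
For illustration only (not part of this commit), the difference between the
two primitives, sketched with a hypothetical struct foo: rcu_assign_pointer()
orders the initialization of the pointed-to object before the pointer store,
while RCU_INIT_POINTER() is a plain store for cases where nothing newly
initialized is being published.

	struct foo {
		int data;
	};
	static struct foo __rcu *gp;

	static void publish_new(struct foo *f)
	{
		f->data = 42;
		/* lockless readers must not see gp point at f before
		 * f->data is written; rcu_assign_pointer() supplies
		 * the needed barrier */
		rcu_assign_pointer(gp, f);
	}

	static void rewire_existing(struct foo *f)
	{
		/* f was published earlier, so no ordering is needed */
		RCU_INIT_POINTER(gp, f);
	}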

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 2 changed files with 81 additions and 59 deletions

include/net/inetpeer.h
... ... @@ -15,7 +15,7 @@
15 15  
16 16 struct inet_peer {
17 17 /* group together avl_left,avl_right,v4daddr to speedup lookups */
18   - struct inet_peer *avl_left, *avl_right;
  18 + struct inet_peer __rcu *avl_left, *avl_right;
19 19 __be32 v4daddr; /* peer's address */
20 20 __u32 avl_height;
21 21 struct list_head unused;
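
The __rcu annotation itself has no runtime effect; it marks the pointer for
static checking, so that sparse (run via "make C=2", gated by
CONFIG_SPARSE_RCU_POINTER in kernels of this era) can flag dereferences that
bypass the RCU accessors. A minimal illustration with a hypothetical pointer,
not taken from the patch:

	static struct inet_peer __rcu *example;

	static __be32 read_addr(void)
	{
		struct inet_peer *p;
		__be32 addr = 0;

		rcu_read_lock();
		p = rcu_dereference(example);	/* ok under rcu_read_lock() */
		/* addr = example->v4daddr;  <- sparse would warn here */
		if (p)
			addr = p->v4daddr;
		rcu_read_unlock();
		return addr;
	}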
net/ipv4/inetpeer.c
... ... @@ -72,18 +72,19 @@
72 72 #define node_height(x) x->avl_height
73 73  
74 74 #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
  75 +#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75 76 static const struct inet_peer peer_fake_node = {
76   - .avl_left = peer_avl_empty,
77   - .avl_right = peer_avl_empty,
  77 + .avl_left = peer_avl_empty_rcu,
  78 + .avl_right = peer_avl_empty_rcu,
78 79 .avl_height = 0
79 80 };
80 81  
81 82 static struct {
82   - struct inet_peer *root;
  83 + struct inet_peer __rcu *root;
83 84 spinlock_t lock;
84 85 int total;
85 86 } peers = {
86   - .root = peer_avl_empty,
  87 + .root = peer_avl_empty_rcu,
87 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 89 .total = 0,
89 90 };
... ... @@ -156,11 +157,14 @@
156 157 */
157 158 #define lookup(_daddr, _stack) \
158 159 ({ \
159   - struct inet_peer *u, **v; \
  160 + struct inet_peer *u; \
  161 + struct inet_peer __rcu **v; \
160 162 \
161 163 stackptr = _stack; \
162 164 *stackptr++ = &peers.root; \
163   - for (u = peers.root; u != peer_avl_empty; ) { \
  165 + for (u = rcu_dereference_protected(peers.root, \
  166 + lockdep_is_held(&peers.lock)); \
  167 + u != peer_avl_empty; ) { \
164 168 if (_daddr == u->v4daddr) \
165 169 break; \
166 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
... ... @@ -168,7 +172,8 @@
168 172 else \
169 173 v = &u->avl_right; \
170 174 *stackptr++ = v; \
171   - u = *v; \
  175 + u = rcu_dereference_protected(*v, \
  176 + lockdep_is_held(&peers.lock)); \
172 177 } \
173 178 u; \
174 179 })
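
lookup() runs on the update side with peers.lock held (see the "Called with
local BH disabled and the pool lock held" comment below), which is why it uses
rcu_dereference_protected(): it avoids the read-side ordering of
rcu_dereference() but, with CONFIG_PROVE_RCU, complains if the stated lockdep
condition does not hold. A sketch of the pattern, not taken from the patch:

	/* update-side accessor: documents and checks its locking */
	static struct inet_peer *peer_root_locked(void)
	{
		return rcu_dereference_protected(peers.root,
				lockdep_is_held(&peers.lock));
	}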
... ... @@ -209,13 +214,17 @@
209 214 /* Called with local BH disabled and the pool lock held. */
210 215 #define lookup_rightempty(start) \
211 216 ({ \
212   - struct inet_peer *u, **v; \
  217 + struct inet_peer *u; \
  218 + struct inet_peer __rcu **v; \
213 219 *stackptr++ = &start->avl_left; \
214 220 v = &start->avl_left; \
215   - for (u = *v; u->avl_right != peer_avl_empty; ) { \
  221 + for (u = rcu_dereference_protected(*v, \
  222 + lockdep_is_held(&peers.lock)); \
  223 + u->avl_right != peer_avl_empty_rcu; ) { \
216 224 v = &u->avl_right; \
217 225 *stackptr++ = v; \
218   - u = *v; \
  226 + u = rcu_dereference_protected(*v, \
  227 + lockdep_is_held(&peers.lock)); \
219 228 } \
220 229 u; \
221 230 })
... ... @@ -224,74 +233,86 @@
224 233 * Variable names are the proof of operation correctness.
225 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 235 */
227   -static void peer_avl_rebalance(struct inet_peer **stack[],
228   - struct inet_peer ***stackend)
  236 +static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
  237 + struct inet_peer __rcu ***stackend)
229 238 {
230   - struct inet_peer **nodep, *node, *l, *r;
  239 + struct inet_peer __rcu **nodep;
  240 + struct inet_peer *node, *l, *r;
231 241 int lh, rh;
232 242  
233 243 while (stackend > stack) {
234 244 nodep = *--stackend;
235   - node = *nodep;
236   - l = node->avl_left;
237   - r = node->avl_right;
  245 + node = rcu_dereference_protected(*nodep,
  246 + lockdep_is_held(&peers.lock));
  247 + l = rcu_dereference_protected(node->avl_left,
  248 + lockdep_is_held(&peers.lock));
  249 + r = rcu_dereference_protected(node->avl_right,
  250 + lockdep_is_held(&peers.lock));
238 251 lh = node_height(l);
239 252 rh = node_height(r);
240 253 if (lh > rh + 1) { /* l: RH+2 */
241 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 255 int lrh;
243   - ll = l->avl_left;
244   - lr = l->avl_right;
  256 + ll = rcu_dereference_protected(l->avl_left,
  257 + lockdep_is_held(&peers.lock));
  258 + lr = rcu_dereference_protected(l->avl_right,
  259 + lockdep_is_held(&peers.lock));
245 260 lrh = node_height(lr);
246 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247   - node->avl_left = lr; /* lr: RH or RH+1 */
248   - node->avl_right = r; /* r: RH */
  262 + RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
  263 + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250   - l->avl_left = ll; /* ll: RH+1 */
251   - l->avl_right = node; /* node: RH+1 or RH+2 */
  265 + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
  266 + RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 267 l->avl_height = node->avl_height + 1;
253   - *nodep = l;
  268 + RCU_INIT_POINTER(*nodep, l);
254 269 } else { /* ll: RH, lr: RH+1 */
255   - lrl = lr->avl_left; /* lrl: RH or RH-1 */
256   - lrr = lr->avl_right; /* lrr: RH or RH-1 */
257   - node->avl_left = lrr; /* lrr: RH or RH-1 */
258   - node->avl_right = r; /* r: RH */
  270 + lrl = rcu_dereference_protected(lr->avl_left,
  271 + lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
  272 + lrr = rcu_dereference_protected(lr->avl_right,
  273 + lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
  274 + RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
  275 + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 276 node->avl_height = rh + 1; /* node: RH+1 */
260   - l->avl_left = ll; /* ll: RH */
261   - l->avl_right = lrl; /* lrl: RH or RH-1 */
  277 + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
  278 + RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 279 l->avl_height = rh + 1; /* l: RH+1 */
263   - lr->avl_left = l; /* l: RH+1 */
264   - lr->avl_right = node; /* node: RH+1 */
  280 + RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
  281 + RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 282 lr->avl_height = rh + 2;
266   - *nodep = lr;
  283 + RCU_INIT_POINTER(*nodep, lr);
267 284 }
268 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 287 int rlh;
271   - rr = r->avl_right;
272   - rl = r->avl_left;
  288 + rr = rcu_dereference_protected(r->avl_right,
  289 + lockdep_is_held(&peers.lock));
  290 + rl = rcu_dereference_protected(r->avl_left,
  291 + lockdep_is_held(&peers.lock));
273 292 rlh = node_height(rl);
274 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275   - node->avl_right = rl; /* rl: LH or LH+1 */
276   - node->avl_left = l; /* l: LH */
  294 + RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
  295 + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278   - r->avl_right = rr; /* rr: LH+1 */
279   - r->avl_left = node; /* node: LH+1 or LH+2 */
  297 + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
  298 + RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 299 r->avl_height = node->avl_height + 1;
281   - *nodep = r;
  300 + RCU_INIT_POINTER(*nodep, r);
282 301 } else { /* rr: RH, rl: RH+1 */
283   - rlr = rl->avl_right; /* rlr: LH or LH-1 */
284   - rll = rl->avl_left; /* rll: LH or LH-1 */
285   - node->avl_right = rll; /* rll: LH or LH-1 */
286   - node->avl_left = l; /* l: LH */
  302 + rlr = rcu_dereference_protected(rl->avl_right,
  303 + lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
  304 + rll = rcu_dereference_protected(rl->avl_left,
  305 + lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
  306 + RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
  307 + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 308 node->avl_height = lh + 1; /* node: LH+1 */
288   - r->avl_right = rr; /* rr: LH */
289   - r->avl_left = rlr; /* rlr: LH or LH-1 */
  309 + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
  310 + RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 311 r->avl_height = lh + 1; /* r: LH+1 */
291   - rl->avl_right = r; /* r: LH+1 */
292   - rl->avl_left = node; /* node: LH+1 */
  312 + RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
  313 + RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 314 rl->avl_height = lh + 2;
294   - *nodep = rl;
  315 + RCU_INIT_POINTER(*nodep, rl);
295 316 }
296 317 } else {
297 318 node->avl_height = (lh > rh ? lh : rh) + 1;
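
Why every store in the rotations above can be a plain RCU_INIT_POINTER() (a
reading of the change, not text from the commit): the rotations only re-link
nodes whose contents were published earlier, so there is no initialization
that must be ordered before the stores. Lockless readers can still observe a
half-rotated tree, which the RCU lookup path must tolerate, e.g. by bounding
its walk depth instead of trusting the tree shape.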
... ... @@ -303,10 +324,10 @@
303 324 #define link_to_pool(n) \
304 325 do { \
305 326 n->avl_height = 1; \
306   - n->avl_left = peer_avl_empty; \
307   - n->avl_right = peer_avl_empty; \
308   - smp_wmb(); /* lockless readers can catch us now */ \
309   - **--stackptr = n; \
  327 + n->avl_left = peer_avl_empty_rcu; \
  328 + n->avl_right = peer_avl_empty_rcu; \
  329 + /* lockless readers can catch us now */ \
  330 + rcu_assign_pointer(**--stackptr, n); \
310 331 peer_avl_rebalance(stack, stackptr); \
311 332 } while (0)
312 333  
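
The smp_wmb() could be dropped because rcu_assign_pointer() carries the
barrier itself; around this kernel version it expanded to roughly the
following (illustrative, modulo type checking, not the exact definition):

	#define rcu_assign_pointer_sketch(p, v)	\
		({				\
			smp_wmb();		\
			(p) = (v);		\
		})

The one store in link_to_pool() that publishes a freshly initialized node is
exactly the one kept as rcu_assign_pointer().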
... ... @@ -330,24 +351,25 @@
330 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 352 */
332 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333   - struct inet_peer **stack[PEER_MAXDEPTH];
334   - struct inet_peer ***stackptr, ***delp;
  354 + struct inet_peer __rcu **stack[PEER_MAXDEPTH];
  355 + struct inet_peer __rcu ***stackptr, ***delp;
335 356 if (lookup(p->v4daddr, stack) != p)
336 357 BUG();
337 358 delp = stackptr - 1; /* *delp[0] == p */
338   - if (p->avl_left == peer_avl_empty) {
  359 + if (p->avl_left == peer_avl_empty_rcu) {
339 360 *delp[0] = p->avl_right;
340 361 --stackptr;
341 362 } else {
342 363 /* look for a node to insert instead of p */
343 364 struct inet_peer *t;
344 365 t = lookup_rightempty(p);
345   - BUG_ON(*stackptr[-1] != t);
  366 + BUG_ON(rcu_dereference_protected(*stackptr[-1],
  367 + lockdep_is_held(&peers.lock)) != t);
346 368 **--stackptr = t->avl_left;
347 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 370 * x in p->avl_left subtree.
349 371 * Put t in the old place of p. */
350   - *delp[0] = t;
  372 + RCU_INIT_POINTER(*delp[0], t);
351 373 t->avl_left = p->avl_left;
352 374 t->avl_right = p->avl_right;
353 375 t->avl_height = p->avl_height;
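
The refcnt=-1 convention mentioned above pairs with the lockless lookup
elsewhere in this file: a reader takes a reference only if the entry is not
already marked deleted. A sketch of that check (the in-tree code uses
atomic_add_unless() the same way):

	/* true if a reference was taken, false if the entry is
	 * being unlinked (refcnt == -1) */
	static bool peer_tryget(struct inet_peer *p)
	{
		return atomic_add_unless(&p->refcnt, 1, -1);
	}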
... ... @@ -414,7 +436,7 @@
414 436 struct inet_peer *inet_getpeer(__be32 daddr, int create)
415 437 {
416 438 struct inet_peer *p;
417   - struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
  439 + struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440  
419 441 /* Look up for the address quickly, lockless.
420 442 * Because of a concurrent writer, we might not find an existing entry.
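
For context, the lockless path the comment refers to walks the same tree
under RCU instead of peers.lock. A simplified sketch of the idea (cf.
lookup_rcu_bh() in this file; the refcount acquisition from the sketch above
is omitted):

	static struct inet_peer *peer_lookup_rcu(__be32 daddr)
	{
		struct inet_peer *u = rcu_dereference_bh(peers.root);
		int count = 0;

		while (u != peer_avl_empty) {
			if (daddr == u->v4daddr)
				return u;
			if ((__force __u32)daddr < (__force __u32)u->v4daddr)
				u = rcu_dereference_bh(u->avl_left);
			else
				u = rcu_dereference_bh(u->avl_right);
			/* a concurrent rebalance may leave the walk on a
			 * stale branch; give up rather than loop */
			if (count++ == PEER_MAXDEPTH)
				break;
		}
		return NULL;
	}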