Commit b914c4ea929a4ba6fb97967800dc473c31552b98
Committed by David S. Miller
1 parent 7a2b03c517
Exists in master and 7 other branches
inetpeer: __rcu annotations
Adds __rcu annotations to inetpeer
	(struct inet_peer)->avl_left
	(struct inet_peer)->avl_right

This is a tedious cleanup, but it removes one smp_wmb() from
link_to_pool(), since we now use the more self-documenting
rcu_assign_pointer().

Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in
all cases where we don't need a memory barrier.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 81 additions and 59 deletions
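For readers unfamiliar with the annotation: __rcu is a sparse address-space marker, so once avl_left and avl_right carry it, a plain load or store of those fields is flagged when the tree is checked with sparse (make C=1), and every access has to go through the RCU accessors, which is what the rest of the patch converts. A minimal sketch of the effect (the two helper functions are illustrative, not part of the patch):

    #include <linux/rcupdate.h>
    #include <net/inetpeer.h>

    /* Illustrative helpers, not from the patch. */
    static void set_left(struct inet_peer *p, struct inet_peer *l)
    {
        /* p->avl_left = l;  <-- sparse would warn: different address spaces */
        rcu_assign_pointer(p->avl_left, l);   /* ordered store, safe for publication */
    }

    static struct inet_peer *left_of(struct inet_peer *p)
    {
        /* return p->avl_left;  <-- same sparse warning */
        return rcu_dereference(p->avl_left);  /* caller must hold rcu_read_lock() */
    }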
include/net/inetpeer.h
... | ... | @@ -15,7 +15,7 @@ |
15 | 15 | |
16 | 16 | struct inet_peer { |
17 | 17 | /* group together avl_left,avl_right,v4daddr to speedup lookups */ |
18 | - struct inet_peer *avl_left, *avl_right; | |
18 | + struct inet_peer __rcu *avl_left, *avl_right; | |
19 | 19 | __be32 v4daddr; /* peer's address */ |
20 | 20 | __u32 avl_height; |
21 | 21 | struct list_head unused; |
net/ipv4/inetpeer.c
... | ... | @@ -72,18 +72,19 @@ |
72 | 72 | #define node_height(x) x->avl_height |
73 | 73 | |
74 | 74 | #define peer_avl_empty ((struct inet_peer *)&peer_fake_node) |
75 | +#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) | |
75 | 76 | static const struct inet_peer peer_fake_node = { |
76 | - .avl_left = peer_avl_empty, | |
77 | - .avl_right = peer_avl_empty, | |
77 | + .avl_left = peer_avl_empty_rcu, | |
78 | + .avl_right = peer_avl_empty_rcu, | |
78 | 79 | .avl_height = 0 |
79 | 80 | }; |
80 | 81 | |
81 | 82 | static struct { |
82 | - struct inet_peer *root; | |
83 | + struct inet_peer __rcu *root; | |
83 | 84 | spinlock_t lock; |
84 | 85 | int total; |
85 | 86 | } peers = { |
86 | - .root = peer_avl_empty, | |
87 | + .root = peer_avl_empty_rcu, | |
87 | 88 | .lock = __SPIN_LOCK_UNLOCKED(peers.lock), |
88 | 89 | .total = 0, |
89 | 90 | }; |
90 | 91 | |
... | ... | @@ -156,11 +157,14 @@ |
156 | 157 | */ |
157 | 158 | #define lookup(_daddr, _stack) \ |
158 | 159 | ({ \ |
159 | - struct inet_peer *u, **v; \ | |
160 | + struct inet_peer *u; \ | |
161 | + struct inet_peer __rcu **v; \ | |
160 | 162 | \ |
161 | 163 | stackptr = _stack; \ |
162 | 164 | *stackptr++ = &peers.root; \ |
163 | - for (u = peers.root; u != peer_avl_empty; ) { \ | |
165 | + for (u = rcu_dereference_protected(peers.root, \ | |
166 | + lockdep_is_held(&peers.lock)); \ | |
167 | + u != peer_avl_empty; ) { \ | |
164 | 168 | if (_daddr == u->v4daddr) \ |
165 | 169 | break; \ |
166 | 170 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ |
... | ... | @@ -168,7 +172,8 @@ |
168 | 172 | else \ |
169 | 173 | v = &u->avl_right; \ |
170 | 174 | *stackptr++ = v; \ |
171 | - u = *v; \ | |
175 | + u = rcu_dereference_protected(*v, \ | |
176 | + lockdep_is_held(&peers.lock)); \ | |
172 | 177 | } \ |
173 | 178 | u; \ |
174 | 179 | }) |
175 | 180 | |
176 | 181 | |
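A note on the accessor used in the lookup() walk above: the macro runs with peers.lock held on the updater side, so it uses rcu_dereference_protected() with a lockdep_is_held() condition, which skips the read-side machinery but still lets lockdep verify that the claimed lock really is held; a lockless reader would instead use rcu_dereference() inside an RCU read-side critical section. A sketch of the two patterns (peers.root and peers.lock are the real fields, the helper functions are illustrative):

    /* Updater side: the pool lock is held; lockdep checks the assertion. */
    static struct inet_peer *root_locked(void)
    {
        return rcu_dereference_protected(peers.root,
                                         lockdep_is_held(&peers.lock));
    }

    /* Reader side: caller must be inside rcu_read_lock()/rcu_read_unlock(). */
    static struct inet_peer *root_lockless(void)
    {
        return rcu_dereference(peers.root);
    }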
... | ... | @@ -209,13 +214,17 @@ |
209 | 214 | /* Called with local BH disabled and the pool lock held. */ |
210 | 215 | #define lookup_rightempty(start) \ |
211 | 216 | ({ \ |
212 | - struct inet_peer *u, **v; \ | |
217 | + struct inet_peer *u; \ | |
218 | + struct inet_peer __rcu **v; \ | |
213 | 219 | *stackptr++ = &start->avl_left; \ |
214 | 220 | v = &start->avl_left; \ |
215 | - for (u = *v; u->avl_right != peer_avl_empty; ) { \ | |
221 | + for (u = rcu_dereference_protected(*v, \ | |
222 | + lockdep_is_held(&peers.lock)); \ | |
223 | + u->avl_right != peer_avl_empty_rcu; ) { \ | |
216 | 224 | v = &u->avl_right; \ |
217 | 225 | *stackptr++ = v; \ |
218 | - u = *v; \ | |
226 | + u = rcu_dereference_protected(*v, \ | |
227 | + lockdep_is_held(&peers.lock)); \ | |
219 | 228 | } \ |
220 | 229 | u; \ |
221 | 230 | }) |
... | ... | @@ -224,74 +233,86 @@ |
224 | 233 | * Variable names are the proof of operation correctness. |
225 | 234 | * Look into mm/map_avl.c for more detail description of the ideas. |
226 | 235 | */ |
227 | -static void peer_avl_rebalance(struct inet_peer **stack[], | |
228 | - struct inet_peer ***stackend) | |
236 | +static void peer_avl_rebalance(struct inet_peer __rcu **stack[], | |
237 | + struct inet_peer __rcu ***stackend) | |
229 | 238 | { |
230 | - struct inet_peer **nodep, *node, *l, *r; | |
239 | + struct inet_peer __rcu **nodep; | |
240 | + struct inet_peer *node, *l, *r; | |
231 | 241 | int lh, rh; |
232 | 242 | |
233 | 243 | while (stackend > stack) { |
234 | 244 | nodep = *--stackend; |
235 | - node = *nodep; | |
236 | - l = node->avl_left; | |
237 | - r = node->avl_right; | |
245 | + node = rcu_dereference_protected(*nodep, | |
246 | + lockdep_is_held(&peers.lock)); | |
247 | + l = rcu_dereference_protected(node->avl_left, | |
248 | + lockdep_is_held(&peers.lock)); | |
249 | + r = rcu_dereference_protected(node->avl_right, | |
250 | + lockdep_is_held(&peers.lock)); | |
238 | 251 | lh = node_height(l); |
239 | 252 | rh = node_height(r); |
240 | 253 | if (lh > rh + 1) { /* l: RH+2 */ |
241 | 254 | struct inet_peer *ll, *lr, *lrl, *lrr; |
242 | 255 | int lrh; |
243 | - ll = l->avl_left; | |
244 | - lr = l->avl_right; | |
256 | + ll = rcu_dereference_protected(l->avl_left, | |
257 | + lockdep_is_held(&peers.lock)); | |
258 | + lr = rcu_dereference_protected(l->avl_right, | |
259 | + lockdep_is_held(&peers.lock)); | |
245 | 260 | lrh = node_height(lr); |
246 | 261 | if (lrh <= node_height(ll)) { /* ll: RH+1 */ |
247 | - node->avl_left = lr; /* lr: RH or RH+1 */ | |
248 | - node->avl_right = r; /* r: RH */ | |
262 | + RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ | |
263 | + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ | |
249 | 264 | node->avl_height = lrh + 1; /* RH+1 or RH+2 */ |
250 | - l->avl_left = ll; /* ll: RH+1 */ | |
251 | - l->avl_right = node; /* node: RH+1 or RH+2 */ | |
265 | + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ | |
266 | + RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ | |
252 | 267 | l->avl_height = node->avl_height + 1; |
253 | - *nodep = l; | |
268 | + RCU_INIT_POINTER(*nodep, l); | |
254 | 269 | } else { /* ll: RH, lr: RH+1 */ |
255 | - lrl = lr->avl_left; /* lrl: RH or RH-1 */ | |
256 | - lrr = lr->avl_right; /* lrr: RH or RH-1 */ | |
257 | - node->avl_left = lrr; /* lrr: RH or RH-1 */ | |
258 | - node->avl_right = r; /* r: RH */ | |
270 | + lrl = rcu_dereference_protected(lr->avl_left, | |
271 | + lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ | |
272 | + lrr = rcu_dereference_protected(lr->avl_right, | |
273 | + lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ | |
274 | + RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ | |
275 | + RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ | |
259 | 276 | node->avl_height = rh + 1; /* node: RH+1 */ |
260 | - l->avl_left = ll; /* ll: RH */ | |
261 | - l->avl_right = lrl; /* lrl: RH or RH-1 */ | |
277 | + RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ | |
278 | + RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ | |
262 | 279 | l->avl_height = rh + 1; /* l: RH+1 */ |
263 | - lr->avl_left = l; /* l: RH+1 */ | |
264 | - lr->avl_right = node; /* node: RH+1 */ | |
280 | + RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ | |
281 | + RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ | |
265 | 282 | lr->avl_height = rh + 2; |
266 | - *nodep = lr; | |
283 | + RCU_INIT_POINTER(*nodep, lr); | |
267 | 284 | } |
268 | 285 | } else if (rh > lh + 1) { /* r: LH+2 */ |
269 | 286 | struct inet_peer *rr, *rl, *rlr, *rll; |
270 | 287 | int rlh; |
271 | - rr = r->avl_right; | |
272 | - rl = r->avl_left; | |
288 | + rr = rcu_dereference_protected(r->avl_right, | |
289 | + lockdep_is_held(&peers.lock)); | |
290 | + rl = rcu_dereference_protected(r->avl_left, | |
291 | + lockdep_is_held(&peers.lock)); | |
273 | 292 | rlh = node_height(rl); |
274 | 293 | if (rlh <= node_height(rr)) { /* rr: LH+1 */ |
275 | - node->avl_right = rl; /* rl: LH or LH+1 */ | |
276 | - node->avl_left = l; /* l: LH */ | |
294 | + RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ | |
295 | + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ | |
277 | 296 | node->avl_height = rlh + 1; /* LH+1 or LH+2 */ |
278 | - r->avl_right = rr; /* rr: LH+1 */ | |
279 | - r->avl_left = node; /* node: LH+1 or LH+2 */ | |
297 | + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ | |
298 | + RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ | |
280 | 299 | r->avl_height = node->avl_height + 1; |
281 | - *nodep = r; | |
300 | + RCU_INIT_POINTER(*nodep, r); | |
282 | 301 | } else { /* rr: RH, rl: RH+1 */ |
283 | - rlr = rl->avl_right; /* rlr: LH or LH-1 */ | |
284 | - rll = rl->avl_left; /* rll: LH or LH-1 */ | |
285 | - node->avl_right = rll; /* rll: LH or LH-1 */ | |
286 | - node->avl_left = l; /* l: LH */ | |
302 | + rlr = rcu_dereference_protected(rl->avl_right, | |
303 | + lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ | |
304 | + rll = rcu_dereference_protected(rl->avl_left, | |
305 | + lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ | |
306 | + RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ | |
307 | + RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ | |
287 | 308 | node->avl_height = lh + 1; /* node: LH+1 */ |
288 | - r->avl_right = rr; /* rr: LH */ | |
289 | - r->avl_left = rlr; /* rlr: LH or LH-1 */ | |
309 | + RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ | |
310 | + RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ | |
290 | 311 | r->avl_height = lh + 1; /* r: LH+1 */ |
291 | - rl->avl_right = r; /* r: LH+1 */ | |
292 | - rl->avl_left = node; /* node: LH+1 */ | |
312 | + RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ | |
313 | + RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ | |
293 | 314 | rl->avl_height = lh + 2; |
294 | - *nodep = rl; | |
315 | + RCU_INIT_POINTER(*nodep, rl); | |
295 | 316 | } |
296 | 317 | } else { |
297 | 318 | node->avl_height = (lh > rh ? lh : rh) + 1; |
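On the use of RCU_INIT_POINTER() throughout the rotations above: every node being re-linked (node, l, r, lr, rl and their children) was already published and fully initialised, so there is no newly written memory whose visibility must be ordered before the pointer store, and the barrier-free initialising store is sufficient. Only the store that first exposes a brand-new node needs ordering, which link_to_pool() below provides with rcu_assign_pointer(). The pattern, annotated (an excerpt of the same calls with comments added, not new code):

    /* node, l and lr are all already reachable and initialised; these
     * stores only re-link existing nodes, so no memory barrier is needed.
     */
    RCU_INIT_POINTER(node->avl_left, lr);
    RCU_INIT_POINTER(l->avl_right, node);
    RCU_INIT_POINTER(*nodep, l);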
... | ... | @@ -303,10 +324,10 @@ |
303 | 324 | #define link_to_pool(n) \ |
304 | 325 | do { \ |
305 | 326 | n->avl_height = 1; \ |
306 | - n->avl_left = peer_avl_empty; \ | |
307 | - n->avl_right = peer_avl_empty; \ | |
308 | - smp_wmb(); /* lockless readers can catch us now */ \ | |
309 | - **--stackptr = n; \ | |
327 | + n->avl_left = peer_avl_empty_rcu; \ | |
328 | + n->avl_right = peer_avl_empty_rcu; \ | |
329 | + /* lockless readers can catch us now */ \ | |
330 | + rcu_assign_pointer(**--stackptr, n); \ | |
310 | 331 | peer_avl_rebalance(stack, stackptr); \ |
311 | 332 | } while (0) |
312 | 333 | |
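This hunk is the one the commit message calls out: the new node's avl_left and avl_right are set with plain stores of the __rcu-cast sentinel while n is still private to the writer, and the single store that makes n reachable to lockless readers goes through rcu_assign_pointer(), whose implied write barrier replaces the deleted explicit smp_wmb(). An annotated restatement of the macro body (same statements, comments added):

    n->avl_height = 1;
    n->avl_left = peer_avl_empty_rcu;     /* n still private: plain stores suffice */
    n->avl_right = peer_avl_empty_rcu;
    /* Publishing store: orders the three writes above before n becomes
     * visible, which is why the old explicit smp_wmb() could go.
     */
    rcu_assign_pointer(**--stackptr, n);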
... | ... | @@ -330,24 +351,25 @@ |
330 | 351 | * We use refcnt=-1 to alert lockless readers this entry is deleted. |
331 | 352 | */ |
332 | 353 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { |
333 | - struct inet_peer **stack[PEER_MAXDEPTH]; | |
334 | - struct inet_peer ***stackptr, ***delp; | |
354 | + struct inet_peer __rcu **stack[PEER_MAXDEPTH]; | |
355 | + struct inet_peer __rcu ***stackptr, ***delp; | |
335 | 356 | if (lookup(p->v4daddr, stack) != p) |
336 | 357 | BUG(); |
337 | 358 | delp = stackptr - 1; /* *delp[0] == p */ |
338 | - if (p->avl_left == peer_avl_empty) { | |
359 | + if (p->avl_left == peer_avl_empty_rcu) { | |
339 | 360 | *delp[0] = p->avl_right; |
340 | 361 | --stackptr; |
341 | 362 | } else { |
342 | 363 | /* look for a node to insert instead of p */ |
343 | 364 | struct inet_peer *t; |
344 | 365 | t = lookup_rightempty(p); |
345 | - BUG_ON(*stackptr[-1] != t); | |
366 | + BUG_ON(rcu_dereference_protected(*stackptr[-1], | |
367 | + lockdep_is_held(&peers.lock)) != t); | |
346 | 368 | **--stackptr = t->avl_left; |
347 | 369 | /* t is removed, t->v4daddr > x->v4daddr for any |
348 | 370 | * x in p->avl_left subtree. |
349 | 371 | * Put t in the old place of p. */ |
350 | - *delp[0] = t; | |
372 | + RCU_INIT_POINTER(*delp[0], t); | |
351 | 373 | t->avl_left = p->avl_left; |
352 | 374 | t->avl_right = p->avl_right; |
353 | 375 | t->avl_height = p->avl_height; |
... | ... | @@ -414,7 +436,7 @@ |
414 | 436 | struct inet_peer *inet_getpeer(__be32 daddr, int create) |
415 | 437 | { |
416 | 438 | struct inet_peer *p; |
417 | - struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; | |
439 | + struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | |
418 | 440 | |
419 | 441 | /* Look up for the address quickly, lockless. |
420 | 442 | * Because of a concurrent writer, we might not find an existing entry. |