Commit 026ace060dfe29275d2188297a62fa37d6c1a02c
Committed by
Pablo Neira Ayuso
1 parent
4115ded131
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
ipvs: optimize dst usage for real server
Currently, when forwarding requests to real servers, we take dst_lock and use atomic operations when cloning the dst_cache value. As the dst_cache value does not change most of the time, it is better to use RCU and to take dst_lock only when we need to replace an obsolete dst. For this to work, we keep dst_cache in a new structure protected by RCU. For packets to remote real servers we will use the noref version of dst_cache; it will be valid while we are in an RCU read-side critical section, because dst_release for replaced dsts will now be invoked after the grace period. Packets to local real servers that are passed to the local stack with NF_ACCEPT need a dst clone. Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Hans Schillstrom <hans@schillstrom.com> Signed-off-by: Simon Horman <horms@verge.net.au>
Showing 4 changed files with 177 additions and 59 deletions Side-by-side Diff
include/net/ip_vs.h
... | ... | @@ -724,6 +724,13 @@ |
724 | 724 | struct ip_vs_pe *pe; |
725 | 725 | }; |
726 | 726 | |
727 | +/* Information for cached dst */ | |
728 | +struct ip_vs_dest_dst { | |
729 | + struct dst_entry *dst_cache; /* destination cache entry */ | |
730 | + u32 dst_cookie; | |
731 | + union nf_inet_addr dst_saddr; | |
732 | + struct rcu_head rcu_head; | |
733 | +}; | |
727 | 734 | |
728 | 735 | /* |
729 | 736 | * The real server destination forwarding entry |
... | ... | @@ -752,9 +759,7 @@ |
752 | 759 | |
753 | 760 | /* for destination cache */ |
754 | 761 | spinlock_t dst_lock; /* lock of dst_cache */ |
755 | - struct dst_entry *dst_cache; /* destination cache entry */ | |
756 | - u32 dst_cookie; | |
757 | - union nf_inet_addr dst_saddr; | |
762 | + struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ | |
758 | 763 | |
759 | 764 | /* for virtual service */ |
760 | 765 | struct ip_vs_service *svc; /* service it belongs to */ |
... | ... | @@ -1427,6 +1432,7 @@ |
1427 | 1432 | extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, |
1428 | 1433 | struct ip_vs_protocol *pp, int offset, |
1429 | 1434 | unsigned int hooknum, struct ip_vs_iphdr *iph); |
1435 | +extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head); | |
1430 | 1436 | |
1431 | 1437 | #ifdef CONFIG_IP_VS_IPV6 |
1432 | 1438 | extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, |
net/netfilter/ipvs/ip_vs_core.c
... | ... | @@ -1395,10 +1395,13 @@ |
1395 | 1395 | goto ignore_ipip; |
1396 | 1396 | /* Prefer the resulting PMTU */ |
1397 | 1397 | if (dest) { |
1398 | - spin_lock(&dest->dst_lock); | |
1399 | - if (dest->dst_cache) | |
1400 | - mtu = dst_mtu(dest->dst_cache); | |
1401 | - spin_unlock(&dest->dst_lock); | |
1398 | + struct ip_vs_dest_dst *dest_dst; | |
1399 | + | |
1400 | + rcu_read_lock(); | |
1401 | + dest_dst = rcu_dereference(dest->dest_dst); | |
1402 | + if (dest_dst) | |
1403 | + mtu = dst_mtu(dest_dst->dst_cache); | |
1404 | + rcu_read_unlock(); | |
1402 | 1405 | } |
1403 | 1406 | if (mtu > 68 + sizeof(struct iphdr)) |
1404 | 1407 | mtu -= sizeof(struct iphdr); |
net/netfilter/ipvs/ip_vs_ctl.c
... | ... | @@ -641,15 +641,26 @@ |
641 | 641 | return dest; |
642 | 642 | } |
643 | 643 | |
644 | -/* Release dst_cache for dest in user context */ | |
644 | +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) | |
645 | +{ | |
646 | + struct ip_vs_dest_dst *dest_dst = container_of(head, | |
647 | + struct ip_vs_dest_dst, | |
648 | + rcu_head); | |
649 | + | |
650 | + dst_release(dest_dst->dst_cache); | |
651 | + kfree(dest_dst); | |
652 | +} | |
653 | + | |
654 | +/* Release dest_dst and dst_cache for dest in user context */ | |
645 | 655 | static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) |
646 | 656 | { |
647 | - struct dst_entry *old_dst; | |
657 | + struct ip_vs_dest_dst *old; | |
648 | 658 | |
649 | - old_dst = dest->dst_cache; | |
650 | - dest->dst_cache = NULL; | |
651 | - dst_release(old_dst); | |
652 | - dest->dst_saddr.ip = 0; | |
659 | + old = rcu_dereference_protected(dest->dest_dst, 1); | |
660 | + if (old) { | |
661 | + RCU_INIT_POINTER(dest->dest_dst, NULL); | |
662 | + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); | |
663 | + } | |
653 | 664 | } |
654 | 665 | |
655 | 666 | /* |
... | ... | @@ -1513,7 +1524,7 @@ |
1513 | 1524 | ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) |
1514 | 1525 | { |
1515 | 1526 | spin_lock_bh(&dest->dst_lock); |
1516 | - if (dest->dst_cache && dest->dst_cache->dev == dev) { | |
1527 | + if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) { | |
1517 | 1528 | IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", |
1518 | 1529 | dev->name, |
1519 | 1530 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
net/netfilter/ipvs/ip_vs_xmit.c
... | ... | @@ -17,6 +17,8 @@ |
17 | 17 | * - not all connections have destination server, for example, |
18 | 18 | * connections in backup server when fwmark is used |
19 | 19 | * - bypass connections use daddr from packet |
20 | + * - we can use dst without ref while sending in RCU section, we use | |
21 | + * ref when returning NF_ACCEPT for NAT-ed packet via loopback | |
20 | 22 | * LOCAL_OUT rules: |
21 | 23 | * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING) |
22 | 24 | * - skb->pkt_type is not set yet |
23 | 25 | |
24 | 26 | |
25 | 27 | |
26 | 28 | |
27 | 29 | |
28 | 30 | |
29 | 31 | |
30 | 32 | |
... | ... | @@ -54,34 +56,51 @@ |
54 | 56 | IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */ |
55 | 57 | }; |
56 | 58 | |
59 | +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) | |
60 | +{ | |
61 | + return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); | |
62 | +} | |
63 | + | |
64 | +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) | |
65 | +{ | |
66 | + kfree(dest_dst); | |
67 | +} | |
68 | + | |
57 | 69 | /* |
58 | 70 | * Destination cache to speed up outgoing route lookup |
59 | 71 | */ |
60 | 72 | static inline void |
61 | -__ip_vs_dst_set(struct ip_vs_dest *dest, struct dst_entry *dst, u32 dst_cookie) | |
73 | +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, | |
74 | + struct dst_entry *dst, u32 dst_cookie) | |
62 | 75 | { |
63 | - struct dst_entry *old_dst; | |
76 | + struct ip_vs_dest_dst *old; | |
64 | 77 | |
65 | - old_dst = dest->dst_cache; | |
66 | - dest->dst_cache = dst; | |
67 | - dest->dst_cookie = dst_cookie; | |
68 | - dst_release(old_dst); | |
78 | + old = rcu_dereference_protected(dest->dest_dst, | |
79 | + lockdep_is_held(&dest->dst_lock)); | |
80 | + | |
81 | + if (dest_dst) { | |
82 | + dest_dst->dst_cache = dst; | |
83 | + dest_dst->dst_cookie = dst_cookie; | |
84 | + } | |
85 | + rcu_assign_pointer(dest->dest_dst, dest_dst); | |
86 | + | |
87 | + if (old) | |
88 | + call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); | |
69 | 89 | } |
70 | 90 | |
71 | -static inline struct dst_entry * | |
91 | +static inline struct ip_vs_dest_dst * | |
72 | 92 | __ip_vs_dst_check(struct ip_vs_dest *dest) |
73 | 93 | { |
74 | - struct dst_entry *dst = dest->dst_cache; | |
94 | + struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); | |
95 | + struct dst_entry *dst; | |
75 | 96 | |
76 | - if (!dst) | |
97 | + if (!dest_dst) | |
77 | 98 | return NULL; |
78 | - if (dst->obsolete && dst->ops->check(dst, dest->dst_cookie) == NULL) { | |
79 | - dest->dst_cache = NULL; | |
80 | - dst_release(dst); | |
99 | + dst = dest_dst->dst_cache; | |
100 | + if (dst->obsolete && | |
101 | + dst->ops->check(dst, dest_dst->dst_cookie) == NULL) | |
81 | 102 | return NULL; |
82 | - } | |
83 | - dst_hold(dst); | |
84 | - return dst; | |
103 | + return dest_dst; | |
85 | 104 | } |
86 | 105 | |
87 | 106 | static inline bool |
88 | 107 | |
89 | 108 | |
90 | 109 | |
91 | 110 | |
92 | 111 | |
93 | 112 | |
94 | 113 | |
95 | 114 | |
96 | 115 | |
... | ... | @@ -144,35 +163,48 @@ |
144 | 163 | { |
145 | 164 | struct net *net = dev_net(skb_dst(skb)->dev); |
146 | 165 | struct netns_ipvs *ipvs = net_ipvs(net); |
166 | + struct ip_vs_dest_dst *dest_dst; | |
147 | 167 | struct rtable *rt; /* Route to the other host */ |
148 | 168 | struct rtable *ort; /* Original route */ |
149 | 169 | struct iphdr *iph; |
150 | 170 | __be16 df; |
151 | 171 | int mtu; |
152 | - int local; | |
172 | + int local, noref = 1; | |
153 | 173 | |
154 | 174 | if (dest) { |
155 | - spin_lock(&dest->dst_lock); | |
156 | - rt = (struct rtable *) __ip_vs_dst_check(dest); | |
157 | - if (!rt) { | |
175 | + dest_dst = __ip_vs_dst_check(dest); | |
176 | + if (likely(dest_dst)) | |
177 | + rt = (struct rtable *) dest_dst->dst_cache; | |
178 | + else { | |
179 | + dest_dst = ip_vs_dest_dst_alloc(); | |
180 | + spin_lock(&dest->dst_lock); | |
181 | + if (!dest_dst) { | |
182 | + __ip_vs_dst_set(dest, NULL, NULL, 0); | |
183 | + spin_unlock(&dest->dst_lock); | |
184 | + goto err_unreach; | |
185 | + } | |
158 | 186 | rt = do_output_route4(net, dest->addr.ip, rt_mode, |
159 | - &dest->dst_saddr.ip); | |
187 | + &dest_dst->dst_saddr.ip); | |
160 | 188 | if (!rt) { |
189 | + __ip_vs_dst_set(dest, NULL, NULL, 0); | |
161 | 190 | spin_unlock(&dest->dst_lock); |
191 | + ip_vs_dest_dst_free(dest_dst); | |
162 | 192 | goto err_unreach; |
163 | 193 | } |
164 | - __ip_vs_dst_set(dest, dst_clone(&rt->dst), 0); | |
194 | + __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); | |
195 | + spin_unlock(&dest->dst_lock); | |
165 | 196 | IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", |
166 | - &dest->addr.ip, &dest->dst_saddr.ip, | |
197 | + &dest->addr.ip, &dest_dst->dst_saddr.ip, | |
167 | 198 | atomic_read(&rt->dst.__refcnt)); |
168 | 199 | } |
169 | 200 | daddr = dest->addr.ip; |
170 | 201 | if (ret_saddr) |
171 | - *ret_saddr = dest->dst_saddr.ip; | |
172 | - spin_unlock(&dest->dst_lock); | |
202 | + *ret_saddr = dest_dst->dst_saddr.ip; | |
173 | 203 | } else { |
174 | 204 | __be32 saddr = htonl(INADDR_ANY); |
175 | 205 | |
206 | + noref = 0; | |
207 | + | |
176 | 208 | /* For such unconfigured boxes avoid many route lookups |
177 | 209 | * for performance reasons because we do not remember saddr |
178 | 210 | */ |
... | ... | @@ -210,7 +242,8 @@ |
210 | 242 | goto err_put; |
211 | 243 | } |
212 | 244 | /* skb to local stack, preserve old route */ |
213 | - ip_rt_put(rt); | |
245 | + if (!noref) | |
246 | + ip_rt_put(rt); | |
214 | 247 | return local; |
215 | 248 | } |
216 | 249 | |
217 | 250 | |
... | ... | @@ -240,12 +273,19 @@ |
240 | 273 | } |
241 | 274 | |
242 | 275 | skb_dst_drop(skb); |
243 | - skb_dst_set(skb, &rt->dst); | |
276 | + if (noref) { | |
277 | + if (!local) | |
278 | + skb_dst_set_noref_force(skb, &rt->dst); | |
279 | + else | |
280 | + skb_dst_set(skb, dst_clone(&rt->dst)); | |
281 | + } else | |
282 | + skb_dst_set(skb, &rt->dst); | |
244 | 283 | |
245 | 284 | return local; |
246 | 285 | |
247 | 286 | err_put: |
248 | - ip_rt_put(rt); | |
287 | + if (!noref) | |
288 | + ip_rt_put(rt); | |
249 | 289 | return -1; |
250 | 290 | |
251 | 291 | err_unreach: |
252 | 292 | |
253 | 293 | |
254 | 294 | |
255 | 295 | |
256 | 296 | |
257 | 297 | |
258 | 298 | |
259 | 299 | |
260 | 300 | |
261 | 301 | |
... | ... | @@ -303,36 +343,48 @@ |
303 | 343 | struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) |
304 | 344 | { |
305 | 345 | struct net *net = dev_net(skb_dst(skb)->dev); |
346 | + struct ip_vs_dest_dst *dest_dst; | |
306 | 347 | struct rt6_info *rt; /* Route to the other host */ |
307 | 348 | struct rt6_info *ort; /* Original route */ |
308 | 349 | struct dst_entry *dst; |
309 | 350 | int mtu; |
310 | - int local; | |
351 | + int local, noref = 1; | |
311 | 352 | |
312 | 353 | if (dest) { |
313 | - spin_lock(&dest->dst_lock); | |
314 | - rt = (struct rt6_info *)__ip_vs_dst_check(dest); | |
315 | - if (!rt) { | |
354 | + dest_dst = __ip_vs_dst_check(dest); | |
355 | + if (likely(dest_dst)) | |
356 | + rt = (struct rt6_info *) dest_dst->dst_cache; | |
357 | + else { | |
316 | 358 | u32 cookie; |
317 | 359 | |
360 | + dest_dst = ip_vs_dest_dst_alloc(); | |
361 | + spin_lock(&dest->dst_lock); | |
362 | + if (!dest_dst) { | |
363 | + __ip_vs_dst_set(dest, NULL, NULL, 0); | |
364 | + spin_unlock(&dest->dst_lock); | |
365 | + goto err_unreach; | |
366 | + } | |
318 | 367 | dst = __ip_vs_route_output_v6(net, &dest->addr.in6, |
319 | - &dest->dst_saddr.in6, | |
368 | + &dest_dst->dst_saddr.in6, | |
320 | 369 | do_xfrm); |
321 | 370 | if (!dst) { |
371 | + __ip_vs_dst_set(dest, NULL, NULL, 0); | |
322 | 372 | spin_unlock(&dest->dst_lock); |
373 | + ip_vs_dest_dst_free(dest_dst); | |
323 | 374 | goto err_unreach; |
324 | 375 | } |
325 | 376 | rt = (struct rt6_info *) dst; |
326 | 377 | cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; |
327 | - __ip_vs_dst_set(dest, dst_clone(&rt->dst), cookie); | |
378 | + __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); | |
379 | + spin_unlock(&dest->dst_lock); | |
328 | 380 | IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", |
329 | - &dest->addr.in6, &dest->dst_saddr.in6, | |
381 | + &dest->addr.in6, &dest_dst->dst_saddr.in6, | |
330 | 382 | atomic_read(&rt->dst.__refcnt)); |
331 | 383 | } |
332 | 384 | if (ret_saddr) |
333 | - *ret_saddr = dest->dst_saddr.in6; | |
334 | - spin_unlock(&dest->dst_lock); | |
385 | + *ret_saddr = dest_dst->dst_saddr.in6; | |
335 | 386 | } else { |
387 | + noref = 0; | |
336 | 388 | dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); |
337 | 389 | if (!dst) |
338 | 390 | goto err_unreach; |
... | ... | @@ -367,7 +419,8 @@ |
367 | 419 | goto err_put; |
368 | 420 | } |
369 | 421 | /* skb to local stack, preserve old route */ |
370 | - dst_release(&rt->dst); | |
422 | + if (!noref) | |
423 | + dst_release(&rt->dst); | |
371 | 424 | return local; |
372 | 425 | } |
373 | 426 | |
374 | 427 | |
... | ... | @@ -399,12 +452,19 @@ |
399 | 452 | } |
400 | 453 | |
401 | 454 | skb_dst_drop(skb); |
402 | - skb_dst_set(skb, &rt->dst); | |
455 | + if (noref) { | |
456 | + if (!local) | |
457 | + skb_dst_set_noref_force(skb, &rt->dst); | |
458 | + else | |
459 | + skb_dst_set(skb, dst_clone(&rt->dst)); | |
460 | + } else | |
461 | + skb_dst_set(skb, &rt->dst); | |
403 | 462 | |
404 | 463 | return local; |
405 | 464 | |
406 | 465 | err_put: |
407 | - dst_release(&rt->dst); | |
466 | + if (!noref) | |
467 | + dst_release(&rt->dst); | |
408 | 468 | return -1; |
409 | 469 | |
410 | 470 | err_unreach: |
... | ... | @@ -494,6 +554,7 @@ |
494 | 554 | |
495 | 555 | EnterFunction(10); |
496 | 556 | |
557 | + rcu_read_lock(); | |
497 | 558 | if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, |
498 | 559 | NULL) < 0) |
499 | 560 | goto tx_error; |
500 | 561 | |
... | ... | @@ -504,12 +565,14 @@ |
504 | 565 | skb->local_df = 1; |
505 | 566 | |
506 | 567 | ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); |
568 | + rcu_read_unlock(); | |
507 | 569 | |
508 | 570 | LeaveFunction(10); |
509 | 571 | return NF_STOLEN; |
510 | 572 | |
511 | 573 | tx_error: |
512 | 574 | kfree_skb(skb); |
575 | + rcu_read_unlock(); | |
513 | 576 | LeaveFunction(10); |
514 | 577 | return NF_STOLEN; |
515 | 578 | } |
... | ... | @@ -521,6 +584,7 @@ |
521 | 584 | { |
522 | 585 | EnterFunction(10); |
523 | 586 | |
587 | + rcu_read_lock(); | |
524 | 588 | if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, |
525 | 589 | ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) |
526 | 590 | goto tx_error; |
527 | 591 | |
... | ... | @@ -529,12 +593,14 @@ |
529 | 593 | skb->local_df = 1; |
530 | 594 | |
531 | 595 | ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); |
596 | + rcu_read_unlock(); | |
532 | 597 | |
533 | 598 | LeaveFunction(10); |
534 | 599 | return NF_STOLEN; |
535 | 600 | |
536 | 601 | tx_error: |
537 | 602 | kfree_skb(skb); |
603 | + rcu_read_unlock(); | |
538 | 604 | LeaveFunction(10); |
539 | 605 | return NF_STOLEN; |
540 | 606 | } |
... | ... | @@ -553,6 +619,7 @@ |
553 | 619 | |
554 | 620 | EnterFunction(10); |
555 | 621 | |
622 | + rcu_read_lock(); | |
556 | 623 | /* check if it is a connection of no-client-port */ |
557 | 624 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { |
558 | 625 | __be16 _pt, *p; |
559 | 626 | |
... | ... | @@ -620,12 +687,14 @@ |
620 | 687 | skb->local_df = 1; |
621 | 688 | |
622 | 689 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); |
690 | + rcu_read_unlock(); | |
623 | 691 | |
624 | 692 | LeaveFunction(10); |
625 | 693 | return rc; |
626 | 694 | |
627 | 695 | tx_error: |
628 | 696 | kfree_skb(skb); |
697 | + rcu_read_unlock(); | |
629 | 698 | LeaveFunction(10); |
630 | 699 | return NF_STOLEN; |
631 | 700 | } |
... | ... | @@ -640,6 +709,7 @@ |
640 | 709 | |
641 | 710 | EnterFunction(10); |
642 | 711 | |
712 | + rcu_read_lock(); | |
643 | 713 | /* check if it is a connection of no-client-port */ |
644 | 714 | if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { |
645 | 715 | __be16 _pt, *p; |
... | ... | @@ -707,6 +777,7 @@ |
707 | 777 | skb->local_df = 1; |
708 | 778 | |
709 | 779 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); |
780 | + rcu_read_unlock(); | |
710 | 781 | |
711 | 782 | LeaveFunction(10); |
712 | 783 | return rc; |
... | ... | @@ -714,6 +785,7 @@ |
714 | 785 | tx_error: |
715 | 786 | LeaveFunction(10); |
716 | 787 | kfree_skb(skb); |
788 | + rcu_read_unlock(); | |
717 | 789 | return NF_STOLEN; |
718 | 790 | } |
719 | 791 | #endif |
... | ... | @@ -755,6 +827,7 @@ |
755 | 827 | |
756 | 828 | EnterFunction(10); |
757 | 829 | |
830 | + rcu_read_lock(); | |
758 | 831 | local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
759 | 832 | IP_VS_RT_MODE_LOCAL | |
760 | 833 | IP_VS_RT_MODE_NON_LOCAL | |
761 | 834 | |
... | ... | @@ -762,8 +835,10 @@ |
762 | 835 | IP_VS_RT_MODE_TUNNEL, &saddr); |
763 | 836 | if (local < 0) |
764 | 837 | goto tx_error; |
765 | - if (local) | |
838 | + if (local) { | |
839 | + rcu_read_unlock(); | |
766 | 840 | return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); |
841 | + } | |
767 | 842 | |
768 | 843 | rt = skb_rtable(skb); |
769 | 844 | tdev = rt->dst.dev; |
... | ... | @@ -818,6 +893,7 @@ |
818 | 893 | ip_local_out(skb); |
819 | 894 | else if (ret == NF_DROP) |
820 | 895 | kfree_skb(skb); |
896 | + rcu_read_unlock(); | |
821 | 897 | |
822 | 898 | LeaveFunction(10); |
823 | 899 | |
... | ... | @@ -825,6 +901,7 @@ |
825 | 901 | |
826 | 902 | tx_error: |
827 | 903 | kfree_skb(skb); |
904 | + rcu_read_unlock(); | |
828 | 905 | LeaveFunction(10); |
829 | 906 | return NF_STOLEN; |
830 | 907 | } |
... | ... | @@ -844,6 +921,7 @@ |
844 | 921 | |
845 | 922 | EnterFunction(10); |
846 | 923 | |
924 | + rcu_read_lock(); | |
847 | 925 | local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, |
848 | 926 | &saddr, ipvsh, 1, |
849 | 927 | IP_VS_RT_MODE_LOCAL | |
850 | 928 | |
... | ... | @@ -851,8 +929,10 @@ |
851 | 929 | IP_VS_RT_MODE_TUNNEL); |
852 | 930 | if (local < 0) |
853 | 931 | goto tx_error; |
854 | - if (local) | |
932 | + if (local) { | |
933 | + rcu_read_unlock(); | |
855 | 934 | return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); |
935 | + } | |
856 | 936 | |
857 | 937 | rt = (struct rt6_info *) skb_dst(skb); |
858 | 938 | tdev = rt->dst.dev; |
... | ... | @@ -901,6 +981,7 @@ |
901 | 981 | ip6_local_out(skb); |
902 | 982 | else if (ret == NF_DROP) |
903 | 983 | kfree_skb(skb); |
984 | + rcu_read_unlock(); | |
904 | 985 | |
905 | 986 | LeaveFunction(10); |
906 | 987 | |
... | ... | @@ -908,6 +989,7 @@ |
908 | 989 | |
909 | 990 | tx_error: |
910 | 991 | kfree_skb(skb); |
992 | + rcu_read_unlock(); | |
911 | 993 | LeaveFunction(10); |
912 | 994 | return NF_STOLEN; |
913 | 995 | } |
914 | 996 | |
915 | 997 | |
... | ... | @@ -926,14 +1008,17 @@ |
926 | 1008 | |
927 | 1009 | EnterFunction(10); |
928 | 1010 | |
1011 | + rcu_read_lock(); | |
929 | 1012 | local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, |
930 | 1013 | IP_VS_RT_MODE_LOCAL | |
931 | 1014 | IP_VS_RT_MODE_NON_LOCAL | |
932 | 1015 | IP_VS_RT_MODE_KNOWN_NH, NULL); |
933 | 1016 | if (local < 0) |
934 | 1017 | goto tx_error; |
935 | - if (local) | |
1018 | + if (local) { | |
1019 | + rcu_read_unlock(); | |
936 | 1020 | return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); |
1021 | + } | |
937 | 1022 | |
938 | 1023 | ip_send_check(ip_hdr(skb)); |
939 | 1024 | |
940 | 1025 | |
... | ... | @@ -941,12 +1026,14 @@ |
941 | 1026 | skb->local_df = 1; |
942 | 1027 | |
943 | 1028 | ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); |
1029 | + rcu_read_unlock(); | |
944 | 1030 | |
945 | 1031 | LeaveFunction(10); |
946 | 1032 | return NF_STOLEN; |
947 | 1033 | |
948 | 1034 | tx_error: |
949 | 1035 | kfree_skb(skb); |
1036 | + rcu_read_unlock(); | |
950 | 1037 | LeaveFunction(10); |
951 | 1038 | return NF_STOLEN; |
952 | 1039 | } |
953 | 1040 | |
954 | 1041 | |
955 | 1042 | |
956 | 1043 | |
... | ... | @@ -960,25 +1047,30 @@ |
960 | 1047 | |
961 | 1048 | EnterFunction(10); |
962 | 1049 | |
1050 | + rcu_read_lock(); | |
963 | 1051 | local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
964 | 1052 | ipvsh, 0, |
965 | 1053 | IP_VS_RT_MODE_LOCAL | |
966 | 1054 | IP_VS_RT_MODE_NON_LOCAL); |
967 | 1055 | if (local < 0) |
968 | 1056 | goto tx_error; |
969 | - if (local) | |
1057 | + if (local) { | |
1058 | + rcu_read_unlock(); | |
970 | 1059 | return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); |
1060 | + } | |
971 | 1061 | |
972 | 1062 | /* Another hack: avoid icmp_send in ip_fragment */ |
973 | 1063 | skb->local_df = 1; |
974 | 1064 | |
975 | 1065 | ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); |
1066 | + rcu_read_unlock(); | |
976 | 1067 | |
977 | 1068 | LeaveFunction(10); |
978 | 1069 | return NF_STOLEN; |
979 | 1070 | |
980 | 1071 | tx_error: |
981 | 1072 | kfree_skb(skb); |
1073 | + rcu_read_unlock(); | |
982 | 1074 | LeaveFunction(10); |
983 | 1075 | return NF_STOLEN; |
984 | 1076 | } |
... | ... | @@ -1023,6 +1115,7 @@ |
1023 | 1115 | rt_mode = (hooknum != NF_INET_FORWARD) ? |
1024 | 1116 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | |
1025 | 1117 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; |
1118 | + rcu_read_lock(); | |
1026 | 1119 | local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); |
1027 | 1120 | if (local < 0) |
1028 | 1121 | goto tx_error; |
1029 | 1122 | |
... | ... | @@ -1067,10 +1160,12 @@ |
1067 | 1160 | skb->local_df = 1; |
1068 | 1161 | |
1069 | 1162 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); |
1163 | + rcu_read_unlock(); | |
1070 | 1164 | goto out; |
1071 | 1165 | |
1072 | 1166 | tx_error: |
1073 | - dev_kfree_skb(skb); | |
1167 | + kfree_skb(skb); | |
1168 | + rcu_read_unlock(); | |
1074 | 1169 | rc = NF_STOLEN; |
1075 | 1170 | out: |
1076 | 1171 | LeaveFunction(10); |
... | ... | @@ -1111,6 +1206,7 @@ |
1111 | 1206 | rt_mode = (hooknum != NF_INET_FORWARD) ? |
1112 | 1207 | IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | |
1113 | 1208 | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; |
1209 | + rcu_read_lock(); | |
1114 | 1210 | local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, |
1115 | 1211 | ipvsh, 0, rt_mode); |
1116 | 1212 | if (local < 0) |
1117 | 1213 | |
... | ... | @@ -1156,10 +1252,12 @@ |
1156 | 1252 | skb->local_df = 1; |
1157 | 1253 | |
1158 | 1254 | rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); |
1255 | + rcu_read_unlock(); | |
1159 | 1256 | goto out; |
1160 | 1257 | |
1161 | 1258 | tx_error: |
1162 | - dev_kfree_skb(skb); | |
1259 | + kfree_skb(skb); | |
1260 | + rcu_read_unlock(); | |
1163 | 1261 | rc = NF_STOLEN; |
1164 | 1262 | out: |
1165 | 1263 | LeaveFunction(10); |