Commit ac69269a45e84c1772dcb9e77db976a932f4af22

Authored by Julian Anastasov
Committed by Simon Horman
1 parent ceec4c3816

ipvs: do not disable bh for long time

We used a global BH disable in the LOCAL_OUT hook.
Add the _bh suffix to all of the places that need it and remove
the disabling from the LOCAL_OUT and sync code.
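
For illustration, a minimal sketch of the new pattern, using
hypothetical names (example_lock, update_shared_state) rather
than code from this patch:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock */

	static void update_shared_state(void)
	{
		/* spin_lock_bh() disables bottom halves only for this
		 * critical section; before this patch the whole
		 * LOCAL_OUT hook ran under local_bh_disable(), so a
		 * plain spin_lock() could not deadlock with softirqs. */
		spin_lock_bh(&example_lock);
		/* ... update state shared with softirq context ... */
		spin_unlock_bh(&example_lock);
	}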

Functions like ip_defrag need protection from
BH, so add it around them. As for nf_nat_mangle_tcp_packet,
it needs the RCU read lock.
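
The resulting ip_vs_gather_frags() wrapper, as it appears in the
ip_vs_core.c hunk below, shows the BH case; the ip_vs_ftp.c hunk
handles the RCU case by bracketing the nf_nat_mangle_tcp_packet()
call with rcu_read_lock()/rcu_read_unlock():

	#include <linux/ip.h>
	#include <net/ip.h>

	static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
	{
		int err;

		/* ip_defrag() must run with BH disabled, so disable it
		 * just around this call instead of around the hook. */
		local_bh_disable();
		err = ip_defrag(skb, user);
		local_bh_enable();
		if (!err)
			ip_send_check(ip_hdr(skb));

		return err;
	}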

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>

Showing 12 changed files with 64 additions and 87 deletions

net/netfilter/ipvs/ip_vs_app.c
... ... @@ -352,14 +352,14 @@
352 352 unsigned int flag, __u32 seq, int diff)
353 353 {
354 354 /* spinlock is to keep updating cp->flags atomic */
355   - spin_lock(&cp->lock);
  355 + spin_lock_bh(&cp->lock);
356 356 if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
357 357 vseq->previous_delta = vseq->delta;
358 358 vseq->delta += diff;
359 359 vseq->init_seq = seq;
360 360 cp->flags |= flag;
361 361 }
362   - spin_unlock(&cp->lock);
  362 + spin_unlock_bh(&cp->lock);
363 363 }
364 364  
365 365 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
net/netfilter/ipvs/ip_vs_conn.c
... ... @@ -86,14 +86,14 @@
86 86 static struct ip_vs_aligned_lock
87 87 __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
88 88  
89   -static inline void ct_write_lock(unsigned int key)
  89 +static inline void ct_write_lock_bh(unsigned int key)
90 90 {
91   - spin_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
  91 + spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
92 92 }
93 93  
94   -static inline void ct_write_unlock(unsigned int key)
  94 +static inline void ct_write_unlock_bh(unsigned int key)
95 95 {
96   - spin_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
  96 + spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
97 97 }
98 98  
99 99  
... ... @@ -167,7 +167,7 @@
167 167 /* Hash by protocol, client address and port */
168 168 hash = ip_vs_conn_hashkey_conn(cp);
169 169  
170   - ct_write_lock(hash);
  170 + ct_write_lock_bh(hash);
171 171 spin_lock(&cp->lock);
172 172  
173 173 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
... ... @@ -182,7 +182,7 @@
182 182 }
183 183  
184 184 spin_unlock(&cp->lock);
185   - ct_write_unlock(hash);
  185 + ct_write_unlock_bh(hash);
186 186  
187 187 return ret;
188 188 }
... ... @@ -200,7 +200,7 @@
200 200 /* unhash it and decrease its reference counter */
201 201 hash = ip_vs_conn_hashkey_conn(cp);
202 202  
203   - ct_write_lock(hash);
  203 + ct_write_lock_bh(hash);
204 204 spin_lock(&cp->lock);
205 205  
206 206 if (cp->flags & IP_VS_CONN_F_HASHED) {
... ... @@ -212,7 +212,7 @@
212 212 ret = 0;
213 213  
214 214 spin_unlock(&cp->lock);
215   - ct_write_unlock(hash);
  215 + ct_write_unlock_bh(hash);
216 216  
217 217 return ret;
218 218 }
... ... @@ -227,7 +227,7 @@
227 227  
228 228 hash = ip_vs_conn_hashkey_conn(cp);
229 229  
230   - ct_write_lock(hash);
  230 + ct_write_lock_bh(hash);
231 231 spin_lock(&cp->lock);
232 232  
233 233 if (cp->flags & IP_VS_CONN_F_HASHED) {
... ... @@ -242,7 +242,7 @@
242 242 ret = atomic_read(&cp->refcnt) ? false : true;
243 243  
244 244 spin_unlock(&cp->lock);
245   - ct_write_unlock(hash);
  245 + ct_write_unlock_bh(hash);
246 246  
247 247 return ret;
248 248 }
249 249  
... ... @@ -462,13 +462,13 @@
462 462 void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
463 463 {
464 464 if (ip_vs_conn_unhash(cp)) {
465   - spin_lock(&cp->lock);
  465 + spin_lock_bh(&cp->lock);
466 466 if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
467 467 atomic_dec(&ip_vs_conn_no_cport_cnt);
468 468 cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
469 469 cp->cport = cport;
470 470 }
471   - spin_unlock(&cp->lock);
  471 + spin_unlock_bh(&cp->lock);
472 472  
473 473 /* hash on new dport */
474 474 ip_vs_conn_hash(cp);
475 475  
... ... @@ -622,9 +622,9 @@
622 622 if (dest) {
623 623 struct ip_vs_proto_data *pd;
624 624  
625   - spin_lock(&cp->lock);
  625 + spin_lock_bh(&cp->lock);
626 626 if (cp->dest) {
627   - spin_unlock(&cp->lock);
  627 + spin_unlock_bh(&cp->lock);
628 628 rcu_read_unlock();
629 629 return;
630 630 }
... ... @@ -635,7 +635,7 @@
635 635 ip_vs_unbind_app(cp);
636 636  
637 637 ip_vs_bind_dest(cp, dest);
638   - spin_unlock(&cp->lock);
  638 + spin_unlock_bh(&cp->lock);
639 639  
640 640 /* Update its packet transmitter */
641 641 cp->packet_xmit = NULL;
net/netfilter/ipvs/ip_vs_core.c
... ... @@ -638,8 +638,11 @@
638 638  
639 639 static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
640 640 {
641   - int err = ip_defrag(skb, user);
  641 + int err;
642 642  
  643 + local_bh_disable();
  644 + err = ip_defrag(skb, user);
  645 + local_bh_enable();
643 646 if (!err)
644 647 ip_send_check(ip_hdr(skb));
645 648  
... ... @@ -1217,13 +1220,7 @@
1217 1220 const struct net_device *in, const struct net_device *out,
1218 1221 int (*okfn)(struct sk_buff *))
1219 1222 {
1220   - unsigned int verdict;
1221   -
1222   - /* Disable BH in LOCAL_OUT until all places are fixed */
1223   - local_bh_disable();
1224   - verdict = ip_vs_out(hooknum, skb, AF_INET);
1225   - local_bh_enable();
1226   - return verdict;
  1223 + return ip_vs_out(hooknum, skb, AF_INET);
1227 1224 }
1228 1225  
1229 1226 #ifdef CONFIG_IP_VS_IPV6
... ... @@ -1250,13 +1247,7 @@
1250 1247 const struct net_device *in, const struct net_device *out,
1251 1248 int (*okfn)(struct sk_buff *))
1252 1249 {
1253   - unsigned int verdict;
1254   -
1255   - /* Disable BH in LOCAL_OUT until all places are fixed */
1256   - local_bh_disable();
1257   - verdict = ip_vs_out(hooknum, skb, AF_INET6);
1258   - local_bh_enable();
1259   - return verdict;
  1250 + return ip_vs_out(hooknum, skb, AF_INET6);
1260 1251 }
1261 1252  
1262 1253 #endif
... ... @@ -1714,13 +1705,7 @@
1714 1705 const struct net_device *in, const struct net_device *out,
1715 1706 int (*okfn)(struct sk_buff *))
1716 1707 {
1717   - unsigned int verdict;
1718   -
1719   - /* Disable BH in LOCAL_OUT until all places are fixed */
1720   - local_bh_disable();
1721   - verdict = ip_vs_in(hooknum, skb, AF_INET);
1722   - local_bh_enable();
1723   - return verdict;
  1708 + return ip_vs_in(hooknum, skb, AF_INET);
1724 1709 }
1725 1710  
1726 1711 #ifdef CONFIG_IP_VS_IPV6
... ... @@ -1779,13 +1764,7 @@
1779 1764 const struct net_device *in, const struct net_device *out,
1780 1765 int (*okfn)(struct sk_buff *))
1781 1766 {
1782   - unsigned int verdict;
1783   -
1784   - /* Disable BH in LOCAL_OUT until all places are fixed */
1785   - local_bh_disable();
1786   - verdict = ip_vs_in(hooknum, skb, AF_INET6);
1787   - local_bh_enable();
1788   - return verdict;
  1767 + return ip_vs_in(hooknum, skb, AF_INET6);
1789 1768 }
1790 1769  
1791 1770 #endif
net/netfilter/ipvs/ip_vs_ftp.c
... ... @@ -267,10 +267,12 @@
267 267 * hopefully it will succeed on the retransmitted
268 268 * packet.
269 269 */
  270 + rcu_read_lock();
270 271 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
271 272 iph->ihl * 4,
272 273 start-data, end-start,
273 274 buf, buf_len);
  275 + rcu_read_unlock();
274 276 if (ret) {
275 277 ip_vs_nfct_expect_related(skb, ct, n_cp,
276 278 IPPROTO_TCP, 0, 0);
net/netfilter/ipvs/ip_vs_lblc.c
... ... @@ -527,10 +527,10 @@
527 527 }
528 528  
529 529 /* If we fail to create a cache entry, we'll just use the valid dest */
530   - spin_lock(&svc->sched_lock);
  530 + spin_lock_bh(&svc->sched_lock);
531 531 if (!tbl->dead)
532 532 ip_vs_lblc_new(tbl, &iph.daddr, dest);
533   - spin_unlock(&svc->sched_lock);
  533 + spin_unlock_bh(&svc->sched_lock);
534 534  
535 535 out:
536 536 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
net/netfilter/ipvs/ip_vs_lblcr.c
... ... @@ -678,7 +678,7 @@
678 678 if (atomic_read(&en->set.size) > 1 &&
679 679 time_after(jiffies, en->set.lastmod +
680 680 sysctl_lblcr_expiration(svc))) {
681   - spin_lock(&svc->sched_lock);
  681 + spin_lock_bh(&svc->sched_lock);
682 682 if (atomic_read(&en->set.size) > 1) {
683 683 struct ip_vs_dest *m;
684 684  
... ... @@ -686,7 +686,7 @@
686 686 if (m)
687 687 ip_vs_dest_set_erase(&en->set, m);
688 688 }
689   - spin_unlock(&svc->sched_lock);
  689 + spin_unlock_bh(&svc->sched_lock);
690 690 }
691 691  
692 692 /* If the destination is not overloaded, use it */
693 693  
... ... @@ -701,10 +701,10 @@
701 701 }
702 702  
703 703 /* Update our cache entry */
704   - spin_lock(&svc->sched_lock);
  704 + spin_lock_bh(&svc->sched_lock);
705 705 if (!tbl->dead)
706 706 ip_vs_dest_set_insert(&en->set, dest, true);
707   - spin_unlock(&svc->sched_lock);
  707 + spin_unlock_bh(&svc->sched_lock);
708 708 goto out;
709 709 }
710 710  
711 711  
... ... @@ -716,10 +716,10 @@
716 716 }
717 717  
718 718 /* If we fail to create a cache entry, we'll just use the valid dest */
719   - spin_lock(&svc->sched_lock);
  719 + spin_lock_bh(&svc->sched_lock);
720 720 if (!tbl->dead)
721 721 ip_vs_lblcr_new(tbl, &iph.daddr, dest);
722   - spin_unlock(&svc->sched_lock);
  722 + spin_unlock_bh(&svc->sched_lock);
723 723  
724 724 out:
725 725 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
net/netfilter/ipvs/ip_vs_proto_sctp.c
... ... @@ -994,9 +994,9 @@
994 994 sctp_state_transition(struct ip_vs_conn *cp, int direction,
995 995 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
996 996 {
997   - spin_lock(&cp->lock);
  997 + spin_lock_bh(&cp->lock);
998 998 set_sctp_state(pd, cp, direction, skb);
999   - spin_unlock(&cp->lock);
  999 + spin_unlock_bh(&cp->lock);
1000 1000 }
1001 1001  
1002 1002 static inline __u16 sctp_app_hashkey(__be16 port)
net/netfilter/ipvs/ip_vs_proto_tcp.c
... ... @@ -557,9 +557,9 @@
557 557 if (th == NULL)
558 558 return;
559 559  
560   - spin_lock(&cp->lock);
  560 + spin_lock_bh(&cp->lock);
561 561 set_tcp_state(pd, cp, direction, th);
562   - spin_unlock(&cp->lock);
  562 + spin_unlock_bh(&cp->lock);
563 563 }
564 564  
565 565 static inline __u16 tcp_app_hashkey(__be16 port)
566 566  
... ... @@ -655,11 +655,11 @@
655 655 {
656 656 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
657 657  
658   - spin_lock(&cp->lock);
  658 + spin_lock_bh(&cp->lock);
659 659 cp->state = IP_VS_TCP_S_LISTEN;
660 660 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
661 661 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
662   - spin_unlock(&cp->lock);
  662 + spin_unlock_bh(&cp->lock);
663 663 }
664 664  
665 665 /* ---------------------------------------------
net/netfilter/ipvs/ip_vs_rr.c
... ... @@ -63,7 +63,7 @@
63 63  
64 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
65 65  
66   - spin_lock(&svc->sched_lock);
  66 + spin_lock_bh(&svc->sched_lock);
67 67 p = (struct list_head *) svc->sched_data;
68 68 last = dest = list_entry(p, struct ip_vs_dest, n_list);
69 69  
70 70  
... ... @@ -85,13 +85,13 @@
85 85 } while (pass < 2 && p != &svc->destinations);
86 86  
87 87 stop:
88   - spin_unlock(&svc->sched_lock);
  88 + spin_unlock_bh(&svc->sched_lock);
89 89 ip_vs_scheduler_err(svc, "no destination available");
90 90 return NULL;
91 91  
92 92 out:
93 93 svc->sched_data = &dest->n_list;
94   - spin_unlock(&svc->sched_lock);
  94 + spin_unlock_bh(&svc->sched_lock);
95 95 IP_VS_DBG_BUF(6, "RR: server %s:%u "
96 96 "activeconns %d refcnt %d weight %d\n",
97 97 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
net/netfilter/ipvs/ip_vs_sync.c
... ... @@ -531,9 +531,9 @@
531 531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
532 532 return;
533 533  
534   - spin_lock(&ipvs->sync_buff_lock);
  534 + spin_lock_bh(&ipvs->sync_buff_lock);
535 535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
536   - spin_unlock(&ipvs->sync_buff_lock);
  536 + spin_unlock_bh(&ipvs->sync_buff_lock);
537 537 return;
538 538 }
539 539  
... ... @@ -552,7 +552,7 @@
552 552 if (!buff) {
553 553 buff = ip_vs_sync_buff_create_v0(ipvs);
554 554 if (!buff) {
555   - spin_unlock(&ipvs->sync_buff_lock);
  555 + spin_unlock_bh(&ipvs->sync_buff_lock);
556 556 pr_err("ip_vs_sync_buff_create failed.\n");
557 557 return;
558 558 }
... ... @@ -590,7 +590,7 @@
590 590 sb_queue_tail(ipvs, ms);
591 591 ms->sync_buff = NULL;
592 592 }
593   - spin_unlock(&ipvs->sync_buff_lock);
  593 + spin_unlock_bh(&ipvs->sync_buff_lock);
594 594  
595 595 /* synchronize its controller if it has */
596 596 cp = cp->control;
597 597  
... ... @@ -641,9 +641,9 @@
641 641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
642 642 }
643 643  
644   - spin_lock(&ipvs->sync_buff_lock);
  644 + spin_lock_bh(&ipvs->sync_buff_lock);
645 645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
646   - spin_unlock(&ipvs->sync_buff_lock);
  646 + spin_unlock_bh(&ipvs->sync_buff_lock);
647 647 return;
648 648 }
649 649  
... ... @@ -683,7 +683,7 @@
683 683 if (!buff) {
684 684 buff = ip_vs_sync_buff_create(ipvs);
685 685 if (!buff) {
686   - spin_unlock(&ipvs->sync_buff_lock);
  686 + spin_unlock_bh(&ipvs->sync_buff_lock);
687 687 pr_err("ip_vs_sync_buff_create failed.\n");
688 688 return;
689 689 }
... ... @@ -750,7 +750,7 @@
750 750 }
751 751 }
752 752  
753   - spin_unlock(&ipvs->sync_buff_lock);
  753 + spin_unlock_bh(&ipvs->sync_buff_lock);
754 754  
755 755 control:
756 756 /* synchronize its controller if it has */
... ... @@ -843,7 +843,7 @@
843 843 kfree(param->pe_data);
844 844  
845 845 dest = cp->dest;
846   - spin_lock(&cp->lock);
  846 + spin_lock_bh(&cp->lock);
847 847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
848 848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
849 849 if (flags & IP_VS_CONN_F_INACTIVE) {
... ... @@ -857,7 +857,7 @@
857 857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
858 858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
859 859 cp->flags = flags;
860   - spin_unlock(&cp->lock);
  860 + spin_unlock_bh(&cp->lock);
861 861 if (!dest)
862 862 ip_vs_try_bind_dest(cp);
863 863 } else {
864 864  
... ... @@ -1689,11 +1689,7 @@
1689 1689 break;
1690 1690 }
1691 1691  
1692   - /* disable bottom half, because it accesses the data
1693   - shared by softirq while getting/creating conns */
1694   - local_bh_disable();
1695 1692 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1696   - local_bh_enable();
1697 1693 }
1698 1694 }
1699 1695  
net/netfilter/ipvs/ip_vs_wrr.c
... ... @@ -170,7 +170,7 @@
170 170  
171 171 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
172 172  
173   - spin_lock(&svc->sched_lock);
  173 + spin_lock_bh(&svc->sched_lock);
174 174 dest = mark->cl;
175 175 /* No available dests? */
176 176 if (mark->mw == 0)
... ... @@ -222,7 +222,7 @@
222 222 mark->cl = dest;
223 223  
224 224 out:
225   - spin_unlock(&svc->sched_lock);
  225 + spin_unlock_bh(&svc->sched_lock);
226 226 return dest;
227 227  
228 228 err_noavail:
net/netfilter/ipvs/ip_vs_xmit.c
... ... @@ -177,22 +177,22 @@
177 177 rt = (struct rtable *) dest_dst->dst_cache;
178 178 else {
179 179 dest_dst = ip_vs_dest_dst_alloc();
180   - spin_lock(&dest->dst_lock);
  180 + spin_lock_bh(&dest->dst_lock);
181 181 if (!dest_dst) {
182 182 __ip_vs_dst_set(dest, NULL, NULL, 0);
183   - spin_unlock(&dest->dst_lock);
  183 + spin_unlock_bh(&dest->dst_lock);
184 184 goto err_unreach;
185 185 }
186 186 rt = do_output_route4(net, dest->addr.ip, rt_mode,
187 187 &dest_dst->dst_saddr.ip);
188 188 if (!rt) {
189 189 __ip_vs_dst_set(dest, NULL, NULL, 0);
190   - spin_unlock(&dest->dst_lock);
  190 + spin_unlock_bh(&dest->dst_lock);
191 191 ip_vs_dest_dst_free(dest_dst);
192 192 goto err_unreach;
193 193 }
194 194 __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
195   - spin_unlock(&dest->dst_lock);
  195 + spin_unlock_bh(&dest->dst_lock);
196 196 IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
197 197 &dest->addr.ip, &dest_dst->dst_saddr.ip,
198 198 atomic_read(&rt->dst.__refcnt));
199 199  
... ... @@ -358,10 +358,10 @@
358 358 u32 cookie;
359 359  
360 360 dest_dst = ip_vs_dest_dst_alloc();
361   - spin_lock(&dest->dst_lock);
  361 + spin_lock_bh(&dest->dst_lock);
362 362 if (!dest_dst) {
363 363 __ip_vs_dst_set(dest, NULL, NULL, 0);
364   - spin_unlock(&dest->dst_lock);
  364 + spin_unlock_bh(&dest->dst_lock);
365 365 goto err_unreach;
366 366 }
367 367 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
368 368  
... ... @@ -369,14 +369,14 @@
369 369 do_xfrm);
370 370 if (!dst) {
371 371 __ip_vs_dst_set(dest, NULL, NULL, 0);
372   - spin_unlock(&dest->dst_lock);
  372 + spin_unlock_bh(&dest->dst_lock);
373 373 ip_vs_dest_dst_free(dest_dst);
374 374 goto err_unreach;
375 375 }
376 376 rt = (struct rt6_info *) dst;
377 377 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
378 378 __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
379   - spin_unlock(&dest->dst_lock);
  379 + spin_unlock_bh(&dest->dst_lock);
380 380 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
381 381 &dest->addr.in6, &dest_dst->dst_saddr.in6,
382 382 atomic_read(&rt->dst.__refcnt));