Commit 440f0d588555892601cfe511728a0fc0c8204063

Authored by Patrick McHardy
1 parent a31e1ffd22

netfilter: nf_conntrack: use per-conntrack locks for protocol data

Introduce per-conntrack locks and use them instead of the global protocol
locks to avoid contention. tcp_lock in particular shows up very high in
profiles on larger machines.

This will also make it possible to simplify the upcoming reliable event delivery patches.

Signed-off-by: Patrick McHardy <kaber@trash.net>

Showing 8 changed files with 49 additions and 55 deletions Side-by-side Diff

include/net/netfilter/nf_conntrack.h
... ... @@ -93,6 +93,8 @@
93 93 plus 1 for any connection(s) we are `master' for */
94 94 struct nf_conntrack ct_general;
95 95  
  96 + spinlock_t lock;
  97 +
96 98 /* XXX should I move this to the tail ? - Y.K */
97 99 /* These are my tuples; original and reply */
98 100 struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
include/net/netfilter/nf_conntrack_l4proto.h
... ... @@ -59,11 +59,11 @@
59 59 const struct nf_conntrack_tuple *);
60 60  
61 61 /* Print out the private part of the conntrack. */
62   - int (*print_conntrack)(struct seq_file *s, const struct nf_conn *);
  62 + int (*print_conntrack)(struct seq_file *s, struct nf_conn *);
63 63  
64 64 /* convert protoinfo to nfnetlink attributes */
65 65 int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
66   - const struct nf_conn *ct);
  66 + struct nf_conn *ct);
67 67 /* Calculate protoinfo nlattr size */
68 68 int (*nlattr_size)(void);
69 69  
net/netfilter/nf_conntrack_core.c
... ... @@ -519,6 +519,7 @@
519 519 return ERR_PTR(-ENOMEM);
520 520 }
521 521  
  522 + spin_lock_init(&ct->lock);
522 523 atomic_set(&ct->ct_general.use, 1);
523 524 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
524 525 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
net/netfilter/nf_conntrack_netlink.c
... ... @@ -143,7 +143,7 @@
143 143 }
144 144  
145 145 static inline int
146   -ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
  146 +ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
147 147 {
148 148 struct nf_conntrack_l4proto *l4proto;
149 149 struct nlattr *nest_proto;
... ... @@ -347,7 +347,7 @@
347 347  
348 348 static int
349 349 ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
350   - int event, const struct nf_conn *ct)
  350 + int event, struct nf_conn *ct)
351 351 {
352 352 struct nlmsghdr *nlh;
353 353 struct nfgenmsg *nfmsg;
net/netfilter/nf_conntrack_proto_dccp.c
... ... @@ -24,8 +24,6 @@
24 24 #include <net/netfilter/nf_conntrack_l4proto.h>
25 25 #include <net/netfilter/nf_log.h>
26 26  
27   -static DEFINE_RWLOCK(dccp_lock);
28   -
29 27 /* Timeouts are based on values from RFC4340:
30 28 *
31 29 * - REQUEST:
... ... @@ -491,7 +489,7 @@
491 489 return NF_ACCEPT;
492 490 }
493 491  
494   - write_lock_bh(&dccp_lock);
  492 + spin_lock_bh(&ct->lock);
495 493  
496 494 role = ct->proto.dccp.role[dir];
497 495 old_state = ct->proto.dccp.state;
498 496  
... ... @@ -535,13 +533,13 @@
535 533 ct->proto.dccp.last_dir = dir;
536 534 ct->proto.dccp.last_pkt = type;
537 535  
538   - write_unlock_bh(&dccp_lock);
  536 + spin_unlock_bh(&ct->lock);
539 537 if (LOG_INVALID(net, IPPROTO_DCCP))
540 538 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
541 539 "nf_ct_dccp: invalid packet ignored ");
542 540 return NF_ACCEPT;
543 541 case CT_DCCP_INVALID:
544   - write_unlock_bh(&dccp_lock);
  542 + spin_unlock_bh(&ct->lock);
545 543 if (LOG_INVALID(net, IPPROTO_DCCP))
546 544 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
547 545 "nf_ct_dccp: invalid state transition ");
... ... @@ -551,7 +549,7 @@
551 549 ct->proto.dccp.last_dir = dir;
552 550 ct->proto.dccp.last_pkt = type;
553 551 ct->proto.dccp.state = new_state;
554   - write_unlock_bh(&dccp_lock);
  552 + spin_unlock_bh(&ct->lock);
555 553  
556 554 dn = dccp_pernet(net);
557 555 nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
558 556  
559 557  
... ... @@ -617,18 +615,18 @@
617 615 ntohs(tuple->dst.u.dccp.port));
618 616 }
619 617  
620   -static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
  618 +static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
621 619 {
622 620 return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
623 621 }
624 622  
625 623 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
626 624 static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
627   - const struct nf_conn *ct)
  625 + struct nf_conn *ct)
628 626 {
629 627 struct nlattr *nest_parms;
630 628  
631   - read_lock_bh(&dccp_lock);
  629 + spin_lock_bh(&ct->lock);
632 630 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
633 631 if (!nest_parms)
634 632 goto nla_put_failure;
635 633  
... ... @@ -638,11 +636,11 @@
638 636 NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
639 637 cpu_to_be64(ct->proto.dccp.handshake_seq));
640 638 nla_nest_end(skb, nest_parms);
641   - read_unlock_bh(&dccp_lock);
  639 + spin_unlock_bh(&ct->lock);
642 640 return 0;
643 641  
644 642 nla_put_failure:
645   - read_unlock_bh(&dccp_lock);
  643 + spin_unlock_bh(&ct->lock);
646 644 return -1;
647 645 }
648 646  
... ... @@ -673,7 +671,7 @@
673 671 return -EINVAL;
674 672 }
675 673  
676   - write_lock_bh(&dccp_lock);
  674 + spin_lock_bh(&ct->lock);
677 675 ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
678 676 if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
679 677 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
... ... @@ -686,7 +684,7 @@
686 684 ct->proto.dccp.handshake_seq =
687 685 be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
688 686 }
689   - write_unlock_bh(&dccp_lock);
  687 + spin_unlock_bh(&ct->lock);
690 688 return 0;
691 689 }
692 690  
net/netfilter/nf_conntrack_proto_gre.c
... ... @@ -219,8 +219,7 @@
219 219 }
220 220  
221 221 /* print private data for conntrack */
222   -static int gre_print_conntrack(struct seq_file *s,
223   - const struct nf_conn *ct)
  222 +static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
224 223 {
225 224 return seq_printf(s, "timeout=%u, stream_timeout=%u ",
226 225 (ct->proto.gre.timeout / HZ),
net/netfilter/nf_conntrack_proto_sctp.c
... ... @@ -25,9 +25,6 @@
25 25 #include <net/netfilter/nf_conntrack_l4proto.h>
26 26 #include <net/netfilter/nf_conntrack_ecache.h>
27 27  
28   -/* Protects ct->proto.sctp */
29   -static DEFINE_RWLOCK(sctp_lock);
30   -
31 28 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
32 29 closely. They're more complex. --RR
33 30  
34 31  
35 32  
... ... @@ -164,13 +161,13 @@
164 161 }
165 162  
166 163 /* Print out the private part of the conntrack. */
167   -static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
  164 +static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
168 165 {
169 166 enum sctp_conntrack state;
170 167  
171   - read_lock_bh(&sctp_lock);
  168 + spin_lock_bh(&ct->lock);
172 169 state = ct->proto.sctp.state;
173   - read_unlock_bh(&sctp_lock);
  170 + spin_unlock_bh(&ct->lock);
174 171  
175 172 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
176 173 }
... ... @@ -318,7 +315,7 @@
318 315 }
319 316  
320 317 old_state = new_state = SCTP_CONNTRACK_NONE;
321   - write_lock_bh(&sctp_lock);
  318 + spin_lock_bh(&ct->lock);
322 319 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
323 320 /* Special cases of Verification tag check (Sec 8.5.1) */
324 321 if (sch->type == SCTP_CID_INIT) {
... ... @@ -371,7 +368,7 @@
371 368 if (old_state != new_state)
372 369 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
373 370 }
374   - write_unlock_bh(&sctp_lock);
  371 + spin_unlock_bh(&ct->lock);
375 372  
376 373 nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
377 374  
... ... @@ -386,7 +383,7 @@
386 383 return NF_ACCEPT;
387 384  
388 385 out_unlock:
389   - write_unlock_bh(&sctp_lock);
  386 + spin_unlock_bh(&ct->lock);
390 387 out:
391 388 return -NF_ACCEPT;
392 389 }
393 390  
... ... @@ -469,11 +466,11 @@
469 466 #include <linux/netfilter/nfnetlink_conntrack.h>
470 467  
471 468 static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
472   - const struct nf_conn *ct)
  469 + struct nf_conn *ct)
473 470 {
474 471 struct nlattr *nest_parms;
475 472  
476   - read_lock_bh(&sctp_lock);
  473 + spin_lock_bh(&ct->lock);
477 474 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
478 475 if (!nest_parms)
479 476 goto nla_put_failure;
480 477  
... ... @@ -488,14 +485,14 @@
488 485 CTA_PROTOINFO_SCTP_VTAG_REPLY,
489 486 ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
490 487  
491   - read_unlock_bh(&sctp_lock);
  488 + spin_unlock_bh(&ct->lock);
492 489  
493 490 nla_nest_end(skb, nest_parms);
494 491  
495 492 return 0;
496 493  
497 494 nla_put_failure:
498   - read_unlock_bh(&sctp_lock);
  495 + spin_unlock_bh(&ct->lock);
499 496 return -1;
500 497 }
501 498  
502 499  
... ... @@ -527,13 +524,13 @@
527 524 !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
528 525 return -EINVAL;
529 526  
530   - write_lock_bh(&sctp_lock);
  527 + spin_lock_bh(&ct->lock);
531 528 ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
532 529 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
533 530 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
534 531 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
535 532 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
536   - write_unlock_bh(&sctp_lock);
  533 + spin_unlock_bh(&ct->lock);
537 534  
538 535 return 0;
539 536 }
net/netfilter/nf_conntrack_proto_tcp.c
... ... @@ -29,9 +29,6 @@
29 29 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30 30 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31 31  
32   -/* Protects ct->proto.tcp */
33   -static DEFINE_RWLOCK(tcp_lock);
34   -
35 32 /* "Be conservative in what you do,
36 33 be liberal in what you accept from others."
37 34 If it's non-zero, we mark only out of window RST segments as INVALID. */
38 35  
39 36  
... ... @@ -309,13 +306,13 @@
309 306 }
310 307  
311 308 /* Print out the private part of the conntrack. */
312   -static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
  309 +static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
313 310 {
314 311 enum tcp_conntrack state;
315 312  
316   - read_lock_bh(&tcp_lock);
  313 + spin_lock_bh(&ct->lock);
317 314 state = ct->proto.tcp.state;
318   - read_unlock_bh(&tcp_lock);
  315 + spin_unlock_bh(&ct->lock);
319 316  
320 317 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
321 318 }
322 319  
... ... @@ -725,14 +722,14 @@
725 722  
726 723 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
727 724  
728   - write_lock_bh(&tcp_lock);
  725 + spin_lock_bh(&ct->lock);
729 726 /*
730 727 * We have to worry for the ack in the reply packet only...
731 728 */
732 729 if (after(end, ct->proto.tcp.seen[dir].td_end))
733 730 ct->proto.tcp.seen[dir].td_end = end;
734 731 ct->proto.tcp.last_end = end;
735   - write_unlock_bh(&tcp_lock);
  732 + spin_unlock_bh(&ct->lock);
736 733 pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
737 734 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
738 735 sender->td_end, sender->td_maxend, sender->td_maxwin,
... ... @@ -841,7 +838,7 @@
841 838 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
842 839 BUG_ON(th == NULL);
843 840  
844   - write_lock_bh(&tcp_lock);
  841 + spin_lock_bh(&ct->lock);
845 842 old_state = ct->proto.tcp.state;
846 843 dir = CTINFO2DIR(ctinfo);
847 844 index = get_conntrack_index(th);
... ... @@ -871,7 +868,7 @@
871 868 && ct->proto.tcp.last_index == TCP_RST_SET)) {
872 869 /* Attempt to reopen a closed/aborted connection.
873 870 * Delete this connection and look up again. */
874   - write_unlock_bh(&tcp_lock);
  871 + spin_unlock_bh(&ct->lock);
875 872  
876 873 /* Only repeat if we can actually remove the timer.
877 874 * Destruction may already be in progress in process
... ... @@ -907,7 +904,7 @@
907 904 * that the client cannot but retransmit its SYN and
908 905 * thus initiate a clean new session.
909 906 */
910   - write_unlock_bh(&tcp_lock);
  907 + spin_unlock_bh(&ct->lock);
911 908 if (LOG_INVALID(net, IPPROTO_TCP))
912 909 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
913 910 "nf_ct_tcp: killing out of sync session ");
... ... @@ -920,7 +917,7 @@
920 917 ct->proto.tcp.last_end =
921 918 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
922 919  
923   - write_unlock_bh(&tcp_lock);
  920 + spin_unlock_bh(&ct->lock);
924 921 if (LOG_INVALID(net, IPPROTO_TCP))
925 922 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
926 923 "nf_ct_tcp: invalid packet ignored ");
... ... @@ -929,7 +926,7 @@
929 926 /* Invalid packet */
930 927 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
931 928 dir, get_conntrack_index(th), old_state);
932   - write_unlock_bh(&tcp_lock);
  929 + spin_unlock_bh(&ct->lock);
933 930 if (LOG_INVALID(net, IPPROTO_TCP))
934 931 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
935 932 "nf_ct_tcp: invalid state ");
... ... @@ -960,7 +957,7 @@
960 957  
961 958 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
962 959 skb, dataoff, th, pf)) {
963   - write_unlock_bh(&tcp_lock);
  960 + spin_unlock_bh(&ct->lock);
964 961 return -NF_ACCEPT;
965 962 }
966 963 in_window:
... ... @@ -989,7 +986,7 @@
989 986 timeout = nf_ct_tcp_timeout_unacknowledged;
990 987 else
991 988 timeout = tcp_timeouts[new_state];
992   - write_unlock_bh(&tcp_lock);
  989 + spin_unlock_bh(&ct->lock);
993 990  
994 991 if (new_state != old_state)
995 992 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
996 993  
... ... @@ -1106,12 +1103,12 @@
1106 1103 #include <linux/netfilter/nfnetlink_conntrack.h>
1107 1104  
1108 1105 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1109   - const struct nf_conn *ct)
  1106 + struct nf_conn *ct)
1110 1107 {
1111 1108 struct nlattr *nest_parms;
1112 1109 struct nf_ct_tcp_flags tmp = {};
1113 1110  
1114   - read_lock_bh(&tcp_lock);
  1111 + spin_lock_bh(&ct->lock);
1115 1112 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1116 1113 if (!nest_parms)
1117 1114 goto nla_put_failure;
1118 1115  
... ... @@ -1131,14 +1128,14 @@
1131 1128 tmp.flags = ct->proto.tcp.seen[1].flags;
1132 1129 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1133 1130 sizeof(struct nf_ct_tcp_flags), &tmp);
1134   - read_unlock_bh(&tcp_lock);
  1131 + spin_unlock_bh(&ct->lock);
1135 1132  
1136 1133 nla_nest_end(skb, nest_parms);
1137 1134  
1138 1135 return 0;
1139 1136  
1140 1137 nla_put_failure:
1141   - read_unlock_bh(&tcp_lock);
  1138 + spin_unlock_bh(&ct->lock);
1142 1139 return -1;
1143 1140 }
1144 1141  
... ... @@ -1169,7 +1166,7 @@
1169 1166 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1170 1167 return -EINVAL;
1171 1168  
1172   - write_lock_bh(&tcp_lock);
  1169 + spin_lock_bh(&ct->lock);
1173 1170 if (tb[CTA_PROTOINFO_TCP_STATE])
1174 1171 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1175 1172  
... ... @@ -1196,7 +1193,7 @@
1196 1193 ct->proto.tcp.seen[1].td_scale =
1197 1194 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1198 1195 }
1199   - write_unlock_bh(&tcp_lock);
  1196 + spin_unlock_bh(&ct->lock);
1200 1197  
1201 1198 return 0;
1202 1199 }