Commit c779e849608a875448f6ffc2a5c2a15523bdcd00
Committed by
Pablo Neira Ayuso
1 parent
97e08caec3
netfilter: conntrack: remove get_timeout() indirection
Not needed, we can have the l4trackers fetch it themselves. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Showing 12 changed files with 94 additions and 104 deletions Inline Diff
- include/net/netfilter/nf_conntrack_l4proto.h
- include/net/netfilter/nf_conntrack_timeout.h
- net/ipv4/netfilter/nf_conntrack_proto_icmp.c
- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
- net/netfilter/nf_conntrack_core.c
- net/netfilter/nf_conntrack_proto_dccp.c
- net/netfilter/nf_conntrack_proto_generic.c
- net/netfilter/nf_conntrack_proto_gre.c
- net/netfilter/nf_conntrack_proto_sctp.c
- net/netfilter/nf_conntrack_proto_tcp.c
- net/netfilter/nf_conntrack_proto_udp.c
- net/netfilter/nfnetlink_cttimeout.c
include/net/netfilter/nf_conntrack_l4proto.h
1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | 2 | /* |
3 | * Header for use in defining a given L4 protocol for connection tracking. | 3 | * Header for use in defining a given L4 protocol for connection tracking. |
4 | * | 4 | * |
5 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | 5 | * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> |
6 | * - generalized L3 protocol dependent part. | 6 | * - generalized L3 protocol dependent part. |
7 | * | 7 | * |
8 | * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h | 8 | * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #ifndef _NF_CONNTRACK_L4PROTO_H | 11 | #ifndef _NF_CONNTRACK_L4PROTO_H |
12 | #define _NF_CONNTRACK_L4PROTO_H | 12 | #define _NF_CONNTRACK_L4PROTO_H |
13 | #include <linux/netlink.h> | 13 | #include <linux/netlink.h> |
14 | #include <net/netlink.h> | 14 | #include <net/netlink.h> |
15 | #include <net/netfilter/nf_conntrack.h> | 15 | #include <net/netfilter/nf_conntrack.h> |
16 | #include <net/netns/generic.h> | 16 | #include <net/netns/generic.h> |
17 | 17 | ||
18 | struct seq_file; | 18 | struct seq_file; |
19 | 19 | ||
20 | struct nf_conntrack_l4proto { | 20 | struct nf_conntrack_l4proto { |
21 | /* L3 Protocol number. */ | 21 | /* L3 Protocol number. */ |
22 | u_int16_t l3proto; | 22 | u_int16_t l3proto; |
23 | 23 | ||
24 | /* L4 Protocol number. */ | 24 | /* L4 Protocol number. */ |
25 | u_int8_t l4proto; | 25 | u_int8_t l4proto; |
26 | 26 | ||
27 | /* Resolve clashes on insertion races. */ | 27 | /* Resolve clashes on insertion races. */ |
28 | bool allow_clash; | 28 | bool allow_clash; |
29 | 29 | ||
30 | /* protoinfo nlattr size, closes a hole */ | 30 | /* protoinfo nlattr size, closes a hole */ |
31 | u16 nlattr_size; | 31 | u16 nlattr_size; |
32 | 32 | ||
33 | /* Try to fill in the third arg: dataoff is offset past network protocol | 33 | /* Try to fill in the third arg: dataoff is offset past network protocol |
34 | hdr. Return true if possible. */ | 34 | hdr. Return true if possible. */ |
35 | bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, | 35 | bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, |
36 | struct net *net, struct nf_conntrack_tuple *tuple); | 36 | struct net *net, struct nf_conntrack_tuple *tuple); |
37 | 37 | ||
38 | /* Invert the per-proto part of the tuple: ie. turn xmit into reply. | 38 | /* Invert the per-proto part of the tuple: ie. turn xmit into reply. |
39 | * Only used by icmp, most protocols use a generic version. | 39 | * Only used by icmp, most protocols use a generic version. |
40 | */ | 40 | */ |
41 | bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, | 41 | bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, |
42 | const struct nf_conntrack_tuple *orig); | 42 | const struct nf_conntrack_tuple *orig); |
43 | 43 | ||
44 | /* Returns verdict for packet, or -1 for invalid. */ | 44 | /* Returns verdict for packet, or -1 for invalid. */ |
45 | int (*packet)(struct nf_conn *ct, | 45 | int (*packet)(struct nf_conn *ct, |
46 | const struct sk_buff *skb, | 46 | const struct sk_buff *skb, |
47 | unsigned int dataoff, | 47 | unsigned int dataoff, |
48 | enum ip_conntrack_info ctinfo, | 48 | enum ip_conntrack_info ctinfo); |
49 | unsigned int *timeouts); | ||
50 | 49 | ||
51 | /* Called when a new connection for this protocol found; | 50 | /* Called when a new connection for this protocol found; |
52 | * returns TRUE if it's OK. If so, packet() called next. */ | 51 | * returns TRUE if it's OK. If so, packet() called next. */ |
53 | bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, | 52 | bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, |
54 | unsigned int dataoff, unsigned int *timeouts); | 53 | unsigned int dataoff); |
55 | 54 | ||
56 | /* Called when a conntrack entry is destroyed */ | 55 | /* Called when a conntrack entry is destroyed */ |
57 | void (*destroy)(struct nf_conn *ct); | 56 | void (*destroy)(struct nf_conn *ct); |
58 | 57 | ||
59 | int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, | 58 | int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, |
60 | unsigned int dataoff, | 59 | unsigned int dataoff, |
61 | u_int8_t pf, unsigned int hooknum); | 60 | u_int8_t pf, unsigned int hooknum); |
62 | 61 | ||
63 | /* called by gc worker if table is full */ | 62 | /* called by gc worker if table is full */ |
64 | bool (*can_early_drop)(const struct nf_conn *ct); | 63 | bool (*can_early_drop)(const struct nf_conn *ct); |
65 | |||
66 | /* Return the array of timeouts for this protocol. */ | ||
67 | unsigned int *(*get_timeouts)(struct net *net); | ||
68 | 64 | ||
69 | /* convert protoinfo to nfnetink attributes */ | 65 | /* convert protoinfo to nfnetink attributes */ |
70 | int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, | 66 | int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, |
71 | struct nf_conn *ct); | 67 | struct nf_conn *ct); |
72 | 68 | ||
73 | /* convert nfnetlink attributes to protoinfo */ | 69 | /* convert nfnetlink attributes to protoinfo */ |
74 | int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); | 70 | int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); |
75 | 71 | ||
76 | int (*tuple_to_nlattr)(struct sk_buff *skb, | 72 | int (*tuple_to_nlattr)(struct sk_buff *skb, |
77 | const struct nf_conntrack_tuple *t); | 73 | const struct nf_conntrack_tuple *t); |
78 | /* Calculate tuple nlattr size */ | 74 | /* Calculate tuple nlattr size */ |
79 | unsigned int (*nlattr_tuple_size)(void); | 75 | unsigned int (*nlattr_tuple_size)(void); |
80 | int (*nlattr_to_tuple)(struct nlattr *tb[], | 76 | int (*nlattr_to_tuple)(struct nlattr *tb[], |
81 | struct nf_conntrack_tuple *t); | 77 | struct nf_conntrack_tuple *t); |
82 | const struct nla_policy *nla_policy; | 78 | const struct nla_policy *nla_policy; |
83 | 79 | ||
84 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 80 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
85 | struct { | 81 | struct { |
86 | int (*nlattr_to_obj)(struct nlattr *tb[], | 82 | int (*nlattr_to_obj)(struct nlattr *tb[], |
87 | struct net *net, void *data); | 83 | struct net *net, void *data); |
88 | int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); | 84 | int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); |
89 | 85 | ||
90 | u16 obj_size; | 86 | u16 obj_size; |
91 | u16 nlattr_max; | 87 | u16 nlattr_max; |
92 | const struct nla_policy *nla_policy; | 88 | const struct nla_policy *nla_policy; |
93 | } ctnl_timeout; | 89 | } ctnl_timeout; |
94 | #endif | 90 | #endif |
95 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 91 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
96 | /* Print out the private part of the conntrack. */ | 92 | /* Print out the private part of the conntrack. */ |
97 | void (*print_conntrack)(struct seq_file *s, struct nf_conn *); | 93 | void (*print_conntrack)(struct seq_file *s, struct nf_conn *); |
98 | #endif | 94 | #endif |
99 | unsigned int *net_id; | 95 | unsigned int *net_id; |
100 | /* Init l4proto pernet data */ | 96 | /* Init l4proto pernet data */ |
101 | int (*init_net)(struct net *net, u_int16_t proto); | 97 | int (*init_net)(struct net *net, u_int16_t proto); |
102 | 98 | ||
103 | /* Return the per-net protocol part. */ | 99 | /* Return the per-net protocol part. */ |
104 | struct nf_proto_net *(*get_net_proto)(struct net *net); | 100 | struct nf_proto_net *(*get_net_proto)(struct net *net); |
105 | 101 | ||
106 | /* Module (if any) which this is connected to. */ | 102 | /* Module (if any) which this is connected to. */ |
107 | struct module *me; | 103 | struct module *me; |
108 | }; | 104 | }; |
109 | 105 | ||
110 | /* Existing built-in generic protocol */ | 106 | /* Existing built-in generic protocol */ |
111 | extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; | 107 | extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; |
112 | 108 | ||
113 | #define MAX_NF_CT_PROTO 256 | 109 | #define MAX_NF_CT_PROTO 256 |
114 | 110 | ||
115 | const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, | 111 | const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, |
116 | u_int8_t l4proto); | 112 | u_int8_t l4proto); |
117 | 113 | ||
118 | const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, | 114 | const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, |
119 | u_int8_t l4proto); | 115 | u_int8_t l4proto); |
120 | void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); | 116 | void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); |
121 | 117 | ||
122 | /* Protocol pernet registration. */ | 118 | /* Protocol pernet registration. */ |
123 | int nf_ct_l4proto_pernet_register_one(struct net *net, | 119 | int nf_ct_l4proto_pernet_register_one(struct net *net, |
124 | const struct nf_conntrack_l4proto *proto); | 120 | const struct nf_conntrack_l4proto *proto); |
125 | void nf_ct_l4proto_pernet_unregister_one(struct net *net, | 121 | void nf_ct_l4proto_pernet_unregister_one(struct net *net, |
126 | const struct nf_conntrack_l4proto *proto); | 122 | const struct nf_conntrack_l4proto *proto); |
127 | int nf_ct_l4proto_pernet_register(struct net *net, | 123 | int nf_ct_l4proto_pernet_register(struct net *net, |
128 | const struct nf_conntrack_l4proto *const proto[], | 124 | const struct nf_conntrack_l4proto *const proto[], |
129 | unsigned int num_proto); | 125 | unsigned int num_proto); |
130 | void nf_ct_l4proto_pernet_unregister(struct net *net, | 126 | void nf_ct_l4proto_pernet_unregister(struct net *net, |
131 | const struct nf_conntrack_l4proto *const proto[], | 127 | const struct nf_conntrack_l4proto *const proto[], |
132 | unsigned int num_proto); | 128 | unsigned int num_proto); |
133 | 129 | ||
134 | /* Protocol global registration. */ | 130 | /* Protocol global registration. */ |
135 | int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); | 131 | int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); |
136 | void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); | 132 | void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); |
137 | int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], | 133 | int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], |
138 | unsigned int num_proto); | 134 | unsigned int num_proto); |
139 | void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], | 135 | void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], |
140 | unsigned int num_proto); | 136 | unsigned int num_proto); |
141 | 137 | ||
142 | /* Generic netlink helpers */ | 138 | /* Generic netlink helpers */ |
143 | int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, | 139 | int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, |
144 | const struct nf_conntrack_tuple *tuple); | 140 | const struct nf_conntrack_tuple *tuple); |
145 | int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], | 141 | int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], |
146 | struct nf_conntrack_tuple *t); | 142 | struct nf_conntrack_tuple *t); |
147 | unsigned int nf_ct_port_nlattr_tuple_size(void); | 143 | unsigned int nf_ct_port_nlattr_tuple_size(void); |
148 | extern const struct nla_policy nf_ct_port_nla_policy[]; | 144 | extern const struct nla_policy nf_ct_port_nla_policy[]; |
149 | 145 | ||
150 | #ifdef CONFIG_SYSCTL | 146 | #ifdef CONFIG_SYSCTL |
151 | __printf(3, 4) __cold | 147 | __printf(3, 4) __cold |
152 | void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, | 148 | void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, |
153 | const struct nf_conn *ct, | 149 | const struct nf_conn *ct, |
154 | const char *fmt, ...); | 150 | const char *fmt, ...); |
155 | __printf(5, 6) __cold | 151 | __printf(5, 6) __cold |
156 | void nf_l4proto_log_invalid(const struct sk_buff *skb, | 152 | void nf_l4proto_log_invalid(const struct sk_buff *skb, |
157 | struct net *net, | 153 | struct net *net, |
158 | u16 pf, u8 protonum, | 154 | u16 pf, u8 protonum, |
159 | const char *fmt, ...); | 155 | const char *fmt, ...); |
160 | #else | 156 | #else |
161 | static inline __printf(5, 6) __cold | 157 | static inline __printf(5, 6) __cold |
162 | void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, | 158 | void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, |
163 | u16 pf, u8 protonum, const char *fmt, ...) {} | 159 | u16 pf, u8 protonum, const char *fmt, ...) {} |
164 | static inline __printf(3, 4) __cold | 160 | static inline __printf(3, 4) __cold |
165 | void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, | 161 | void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, |
166 | const struct nf_conn *ct, | 162 | const struct nf_conn *ct, |
167 | const char *fmt, ...) { } | 163 | const char *fmt, ...) { } |
168 | #endif /* CONFIG_SYSCTL */ | 164 | #endif /* CONFIG_SYSCTL */ |
169 | 165 | ||
170 | #endif /*_NF_CONNTRACK_PROTOCOL_H*/ | 166 | #endif /*_NF_CONNTRACK_PROTOCOL_H*/ |
171 | 167 |
include/net/netfilter/nf_conntrack_timeout.h
1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _NF_CONNTRACK_TIMEOUT_H | 2 | #ifndef _NF_CONNTRACK_TIMEOUT_H |
3 | #define _NF_CONNTRACK_TIMEOUT_H | 3 | #define _NF_CONNTRACK_TIMEOUT_H |
4 | 4 | ||
5 | #include <net/net_namespace.h> | 5 | #include <net/net_namespace.h> |
6 | #include <linux/netfilter/nf_conntrack_common.h> | 6 | #include <linux/netfilter/nf_conntrack_common.h> |
7 | #include <linux/netfilter/nf_conntrack_tuple_common.h> | 7 | #include <linux/netfilter/nf_conntrack_tuple_common.h> |
8 | #include <linux/refcount.h> | 8 | #include <linux/refcount.h> |
9 | #include <net/netfilter/nf_conntrack.h> | 9 | #include <net/netfilter/nf_conntrack.h> |
10 | #include <net/netfilter/nf_conntrack_extend.h> | 10 | #include <net/netfilter/nf_conntrack_extend.h> |
11 | 11 | ||
12 | #define CTNL_TIMEOUT_NAME_MAX 32 | 12 | #define CTNL_TIMEOUT_NAME_MAX 32 |
13 | 13 | ||
14 | struct ctnl_timeout { | 14 | struct ctnl_timeout { |
15 | struct list_head head; | 15 | struct list_head head; |
16 | struct rcu_head rcu_head; | 16 | struct rcu_head rcu_head; |
17 | refcount_t refcnt; | 17 | refcount_t refcnt; |
18 | char name[CTNL_TIMEOUT_NAME_MAX]; | 18 | char name[CTNL_TIMEOUT_NAME_MAX]; |
19 | __u16 l3num; | 19 | __u16 l3num; |
20 | const struct nf_conntrack_l4proto *l4proto; | 20 | const struct nf_conntrack_l4proto *l4proto; |
21 | char data[0]; | 21 | char data[0]; |
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct nf_conn_timeout { | 24 | struct nf_conn_timeout { |
25 | struct ctnl_timeout __rcu *timeout; | 25 | struct ctnl_timeout __rcu *timeout; |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static inline unsigned int * | 28 | static inline unsigned int * |
29 | nf_ct_timeout_data(struct nf_conn_timeout *t) | 29 | nf_ct_timeout_data(struct nf_conn_timeout *t) |
30 | { | 30 | { |
31 | struct ctnl_timeout *timeout; | 31 | struct ctnl_timeout *timeout; |
32 | 32 | ||
33 | timeout = rcu_dereference(t->timeout); | 33 | timeout = rcu_dereference(t->timeout); |
34 | if (timeout == NULL) | 34 | if (timeout == NULL) |
35 | return NULL; | 35 | return NULL; |
36 | 36 | ||
37 | return (unsigned int *)timeout->data; | 37 | return (unsigned int *)timeout->data; |
38 | } | 38 | } |
39 | 39 | ||
40 | static inline | 40 | static inline |
41 | struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct) | 41 | struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct) |
42 | { | 42 | { |
43 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 43 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
44 | return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); | 44 | return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); |
45 | #else | 45 | #else |
46 | return NULL; | 46 | return NULL; |
47 | #endif | 47 | #endif |
48 | } | 48 | } |
49 | 49 | ||
50 | static inline | 50 | static inline |
51 | struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, | 51 | struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, |
52 | struct ctnl_timeout *timeout, | 52 | struct ctnl_timeout *timeout, |
53 | gfp_t gfp) | 53 | gfp_t gfp) |
54 | { | 54 | { |
55 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 55 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
56 | struct nf_conn_timeout *timeout_ext; | 56 | struct nf_conn_timeout *timeout_ext; |
57 | 57 | ||
58 | timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp); | 58 | timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp); |
59 | if (timeout_ext == NULL) | 59 | if (timeout_ext == NULL) |
60 | return NULL; | 60 | return NULL; |
61 | 61 | ||
62 | rcu_assign_pointer(timeout_ext->timeout, timeout); | 62 | rcu_assign_pointer(timeout_ext->timeout, timeout); |
63 | 63 | ||
64 | return timeout_ext; | 64 | return timeout_ext; |
65 | #else | 65 | #else |
66 | return NULL; | 66 | return NULL; |
67 | #endif | 67 | #endif |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static inline unsigned int * | 70 | static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) |
71 | nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, | ||
72 | const struct nf_conntrack_l4proto *l4proto) | ||
73 | { | 71 | { |
72 | unsigned int *timeouts = NULL; | ||
74 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 73 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
75 | struct nf_conn_timeout *timeout_ext; | 74 | struct nf_conn_timeout *timeout_ext; |
76 | unsigned int *timeouts; | ||
77 | 75 | ||
78 | timeout_ext = nf_ct_timeout_find(ct); | 76 | timeout_ext = nf_ct_timeout_find(ct); |
79 | if (timeout_ext) { | 77 | if (timeout_ext) |
80 | timeouts = nf_ct_timeout_data(timeout_ext); | 78 | timeouts = nf_ct_timeout_data(timeout_ext); |
81 | if (unlikely(!timeouts)) | ||
82 | timeouts = l4proto->get_timeouts(net); | ||
83 | } else { | ||
84 | timeouts = l4proto->get_timeouts(net); | ||
85 | } | ||
86 | |||
87 | return timeouts; | ||
88 | #else | ||
89 | return l4proto->get_timeouts(net); | ||
90 | #endif | 79 | #endif |
80 | return timeouts; | ||
91 | } | 81 | } |
92 | 82 | ||
93 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 83 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
94 | int nf_conntrack_timeout_init(void); | 84 | int nf_conntrack_timeout_init(void); |
95 | void nf_conntrack_timeout_fini(void); | 85 | void nf_conntrack_timeout_fini(void); |
96 | #else | 86 | #else |
97 | static inline int nf_conntrack_timeout_init(void) | 87 | static inline int nf_conntrack_timeout_init(void) |
98 | { | 88 | { |
99 | return 0; | 89 | return 0; |
100 | } | 90 | } |
101 | 91 | ||
102 | static inline void nf_conntrack_timeout_fini(void) | 92 | static inline void nf_conntrack_timeout_fini(void) |
103 | { | 93 | { |
104 | return; | 94 | return; |
105 | } | 95 | } |
106 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ | 96 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ |
107 | 97 | ||
108 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 98 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
109 | extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); | 99 | extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); |
110 | extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); | 100 | extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); |
111 | #endif | 101 | #endif |
112 | 102 |
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
3 | * (C) 2006-2010 Patrick McHardy <kaber@trash.net> | 3 | * (C) 2006-2010 Patrick McHardy <kaber@trash.net> |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/timer.h> | 11 | #include <linux/timer.h> |
12 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
13 | #include <linux/in.h> | 13 | #include <linux/in.h> |
14 | #include <linux/icmp.h> | 14 | #include <linux/icmp.h> |
15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
16 | #include <net/ip.h> | 16 | #include <net/ip.h> |
17 | #include <net/checksum.h> | 17 | #include <net/checksum.h> |
18 | #include <linux/netfilter_ipv4.h> | 18 | #include <linux/netfilter_ipv4.h> |
19 | #include <net/netfilter/nf_conntrack_tuple.h> | 19 | #include <net/netfilter/nf_conntrack_tuple.h> |
20 | #include <net/netfilter/nf_conntrack_l4proto.h> | 20 | #include <net/netfilter/nf_conntrack_l4proto.h> |
21 | #include <net/netfilter/nf_conntrack_core.h> | 21 | #include <net/netfilter/nf_conntrack_core.h> |
22 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
22 | #include <net/netfilter/nf_conntrack_zones.h> | 23 | #include <net/netfilter/nf_conntrack_zones.h> |
23 | #include <net/netfilter/nf_log.h> | 24 | #include <net/netfilter/nf_log.h> |
24 | 25 | ||
25 | static const unsigned int nf_ct_icmp_timeout = 30*HZ; | 26 | static const unsigned int nf_ct_icmp_timeout = 30*HZ; |
26 | 27 | ||
27 | static inline struct nf_icmp_net *icmp_pernet(struct net *net) | 28 | static inline struct nf_icmp_net *icmp_pernet(struct net *net) |
28 | { | 29 | { |
29 | return &net->ct.nf_ct_proto.icmp; | 30 | return &net->ct.nf_ct_proto.icmp; |
30 | } | 31 | } |
31 | 32 | ||
32 | static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, | 33 | static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, |
33 | struct net *net, struct nf_conntrack_tuple *tuple) | 34 | struct net *net, struct nf_conntrack_tuple *tuple) |
34 | { | 35 | { |
35 | const struct icmphdr *hp; | 36 | const struct icmphdr *hp; |
36 | struct icmphdr _hdr; | 37 | struct icmphdr _hdr; |
37 | 38 | ||
38 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 39 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
39 | if (hp == NULL) | 40 | if (hp == NULL) |
40 | return false; | 41 | return false; |
41 | 42 | ||
42 | tuple->dst.u.icmp.type = hp->type; | 43 | tuple->dst.u.icmp.type = hp->type; |
43 | tuple->src.u.icmp.id = hp->un.echo.id; | 44 | tuple->src.u.icmp.id = hp->un.echo.id; |
44 | tuple->dst.u.icmp.code = hp->code; | 45 | tuple->dst.u.icmp.code = hp->code; |
45 | 46 | ||
46 | return true; | 47 | return true; |
47 | } | 48 | } |
48 | 49 | ||
49 | /* Add 1; spaces filled with 0. */ | 50 | /* Add 1; spaces filled with 0. */ |
50 | static const u_int8_t invmap[] = { | 51 | static const u_int8_t invmap[] = { |
51 | [ICMP_ECHO] = ICMP_ECHOREPLY + 1, | 52 | [ICMP_ECHO] = ICMP_ECHOREPLY + 1, |
52 | [ICMP_ECHOREPLY] = ICMP_ECHO + 1, | 53 | [ICMP_ECHOREPLY] = ICMP_ECHO + 1, |
53 | [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, | 54 | [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, |
54 | [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, | 55 | [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, |
55 | [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, | 56 | [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, |
56 | [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, | 57 | [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, |
57 | [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, | 58 | [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, |
58 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 | 59 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 |
59 | }; | 60 | }; |
60 | 61 | ||
61 | static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, | 62 | static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, |
62 | const struct nf_conntrack_tuple *orig) | 63 | const struct nf_conntrack_tuple *orig) |
63 | { | 64 | { |
64 | if (orig->dst.u.icmp.type >= sizeof(invmap) || | 65 | if (orig->dst.u.icmp.type >= sizeof(invmap) || |
65 | !invmap[orig->dst.u.icmp.type]) | 66 | !invmap[orig->dst.u.icmp.type]) |
66 | return false; | 67 | return false; |
67 | 68 | ||
68 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | 69 | tuple->src.u.icmp.id = orig->src.u.icmp.id; |
69 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; | 70 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; |
70 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | 71 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; |
71 | return true; | 72 | return true; |
72 | } | 73 | } |
73 | 74 | ||
74 | static unsigned int *icmp_get_timeouts(struct net *net) | 75 | static unsigned int *icmp_get_timeouts(struct net *net) |
75 | { | 76 | { |
76 | return &icmp_pernet(net)->timeout; | 77 | return &icmp_pernet(net)->timeout; |
77 | } | 78 | } |
78 | 79 | ||
79 | /* Returns verdict for packet, or -1 for invalid. */ | 80 | /* Returns verdict for packet, or -1 for invalid. */ |
80 | static int icmp_packet(struct nf_conn *ct, | 81 | static int icmp_packet(struct nf_conn *ct, |
81 | const struct sk_buff *skb, | 82 | const struct sk_buff *skb, |
82 | unsigned int dataoff, | 83 | unsigned int dataoff, |
83 | enum ip_conntrack_info ctinfo, | 84 | enum ip_conntrack_info ctinfo) |
84 | unsigned int *timeout) | ||
85 | { | 85 | { |
86 | /* Do not immediately delete the connection after the first | 86 | /* Do not immediately delete the connection after the first |
87 | successful reply to avoid excessive conntrackd traffic | 87 | successful reply to avoid excessive conntrackd traffic |
88 | and also to handle correctly ICMP echo reply duplicates. */ | 88 | and also to handle correctly ICMP echo reply duplicates. */ |
89 | unsigned int *timeout = nf_ct_timeout_lookup(ct); | ||
90 | |||
91 | if (!timeout) | ||
92 | timeout = icmp_get_timeouts(nf_ct_net(ct)); | ||
93 | |||
89 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); | 94 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); |
90 | 95 | ||
91 | return NF_ACCEPT; | 96 | return NF_ACCEPT; |
92 | } | 97 | } |
93 | 98 | ||
94 | /* Called when a new connection for this protocol found. */ | 99 | /* Called when a new connection for this protocol found. */ |
95 | static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, | 100 | static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, |
96 | unsigned int dataoff, unsigned int *timeouts) | 101 | unsigned int dataoff) |
97 | { | 102 | { |
98 | static const u_int8_t valid_new[] = { | 103 | static const u_int8_t valid_new[] = { |
99 | [ICMP_ECHO] = 1, | 104 | [ICMP_ECHO] = 1, |
100 | [ICMP_TIMESTAMP] = 1, | 105 | [ICMP_TIMESTAMP] = 1, |
101 | [ICMP_INFO_REQUEST] = 1, | 106 | [ICMP_INFO_REQUEST] = 1, |
102 | [ICMP_ADDRESS] = 1 | 107 | [ICMP_ADDRESS] = 1 |
103 | }; | 108 | }; |
104 | 109 | ||
105 | if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || | 110 | if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || |
106 | !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { | 111 | !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { |
107 | /* Can't create a new ICMP `conn' with this. */ | 112 | /* Can't create a new ICMP `conn' with this. */ |
108 | pr_debug("icmp: can't create new conn with type %u\n", | 113 | pr_debug("icmp: can't create new conn with type %u\n", |
109 | ct->tuplehash[0].tuple.dst.u.icmp.type); | 114 | ct->tuplehash[0].tuple.dst.u.icmp.type); |
110 | nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); | 115 | nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); |
111 | return false; | 116 | return false; |
112 | } | 117 | } |
113 | return true; | 118 | return true; |
114 | } | 119 | } |
115 | 120 | ||
116 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | 121 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ |
117 | static int | 122 | static int |
118 | icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, | 123 | icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, |
119 | unsigned int hooknum) | 124 | unsigned int hooknum) |
120 | { | 125 | { |
121 | struct nf_conntrack_tuple innertuple, origtuple; | 126 | struct nf_conntrack_tuple innertuple, origtuple; |
122 | const struct nf_conntrack_l4proto *innerproto; | 127 | const struct nf_conntrack_l4proto *innerproto; |
123 | const struct nf_conntrack_tuple_hash *h; | 128 | const struct nf_conntrack_tuple_hash *h; |
124 | const struct nf_conntrack_zone *zone; | 129 | const struct nf_conntrack_zone *zone; |
125 | enum ip_conntrack_info ctinfo; | 130 | enum ip_conntrack_info ctinfo; |
126 | struct nf_conntrack_zone tmp; | 131 | struct nf_conntrack_zone tmp; |
127 | 132 | ||
128 | WARN_ON(skb_nfct(skb)); | 133 | WARN_ON(skb_nfct(skb)); |
129 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); | 134 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); |
130 | 135 | ||
131 | /* Are they talking about one of our connections? */ | 136 | /* Are they talking about one of our connections? */ |
132 | if (!nf_ct_get_tuplepr(skb, | 137 | if (!nf_ct_get_tuplepr(skb, |
133 | skb_network_offset(skb) + ip_hdrlen(skb) | 138 | skb_network_offset(skb) + ip_hdrlen(skb) |
134 | + sizeof(struct icmphdr), | 139 | + sizeof(struct icmphdr), |
135 | PF_INET, net, &origtuple)) { | 140 | PF_INET, net, &origtuple)) { |
136 | pr_debug("icmp_error_message: failed to get tuple\n"); | 141 | pr_debug("icmp_error_message: failed to get tuple\n"); |
137 | return -NF_ACCEPT; | 142 | return -NF_ACCEPT; |
138 | } | 143 | } |
139 | 144 | ||
140 | /* rcu_read_lock()ed by nf_hook_thresh */ | 145 | /* rcu_read_lock()ed by nf_hook_thresh */ |
141 | innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); | 146 | innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); |
142 | 147 | ||
143 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | 148 | /* Ordinarily, we'd expect the inverted tupleproto, but it's |
144 | been preserved inside the ICMP. */ | 149 | been preserved inside the ICMP. */ |
145 | if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { | 150 | if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { |
146 | pr_debug("icmp_error_message: no match\n"); | 151 | pr_debug("icmp_error_message: no match\n"); |
147 | return -NF_ACCEPT; | 152 | return -NF_ACCEPT; |
148 | } | 153 | } |
149 | 154 | ||
150 | ctinfo = IP_CT_RELATED; | 155 | ctinfo = IP_CT_RELATED; |
151 | 156 | ||
152 | h = nf_conntrack_find_get(net, zone, &innertuple); | 157 | h = nf_conntrack_find_get(net, zone, &innertuple); |
153 | if (!h) { | 158 | if (!h) { |
154 | pr_debug("icmp_error_message: no match\n"); | 159 | pr_debug("icmp_error_message: no match\n"); |
155 | return -NF_ACCEPT; | 160 | return -NF_ACCEPT; |
156 | } | 161 | } |
157 | 162 | ||
158 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | 163 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) |
159 | ctinfo += IP_CT_IS_REPLY; | 164 | ctinfo += IP_CT_IS_REPLY; |
160 | 165 | ||
161 | /* Update skb to refer to this connection */ | 166 | /* Update skb to refer to this connection */ |
162 | nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); | 167 | nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); |
163 | return NF_ACCEPT; | 168 | return NF_ACCEPT; |
164 | } | 169 | } |
165 | 170 | ||
166 | static void icmp_error_log(const struct sk_buff *skb, struct net *net, | 171 | static void icmp_error_log(const struct sk_buff *skb, struct net *net, |
167 | u8 pf, const char *msg) | 172 | u8 pf, const char *msg) |
168 | { | 173 | { |
169 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); | 174 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); |
170 | } | 175 | } |
171 | 176 | ||
172 | /* Small and modified version of icmp_rcv */ | 177 | /* Small and modified version of icmp_rcv */ |
173 | static int | 178 | static int |
174 | icmp_error(struct net *net, struct nf_conn *tmpl, | 179 | icmp_error(struct net *net, struct nf_conn *tmpl, |
175 | struct sk_buff *skb, unsigned int dataoff, | 180 | struct sk_buff *skb, unsigned int dataoff, |
176 | u8 pf, unsigned int hooknum) | 181 | u8 pf, unsigned int hooknum) |
177 | { | 182 | { |
178 | const struct icmphdr *icmph; | 183 | const struct icmphdr *icmph; |
179 | struct icmphdr _ih; | 184 | struct icmphdr _ih; |
180 | 185 | ||
181 | /* Not enough header? */ | 186 | /* Not enough header? */ |
182 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); | 187 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); |
183 | if (icmph == NULL) { | 188 | if (icmph == NULL) { |
184 | icmp_error_log(skb, net, pf, "short packet"); | 189 | icmp_error_log(skb, net, pf, "short packet"); |
185 | return -NF_ACCEPT; | 190 | return -NF_ACCEPT; |
186 | } | 191 | } |
187 | 192 | ||
188 | /* See ip_conntrack_proto_tcp.c */ | 193 | /* See ip_conntrack_proto_tcp.c */ |
189 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 194 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
190 | nf_ip_checksum(skb, hooknum, dataoff, 0)) { | 195 | nf_ip_checksum(skb, hooknum, dataoff, 0)) { |
191 | icmp_error_log(skb, net, pf, "bad hw icmp checksum"); | 196 | icmp_error_log(skb, net, pf, "bad hw icmp checksum"); |
192 | return -NF_ACCEPT; | 197 | return -NF_ACCEPT; |
193 | } | 198 | } |
194 | 199 | ||
195 | /* | 200 | /* |
196 | * 18 is the highest 'known' ICMP type. Anything else is a mystery | 201 | * 18 is the highest 'known' ICMP type. Anything else is a mystery |
197 | * | 202 | * |
198 | * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently | 203 | * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently |
199 | * discarded. | 204 | * discarded. |
200 | */ | 205 | */ |
201 | if (icmph->type > NR_ICMP_TYPES) { | 206 | if (icmph->type > NR_ICMP_TYPES) { |
202 | icmp_error_log(skb, net, pf, "invalid icmp type"); | 207 | icmp_error_log(skb, net, pf, "invalid icmp type"); |
203 | return -NF_ACCEPT; | 208 | return -NF_ACCEPT; |
204 | } | 209 | } |
205 | 210 | ||
206 | /* Need to track icmp error message? */ | 211 | /* Need to track icmp error message? */ |
207 | if (icmph->type != ICMP_DEST_UNREACH && | 212 | if (icmph->type != ICMP_DEST_UNREACH && |
208 | icmph->type != ICMP_SOURCE_QUENCH && | 213 | icmph->type != ICMP_SOURCE_QUENCH && |
209 | icmph->type != ICMP_TIME_EXCEEDED && | 214 | icmph->type != ICMP_TIME_EXCEEDED && |
210 | icmph->type != ICMP_PARAMETERPROB && | 215 | icmph->type != ICMP_PARAMETERPROB && |
211 | icmph->type != ICMP_REDIRECT) | 216 | icmph->type != ICMP_REDIRECT) |
212 | return NF_ACCEPT; | 217 | return NF_ACCEPT; |
213 | 218 | ||
214 | return icmp_error_message(net, tmpl, skb, hooknum); | 219 | return icmp_error_message(net, tmpl, skb, hooknum); |
215 | } | 220 | } |
216 | 221 | ||
217 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 222 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
218 | 223 | ||
219 | #include <linux/netfilter/nfnetlink.h> | 224 | #include <linux/netfilter/nfnetlink.h> |
220 | #include <linux/netfilter/nfnetlink_conntrack.h> | 225 | #include <linux/netfilter/nfnetlink_conntrack.h> |
221 | 226 | ||
222 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, | 227 | static int icmp_tuple_to_nlattr(struct sk_buff *skb, |
223 | const struct nf_conntrack_tuple *t) | 228 | const struct nf_conntrack_tuple *t) |
224 | { | 229 | { |
225 | if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || | 230 | if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || |
226 | nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || | 231 | nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || |
227 | nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) | 232 | nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) |
228 | goto nla_put_failure; | 233 | goto nla_put_failure; |
229 | return 0; | 234 | return 0; |
230 | 235 | ||
231 | nla_put_failure: | 236 | nla_put_failure: |
232 | return -1; | 237 | return -1; |
233 | } | 238 | } |
234 | 239 | ||
235 | static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { | 240 | static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { |
236 | [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, | 241 | [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, |
237 | [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, | 242 | [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, |
238 | [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, | 243 | [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, |
239 | }; | 244 | }; |
240 | 245 | ||
241 | static int icmp_nlattr_to_tuple(struct nlattr *tb[], | 246 | static int icmp_nlattr_to_tuple(struct nlattr *tb[], |
242 | struct nf_conntrack_tuple *tuple) | 247 | struct nf_conntrack_tuple *tuple) |
243 | { | 248 | { |
244 | if (!tb[CTA_PROTO_ICMP_TYPE] || | 249 | if (!tb[CTA_PROTO_ICMP_TYPE] || |
245 | !tb[CTA_PROTO_ICMP_CODE] || | 250 | !tb[CTA_PROTO_ICMP_CODE] || |
246 | !tb[CTA_PROTO_ICMP_ID]) | 251 | !tb[CTA_PROTO_ICMP_ID]) |
247 | return -EINVAL; | 252 | return -EINVAL; |
248 | 253 | ||
249 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); | 254 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); |
250 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); | 255 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); |
251 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); | 256 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); |
252 | 257 | ||
253 | if (tuple->dst.u.icmp.type >= sizeof(invmap) || | 258 | if (tuple->dst.u.icmp.type >= sizeof(invmap) || |
254 | !invmap[tuple->dst.u.icmp.type]) | 259 | !invmap[tuple->dst.u.icmp.type]) |
255 | return -EINVAL; | 260 | return -EINVAL; |
256 | 261 | ||
257 | return 0; | 262 | return 0; |
258 | } | 263 | } |
259 | 264 | ||
260 | static unsigned int icmp_nlattr_tuple_size(void) | 265 | static unsigned int icmp_nlattr_tuple_size(void) |
261 | { | 266 | { |
262 | static unsigned int size __read_mostly; | 267 | static unsigned int size __read_mostly; |
263 | 268 | ||
264 | if (!size) | 269 | if (!size) |
265 | size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); | 270 | size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); |
266 | 271 | ||
267 | return size; | 272 | return size; |
268 | } | 273 | } |
269 | #endif | 274 | #endif |
270 | 275 | ||
271 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 276 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
272 | 277 | ||
273 | #include <linux/netfilter/nfnetlink.h> | 278 | #include <linux/netfilter/nfnetlink.h> |
274 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 279 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
275 | 280 | ||
276 | static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], | 281 | static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], |
277 | struct net *net, void *data) | 282 | struct net *net, void *data) |
278 | { | 283 | { |
279 | unsigned int *timeout = data; | 284 | unsigned int *timeout = data; |
280 | struct nf_icmp_net *in = icmp_pernet(net); | 285 | struct nf_icmp_net *in = icmp_pernet(net); |
281 | 286 | ||
282 | if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { | 287 | if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { |
288 | if (!timeout) | ||
289 | timeout = &in->timeout; | ||
283 | *timeout = | 290 | *timeout = |
284 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; | 291 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; |
285 | } else { | 292 | } else if (timeout) { |
286 | /* Set default ICMP timeout. */ | 293 | /* Set default ICMP timeout. */ |
287 | *timeout = in->timeout; | 294 | *timeout = in->timeout; |
288 | } | 295 | } |
289 | return 0; | 296 | return 0; |
290 | } | 297 | } |
291 | 298 | ||
292 | static int | 299 | static int |
293 | icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 300 | icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
294 | { | 301 | { |
295 | const unsigned int *timeout = data; | 302 | const unsigned int *timeout = data; |
296 | 303 | ||
297 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) | 304 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) |
298 | goto nla_put_failure; | 305 | goto nla_put_failure; |
299 | return 0; | 306 | return 0; |
300 | 307 | ||
301 | nla_put_failure: | 308 | nla_put_failure: |
302 | return -ENOSPC; | 309 | return -ENOSPC; |
303 | } | 310 | } |
304 | 311 | ||
305 | static const struct nla_policy | 312 | static const struct nla_policy |
306 | icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { | 313 | icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { |
307 | [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, | 314 | [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, |
308 | }; | 315 | }; |
309 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 316 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
310 | 317 | ||
311 | #ifdef CONFIG_SYSCTL | 318 | #ifdef CONFIG_SYSCTL |
312 | static struct ctl_table icmp_sysctl_table[] = { | 319 | static struct ctl_table icmp_sysctl_table[] = { |
313 | { | 320 | { |
314 | .procname = "nf_conntrack_icmp_timeout", | 321 | .procname = "nf_conntrack_icmp_timeout", |
315 | .maxlen = sizeof(unsigned int), | 322 | .maxlen = sizeof(unsigned int), |
316 | .mode = 0644, | 323 | .mode = 0644, |
317 | .proc_handler = proc_dointvec_jiffies, | 324 | .proc_handler = proc_dointvec_jiffies, |
318 | }, | 325 | }, |
319 | { } | 326 | { } |
320 | }; | 327 | }; |
321 | #endif /* CONFIG_SYSCTL */ | 328 | #endif /* CONFIG_SYSCTL */ |
322 | 329 | ||
323 | static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, | 330 | static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, |
324 | struct nf_icmp_net *in) | 331 | struct nf_icmp_net *in) |
325 | { | 332 | { |
326 | #ifdef CONFIG_SYSCTL | 333 | #ifdef CONFIG_SYSCTL |
327 | pn->ctl_table = kmemdup(icmp_sysctl_table, | 334 | pn->ctl_table = kmemdup(icmp_sysctl_table, |
328 | sizeof(icmp_sysctl_table), | 335 | sizeof(icmp_sysctl_table), |
329 | GFP_KERNEL); | 336 | GFP_KERNEL); |
330 | if (!pn->ctl_table) | 337 | if (!pn->ctl_table) |
331 | return -ENOMEM; | 338 | return -ENOMEM; |
332 | 339 | ||
333 | pn->ctl_table[0].data = &in->timeout; | 340 | pn->ctl_table[0].data = &in->timeout; |
334 | #endif | 341 | #endif |
335 | return 0; | 342 | return 0; |
336 | } | 343 | } |
337 | 344 | ||
338 | static int icmp_init_net(struct net *net, u_int16_t proto) | 345 | static int icmp_init_net(struct net *net, u_int16_t proto) |
339 | { | 346 | { |
340 | struct nf_icmp_net *in = icmp_pernet(net); | 347 | struct nf_icmp_net *in = icmp_pernet(net); |
341 | struct nf_proto_net *pn = &in->pn; | 348 | struct nf_proto_net *pn = &in->pn; |
342 | 349 | ||
343 | in->timeout = nf_ct_icmp_timeout; | 350 | in->timeout = nf_ct_icmp_timeout; |
344 | 351 | ||
345 | return icmp_kmemdup_sysctl_table(pn, in); | 352 | return icmp_kmemdup_sysctl_table(pn, in); |
346 | } | 353 | } |
347 | 354 | ||
348 | static struct nf_proto_net *icmp_get_net_proto(struct net *net) | 355 | static struct nf_proto_net *icmp_get_net_proto(struct net *net) |
349 | { | 356 | { |
350 | return &net->ct.nf_ct_proto.icmp.pn; | 357 | return &net->ct.nf_ct_proto.icmp.pn; |
351 | } | 358 | } |
352 | 359 | ||
353 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = | 360 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = |
354 | { | 361 | { |
355 | .l3proto = PF_INET, | 362 | .l3proto = PF_INET, |
356 | .l4proto = IPPROTO_ICMP, | 363 | .l4proto = IPPROTO_ICMP, |
357 | .pkt_to_tuple = icmp_pkt_to_tuple, | 364 | .pkt_to_tuple = icmp_pkt_to_tuple, |
358 | .invert_tuple = icmp_invert_tuple, | 365 | .invert_tuple = icmp_invert_tuple, |
359 | .packet = icmp_packet, | 366 | .packet = icmp_packet, |
360 | .get_timeouts = icmp_get_timeouts, | ||
361 | .new = icmp_new, | 367 | .new = icmp_new, |
362 | .error = icmp_error, | 368 | .error = icmp_error, |
363 | .destroy = NULL, | 369 | .destroy = NULL, |
364 | .me = NULL, | 370 | .me = NULL, |
365 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 371 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
366 | .tuple_to_nlattr = icmp_tuple_to_nlattr, | 372 | .tuple_to_nlattr = icmp_tuple_to_nlattr, |
367 | .nlattr_tuple_size = icmp_nlattr_tuple_size, | 373 | .nlattr_tuple_size = icmp_nlattr_tuple_size, |
368 | .nlattr_to_tuple = icmp_nlattr_to_tuple, | 374 | .nlattr_to_tuple = icmp_nlattr_to_tuple, |
369 | .nla_policy = icmp_nla_policy, | 375 | .nla_policy = icmp_nla_policy, |
370 | #endif | 376 | #endif |
371 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 377 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
372 | .ctnl_timeout = { | 378 | .ctnl_timeout = { |
373 | .nlattr_to_obj = icmp_timeout_nlattr_to_obj, | 379 | .nlattr_to_obj = icmp_timeout_nlattr_to_obj, |
374 | .obj_to_nlattr = icmp_timeout_obj_to_nlattr, | 380 | .obj_to_nlattr = icmp_timeout_obj_to_nlattr, |
375 | .nlattr_max = CTA_TIMEOUT_ICMP_MAX, | 381 | .nlattr_max = CTA_TIMEOUT_ICMP_MAX, |
376 | .obj_size = sizeof(unsigned int), | 382 | .obj_size = sizeof(unsigned int), |
377 | .nla_policy = icmp_timeout_nla_policy, | 383 | .nla_policy = icmp_timeout_nla_policy, |
378 | }, | 384 | }, |
379 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 385 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
380 | .init_net = icmp_init_net, | 386 | .init_net = icmp_init_net, |
381 | .get_net_proto = icmp_get_net_proto, | 387 | .get_net_proto = icmp_get_net_proto, |
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
1 | /* | 1 | /* |
2 | * Copyright (C)2003,2004 USAGI/WIDE Project | 2 | * Copyright (C)2003,2004 USAGI/WIDE Project |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | * | 7 | * |
8 | * Author: | 8 | * Author: |
9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> | 9 | * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/timer.h> | 13 | #include <linux/timer.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/netfilter.h> | 15 | #include <linux/netfilter.h> |
16 | #include <linux/in6.h> | 16 | #include <linux/in6.h> |
17 | #include <linux/icmpv6.h> | 17 | #include <linux/icmpv6.h> |
18 | #include <linux/ipv6.h> | 18 | #include <linux/ipv6.h> |
19 | #include <net/ipv6.h> | 19 | #include <net/ipv6.h> |
20 | #include <net/ip6_checksum.h> | 20 | #include <net/ip6_checksum.h> |
21 | #include <linux/seq_file.h> | 21 | #include <linux/seq_file.h> |
22 | #include <linux/netfilter_ipv6.h> | 22 | #include <linux/netfilter_ipv6.h> |
23 | #include <net/netfilter/nf_conntrack_tuple.h> | 23 | #include <net/netfilter/nf_conntrack_tuple.h> |
24 | #include <net/netfilter/nf_conntrack_l4proto.h> | 24 | #include <net/netfilter/nf_conntrack_l4proto.h> |
25 | #include <net/netfilter/nf_conntrack_core.h> | 25 | #include <net/netfilter/nf_conntrack_core.h> |
26 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
26 | #include <net/netfilter/nf_conntrack_zones.h> | 27 | #include <net/netfilter/nf_conntrack_zones.h> |
27 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> | 28 | #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> |
28 | #include <net/netfilter/nf_log.h> | 29 | #include <net/netfilter/nf_log.h> |
29 | 30 | ||
30 | static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; | 31 | static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; |
31 | 32 | ||
32 | static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) | 33 | static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) |
33 | { | 34 | { |
34 | return &net->ct.nf_ct_proto.icmpv6; | 35 | return &net->ct.nf_ct_proto.icmpv6; |
35 | } | 36 | } |
36 | 37 | ||
37 | static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, | 38 | static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, |
38 | unsigned int dataoff, | 39 | unsigned int dataoff, |
39 | struct net *net, | 40 | struct net *net, |
40 | struct nf_conntrack_tuple *tuple) | 41 | struct nf_conntrack_tuple *tuple) |
41 | { | 42 | { |
42 | const struct icmp6hdr *hp; | 43 | const struct icmp6hdr *hp; |
43 | struct icmp6hdr _hdr; | 44 | struct icmp6hdr _hdr; |
44 | 45 | ||
45 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 46 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
46 | if (hp == NULL) | 47 | if (hp == NULL) |
47 | return false; | 48 | return false; |
48 | tuple->dst.u.icmp.type = hp->icmp6_type; | 49 | tuple->dst.u.icmp.type = hp->icmp6_type; |
49 | tuple->src.u.icmp.id = hp->icmp6_identifier; | 50 | tuple->src.u.icmp.id = hp->icmp6_identifier; |
50 | tuple->dst.u.icmp.code = hp->icmp6_code; | 51 | tuple->dst.u.icmp.code = hp->icmp6_code; |
51 | 52 | ||
52 | return true; | 53 | return true; |
53 | } | 54 | } |
54 | 55 | ||
55 | /* Add 1; spaces filled with 0. */ | 56 | /* Add 1; spaces filled with 0. */ |
56 | static const u_int8_t invmap[] = { | 57 | static const u_int8_t invmap[] = { |
57 | [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, | 58 | [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, |
58 | [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, | 59 | [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, |
59 | [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, | 60 | [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, |
60 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 | 61 | [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 |
61 | }; | 62 | }; |
62 | 63 | ||
63 | static const u_int8_t noct_valid_new[] = { | 64 | static const u_int8_t noct_valid_new[] = { |
64 | [ICMPV6_MGM_QUERY - 130] = 1, | 65 | [ICMPV6_MGM_QUERY - 130] = 1, |
65 | [ICMPV6_MGM_REPORT - 130] = 1, | 66 | [ICMPV6_MGM_REPORT - 130] = 1, |
66 | [ICMPV6_MGM_REDUCTION - 130] = 1, | 67 | [ICMPV6_MGM_REDUCTION - 130] = 1, |
67 | [NDISC_ROUTER_SOLICITATION - 130] = 1, | 68 | [NDISC_ROUTER_SOLICITATION - 130] = 1, |
68 | [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, | 69 | [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, |
69 | [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, | 70 | [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, |
70 | [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, | 71 | [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, |
71 | [ICMPV6_MLD2_REPORT - 130] = 1 | 72 | [ICMPV6_MLD2_REPORT - 130] = 1 |
72 | }; | 73 | }; |
73 | 74 | ||
74 | static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, | 75 | static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, |
75 | const struct nf_conntrack_tuple *orig) | 76 | const struct nf_conntrack_tuple *orig) |
76 | { | 77 | { |
77 | int type = orig->dst.u.icmp.type - 128; | 78 | int type = orig->dst.u.icmp.type - 128; |
78 | if (type < 0 || type >= sizeof(invmap) || !invmap[type]) | 79 | if (type < 0 || type >= sizeof(invmap) || !invmap[type]) |
79 | return false; | 80 | return false; |
80 | 81 | ||
81 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | 82 | tuple->src.u.icmp.id = orig->src.u.icmp.id; |
82 | tuple->dst.u.icmp.type = invmap[type] - 1; | 83 | tuple->dst.u.icmp.type = invmap[type] - 1; |
83 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | 84 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; |
84 | return true; | 85 | return true; |
85 | } | 86 | } |
86 | 87 | ||
87 | static unsigned int *icmpv6_get_timeouts(struct net *net) | 88 | static unsigned int *icmpv6_get_timeouts(struct net *net) |
88 | { | 89 | { |
89 | return &icmpv6_pernet(net)->timeout; | 90 | return &icmpv6_pernet(net)->timeout; |
90 | } | 91 | } |
91 | 92 | ||
92 | /* Returns verdict for packet, or -1 for invalid. */ | 93 | /* Returns verdict for packet, or -1 for invalid. */ |
93 | static int icmpv6_packet(struct nf_conn *ct, | 94 | static int icmpv6_packet(struct nf_conn *ct, |
94 | const struct sk_buff *skb, | 95 | const struct sk_buff *skb, |
95 | unsigned int dataoff, | 96 | unsigned int dataoff, |
96 | enum ip_conntrack_info ctinfo, | 97 | enum ip_conntrack_info ctinfo) |
97 | unsigned int *timeout) | ||
98 | { | 98 | { |
99 | unsigned int *timeout = nf_ct_timeout_lookup(ct); | ||
100 | |||
101 | if (!timeout) | ||
102 | timeout = icmpv6_get_timeouts(nf_ct_net(ct)); | ||
103 | |||
99 | /* Do not immediately delete the connection after the first | 104 | /* Do not immediately delete the connection after the first |
100 | successful reply to avoid excessive conntrackd traffic | 105 | successful reply to avoid excessive conntrackd traffic |
101 | and also to handle correctly ICMP echo reply duplicates. */ | 106 | and also to handle correctly ICMP echo reply duplicates. */ |
102 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); | 107 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); |
103 | 108 | ||
104 | return NF_ACCEPT; | 109 | return NF_ACCEPT; |
105 | } | 110 | } |
106 | 111 | ||
107 | /* Called when a new connection for this protocol found. */ | 112 | /* Called when a new connection for this protocol found. */ |
108 | static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, | 113 | static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, |
109 | unsigned int dataoff, unsigned int *timeouts) | 114 | unsigned int dataoff) |
110 | { | 115 | { |
111 | static const u_int8_t valid_new[] = { | 116 | static const u_int8_t valid_new[] = { |
112 | [ICMPV6_ECHO_REQUEST - 128] = 1, | 117 | [ICMPV6_ECHO_REQUEST - 128] = 1, |
113 | [ICMPV6_NI_QUERY - 128] = 1 | 118 | [ICMPV6_NI_QUERY - 128] = 1 |
114 | }; | 119 | }; |
115 | int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; | 120 | int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; |
116 | 121 | ||
117 | if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { | 122 | if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { |
118 | /* Can't create a new ICMPv6 `conn' with this. */ | 123 | /* Can't create a new ICMPv6 `conn' with this. */ |
119 | pr_debug("icmpv6: can't create new conn with type %u\n", | 124 | pr_debug("icmpv6: can't create new conn with type %u\n", |
120 | type + 128); | 125 | type + 128); |
121 | nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); | 126 | nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); |
122 | return false; | 127 | return false; |
123 | } | 128 | } |
124 | return true; | 129 | return true; |
125 | } | 130 | } |
126 | 131 | ||
127 | static int | 132 | static int |
128 | icmpv6_error_message(struct net *net, struct nf_conn *tmpl, | 133 | icmpv6_error_message(struct net *net, struct nf_conn *tmpl, |
129 | struct sk_buff *skb, | 134 | struct sk_buff *skb, |
130 | unsigned int icmp6off) | 135 | unsigned int icmp6off) |
131 | { | 136 | { |
132 | struct nf_conntrack_tuple intuple, origtuple; | 137 | struct nf_conntrack_tuple intuple, origtuple; |
133 | const struct nf_conntrack_tuple_hash *h; | 138 | const struct nf_conntrack_tuple_hash *h; |
134 | const struct nf_conntrack_l4proto *inproto; | 139 | const struct nf_conntrack_l4proto *inproto; |
135 | enum ip_conntrack_info ctinfo; | 140 | enum ip_conntrack_info ctinfo; |
136 | struct nf_conntrack_zone tmp; | 141 | struct nf_conntrack_zone tmp; |
137 | 142 | ||
138 | WARN_ON(skb_nfct(skb)); | 143 | WARN_ON(skb_nfct(skb)); |
139 | 144 | ||
140 | /* Are they talking about one of our connections? */ | 145 | /* Are they talking about one of our connections? */ |
141 | if (!nf_ct_get_tuplepr(skb, | 146 | if (!nf_ct_get_tuplepr(skb, |
142 | skb_network_offset(skb) | 147 | skb_network_offset(skb) |
143 | + sizeof(struct ipv6hdr) | 148 | + sizeof(struct ipv6hdr) |
144 | + sizeof(struct icmp6hdr), | 149 | + sizeof(struct icmp6hdr), |
145 | PF_INET6, net, &origtuple)) { | 150 | PF_INET6, net, &origtuple)) { |
146 | pr_debug("icmpv6_error: Can't get tuple\n"); | 151 | pr_debug("icmpv6_error: Can't get tuple\n"); |
147 | return -NF_ACCEPT; | 152 | return -NF_ACCEPT; |
148 | } | 153 | } |
149 | 154 | ||
150 | /* rcu_read_lock()ed by nf_hook_thresh */ | 155 | /* rcu_read_lock()ed by nf_hook_thresh */ |
151 | inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); | 156 | inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); |
152 | 157 | ||
153 | /* Ordinarily, we'd expect the inverted tupleproto, but it's | 158 | /* Ordinarily, we'd expect the inverted tupleproto, but it's |
154 | been preserved inside the ICMP. */ | 159 | been preserved inside the ICMP. */ |
155 | if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { | 160 | if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { |
156 | pr_debug("icmpv6_error: Can't invert tuple\n"); | 161 | pr_debug("icmpv6_error: Can't invert tuple\n"); |
157 | return -NF_ACCEPT; | 162 | return -NF_ACCEPT; |
158 | } | 163 | } |
159 | 164 | ||
160 | ctinfo = IP_CT_RELATED; | 165 | ctinfo = IP_CT_RELATED; |
161 | 166 | ||
162 | h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), | 167 | h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), |
163 | &intuple); | 168 | &intuple); |
164 | if (!h) { | 169 | if (!h) { |
165 | pr_debug("icmpv6_error: no match\n"); | 170 | pr_debug("icmpv6_error: no match\n"); |
166 | return -NF_ACCEPT; | 171 | return -NF_ACCEPT; |
167 | } else { | 172 | } else { |
168 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | 173 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) |
169 | ctinfo += IP_CT_IS_REPLY; | 174 | ctinfo += IP_CT_IS_REPLY; |
170 | } | 175 | } |
171 | 176 | ||
172 | /* Update skb to refer to this connection */ | 177 | /* Update skb to refer to this connection */ |
173 | nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); | 178 | nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); |
174 | return NF_ACCEPT; | 179 | return NF_ACCEPT; |
175 | } | 180 | } |
176 | 181 | ||
177 | static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, | 182 | static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, |
178 | u8 pf, const char *msg) | 183 | u8 pf, const char *msg) |
179 | { | 184 | { |
180 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); | 185 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); |
181 | } | 186 | } |
182 | 187 | ||
183 | static int | 188 | static int |
184 | icmpv6_error(struct net *net, struct nf_conn *tmpl, | 189 | icmpv6_error(struct net *net, struct nf_conn *tmpl, |
185 | struct sk_buff *skb, unsigned int dataoff, | 190 | struct sk_buff *skb, unsigned int dataoff, |
186 | u8 pf, unsigned int hooknum) | 191 | u8 pf, unsigned int hooknum) |
187 | { | 192 | { |
188 | const struct icmp6hdr *icmp6h; | 193 | const struct icmp6hdr *icmp6h; |
189 | struct icmp6hdr _ih; | 194 | struct icmp6hdr _ih; |
190 | int type; | 195 | int type; |
191 | 196 | ||
192 | icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); | 197 | icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); |
193 | if (icmp6h == NULL) { | 198 | if (icmp6h == NULL) { |
194 | icmpv6_error_log(skb, net, pf, "short packet"); | 199 | icmpv6_error_log(skb, net, pf, "short packet"); |
195 | return -NF_ACCEPT; | 200 | return -NF_ACCEPT; |
196 | } | 201 | } |
197 | 202 | ||
198 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 203 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
199 | nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { | 204 | nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { |
200 | icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); | 205 | icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); |
201 | return -NF_ACCEPT; | 206 | return -NF_ACCEPT; |
202 | } | 207 | } |
203 | 208 | ||
204 | type = icmp6h->icmp6_type - 130; | 209 | type = icmp6h->icmp6_type - 130; |
205 | if (type >= 0 && type < sizeof(noct_valid_new) && | 210 | if (type >= 0 && type < sizeof(noct_valid_new) && |
206 | noct_valid_new[type]) { | 211 | noct_valid_new[type]) { |
207 | nf_ct_set(skb, NULL, IP_CT_UNTRACKED); | 212 | nf_ct_set(skb, NULL, IP_CT_UNTRACKED); |
208 | return NF_ACCEPT; | 213 | return NF_ACCEPT; |
209 | } | 214 | } |
210 | 215 | ||
211 | /* is not error message ? */ | 216 | /* is not error message ? */ |
212 | if (icmp6h->icmp6_type >= 128) | 217 | if (icmp6h->icmp6_type >= 128) |
213 | return NF_ACCEPT; | 218 | return NF_ACCEPT; |
214 | 219 | ||
215 | return icmpv6_error_message(net, tmpl, skb, dataoff); | 220 | return icmpv6_error_message(net, tmpl, skb, dataoff); |
216 | } | 221 | } |
217 | 222 | ||
218 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 223 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
219 | 224 | ||
220 | #include <linux/netfilter/nfnetlink.h> | 225 | #include <linux/netfilter/nfnetlink.h> |
221 | #include <linux/netfilter/nfnetlink_conntrack.h> | 226 | #include <linux/netfilter/nfnetlink_conntrack.h> |
222 | static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, | 227 | static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, |
223 | const struct nf_conntrack_tuple *t) | 228 | const struct nf_conntrack_tuple *t) |
224 | { | 229 | { |
225 | if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || | 230 | if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || |
226 | nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || | 231 | nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || |
227 | nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) | 232 | nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) |
228 | goto nla_put_failure; | 233 | goto nla_put_failure; |
229 | return 0; | 234 | return 0; |
230 | 235 | ||
231 | nla_put_failure: | 236 | nla_put_failure: |
232 | return -1; | 237 | return -1; |
233 | } | 238 | } |
234 | 239 | ||
235 | static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { | 240 | static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { |
236 | [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, | 241 | [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, |
237 | [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, | 242 | [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, |
238 | [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, | 243 | [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, |
239 | }; | 244 | }; |
240 | 245 | ||
241 | static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], | 246 | static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], |
242 | struct nf_conntrack_tuple *tuple) | 247 | struct nf_conntrack_tuple *tuple) |
243 | { | 248 | { |
244 | if (!tb[CTA_PROTO_ICMPV6_TYPE] || | 249 | if (!tb[CTA_PROTO_ICMPV6_TYPE] || |
245 | !tb[CTA_PROTO_ICMPV6_CODE] || | 250 | !tb[CTA_PROTO_ICMPV6_CODE] || |
246 | !tb[CTA_PROTO_ICMPV6_ID]) | 251 | !tb[CTA_PROTO_ICMPV6_ID]) |
247 | return -EINVAL; | 252 | return -EINVAL; |
248 | 253 | ||
249 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); | 254 | tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); |
250 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); | 255 | tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); |
251 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); | 256 | tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); |
252 | 257 | ||
253 | if (tuple->dst.u.icmp.type < 128 || | 258 | if (tuple->dst.u.icmp.type < 128 || |
254 | tuple->dst.u.icmp.type - 128 >= sizeof(invmap) || | 259 | tuple->dst.u.icmp.type - 128 >= sizeof(invmap) || |
255 | !invmap[tuple->dst.u.icmp.type - 128]) | 260 | !invmap[tuple->dst.u.icmp.type - 128]) |
256 | return -EINVAL; | 261 | return -EINVAL; |
257 | 262 | ||
258 | return 0; | 263 | return 0; |
259 | } | 264 | } |
260 | 265 | ||
261 | static unsigned int icmpv6_nlattr_tuple_size(void) | 266 | static unsigned int icmpv6_nlattr_tuple_size(void) |
262 | { | 267 | { |
263 | static unsigned int size __read_mostly; | 268 | static unsigned int size __read_mostly; |
264 | 269 | ||
265 | if (!size) | 270 | if (!size) |
266 | size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); | 271 | size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); |
267 | 272 | ||
268 | return size; | 273 | return size; |
269 | } | 274 | } |
270 | #endif | 275 | #endif |
271 | 276 | ||
272 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 277 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
273 | 278 | ||
274 | #include <linux/netfilter/nfnetlink.h> | 279 | #include <linux/netfilter/nfnetlink.h> |
275 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 280 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
276 | 281 | ||
277 | static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], | 282 | static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], |
278 | struct net *net, void *data) | 283 | struct net *net, void *data) |
279 | { | 284 | { |
280 | unsigned int *timeout = data; | 285 | unsigned int *timeout = data; |
281 | struct nf_icmp_net *in = icmpv6_pernet(net); | 286 | struct nf_icmp_net *in = icmpv6_pernet(net); |
282 | 287 | ||
288 | if (!timeout) | ||
289 | timeout = icmpv6_get_timeouts(net); | ||
283 | if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { | 290 | if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { |
284 | *timeout = | 291 | *timeout = |
285 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; | 292 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; |
286 | } else { | 293 | } else { |
287 | /* Set default ICMPv6 timeout. */ | 294 | /* Set default ICMPv6 timeout. */ |
288 | *timeout = in->timeout; | 295 | *timeout = in->timeout; |
289 | } | 296 | } |
290 | return 0; | 297 | return 0; |
291 | } | 298 | } |
292 | 299 | ||
293 | static int | 300 | static int |
294 | icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 301 | icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
295 | { | 302 | { |
296 | const unsigned int *timeout = data; | 303 | const unsigned int *timeout = data; |
297 | 304 | ||
298 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ))) | 305 | if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ))) |
299 | goto nla_put_failure; | 306 | goto nla_put_failure; |
300 | return 0; | 307 | return 0; |
301 | 308 | ||
302 | nla_put_failure: | 309 | nla_put_failure: |
303 | return -ENOSPC; | 310 | return -ENOSPC; |
304 | } | 311 | } |
305 | 312 | ||
306 | static const struct nla_policy | 313 | static const struct nla_policy |
307 | icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { | 314 | icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { |
308 | [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, | 315 | [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, |
309 | }; | 316 | }; |
310 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 317 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
311 | 318 | ||
312 | #ifdef CONFIG_SYSCTL | 319 | #ifdef CONFIG_SYSCTL |
313 | static struct ctl_table icmpv6_sysctl_table[] = { | 320 | static struct ctl_table icmpv6_sysctl_table[] = { |
314 | { | 321 | { |
315 | .procname = "nf_conntrack_icmpv6_timeout", | 322 | .procname = "nf_conntrack_icmpv6_timeout", |
316 | .maxlen = sizeof(unsigned int), | 323 | .maxlen = sizeof(unsigned int), |
317 | .mode = 0644, | 324 | .mode = 0644, |
318 | .proc_handler = proc_dointvec_jiffies, | 325 | .proc_handler = proc_dointvec_jiffies, |
319 | }, | 326 | }, |
320 | { } | 327 | { } |
321 | }; | 328 | }; |
322 | #endif /* CONFIG_SYSCTL */ | 329 | #endif /* CONFIG_SYSCTL */ |
323 | 330 | ||
324 | static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, | 331 | static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, |
325 | struct nf_icmp_net *in) | 332 | struct nf_icmp_net *in) |
326 | { | 333 | { |
327 | #ifdef CONFIG_SYSCTL | 334 | #ifdef CONFIG_SYSCTL |
328 | pn->ctl_table = kmemdup(icmpv6_sysctl_table, | 335 | pn->ctl_table = kmemdup(icmpv6_sysctl_table, |
329 | sizeof(icmpv6_sysctl_table), | 336 | sizeof(icmpv6_sysctl_table), |
330 | GFP_KERNEL); | 337 | GFP_KERNEL); |
331 | if (!pn->ctl_table) | 338 | if (!pn->ctl_table) |
332 | return -ENOMEM; | 339 | return -ENOMEM; |
333 | 340 | ||
334 | pn->ctl_table[0].data = &in->timeout; | 341 | pn->ctl_table[0].data = &in->timeout; |
335 | #endif | 342 | #endif |
336 | return 0; | 343 | return 0; |
337 | } | 344 | } |
338 | 345 | ||
339 | static int icmpv6_init_net(struct net *net, u_int16_t proto) | 346 | static int icmpv6_init_net(struct net *net, u_int16_t proto) |
340 | { | 347 | { |
341 | struct nf_icmp_net *in = icmpv6_pernet(net); | 348 | struct nf_icmp_net *in = icmpv6_pernet(net); |
342 | struct nf_proto_net *pn = &in->pn; | 349 | struct nf_proto_net *pn = &in->pn; |
343 | 350 | ||
344 | in->timeout = nf_ct_icmpv6_timeout; | 351 | in->timeout = nf_ct_icmpv6_timeout; |
345 | 352 | ||
346 | return icmpv6_kmemdup_sysctl_table(pn, in); | 353 | return icmpv6_kmemdup_sysctl_table(pn, in); |
347 | } | 354 | } |
348 | 355 | ||
349 | static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) | 356 | static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) |
350 | { | 357 | { |
351 | return &net->ct.nf_ct_proto.icmpv6.pn; | 358 | return &net->ct.nf_ct_proto.icmpv6.pn; |
352 | } | 359 | } |
353 | 360 | ||
354 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = | 361 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = |
355 | { | 362 | { |
356 | .l3proto = PF_INET6, | 363 | .l3proto = PF_INET6, |
357 | .l4proto = IPPROTO_ICMPV6, | 364 | .l4proto = IPPROTO_ICMPV6, |
358 | .pkt_to_tuple = icmpv6_pkt_to_tuple, | 365 | .pkt_to_tuple = icmpv6_pkt_to_tuple, |
359 | .invert_tuple = icmpv6_invert_tuple, | 366 | .invert_tuple = icmpv6_invert_tuple, |
360 | .packet = icmpv6_packet, | 367 | .packet = icmpv6_packet, |
361 | .get_timeouts = icmpv6_get_timeouts, | ||
362 | .new = icmpv6_new, | 368 | .new = icmpv6_new, |
363 | .error = icmpv6_error, | 369 | .error = icmpv6_error, |
364 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 370 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
365 | .tuple_to_nlattr = icmpv6_tuple_to_nlattr, | 371 | .tuple_to_nlattr = icmpv6_tuple_to_nlattr, |
366 | .nlattr_tuple_size = icmpv6_nlattr_tuple_size, | 372 | .nlattr_tuple_size = icmpv6_nlattr_tuple_size, |
367 | .nlattr_to_tuple = icmpv6_nlattr_to_tuple, | 373 | .nlattr_to_tuple = icmpv6_nlattr_to_tuple, |
368 | .nla_policy = icmpv6_nla_policy, | 374 | .nla_policy = icmpv6_nla_policy, |
369 | #endif | 375 | #endif |
370 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 376 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
371 | .ctnl_timeout = { | 377 | .ctnl_timeout = { |
372 | .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, | 378 | .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, |
373 | .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, | 379 | .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, |
374 | .nlattr_max = CTA_TIMEOUT_ICMP_MAX, | 380 | .nlattr_max = CTA_TIMEOUT_ICMP_MAX, |
375 | .obj_size = sizeof(unsigned int), | 381 | .obj_size = sizeof(unsigned int), |
376 | .nla_policy = icmpv6_timeout_nla_policy, | 382 | .nla_policy = icmpv6_timeout_nla_policy, |
377 | }, | 383 | }, |
378 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 384 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
379 | .init_net = icmpv6_init_net, | 385 | .init_net = icmpv6_init_net, |
380 | .get_net_proto = icmpv6_get_net_proto, | 386 | .get_net_proto = icmpv6_get_net_proto, |
net/netfilter/nf_conntrack_core.c
1 | /* Connection state tracking for netfilter. This is separated from, | 1 | /* Connection state tracking for netfilter. This is separated from, |
2 | but required by, the NAT layer; it can also be used by an iptables | 2 | but required by, the NAT layer; it can also be used by an iptables |
3 | extension. */ | 3 | extension. */ |
4 | 4 | ||
5 | /* (C) 1999-2001 Paul `Rusty' Russell | 5 | /* (C) 1999-2001 Paul `Rusty' Russell |
6 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | 6 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> |
7 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> | 7 | * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> |
8 | * (C) 2005-2012 Patrick McHardy <kaber@trash.net> | 8 | * (C) 2005-2012 Patrick McHardy <kaber@trash.net> |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 as | 11 | * it under the terms of the GNU General Public License version 2 as |
12 | * published by the Free Software Foundation. | 12 | * published by the Free Software Foundation. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
16 | 16 | ||
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/netfilter.h> | 18 | #include <linux/netfilter.h> |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/proc_fs.h> | 22 | #include <linux/proc_fs.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/stddef.h> | 24 | #include <linux/stddef.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/random.h> | 26 | #include <linux/random.h> |
27 | #include <linux/jhash.h> | 27 | #include <linux/jhash.h> |
28 | #include <linux/err.h> | 28 | #include <linux/err.h> |
29 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | #include <linux/moduleparam.h> | 30 | #include <linux/moduleparam.h> |
31 | #include <linux/notifier.h> | 31 | #include <linux/notifier.h> |
32 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
33 | #include <linux/netdevice.h> | 33 | #include <linux/netdevice.h> |
34 | #include <linux/socket.h> | 34 | #include <linux/socket.h> |
35 | #include <linux/mm.h> | 35 | #include <linux/mm.h> |
36 | #include <linux/nsproxy.h> | 36 | #include <linux/nsproxy.h> |
37 | #include <linux/rculist_nulls.h> | 37 | #include <linux/rculist_nulls.h> |
38 | 38 | ||
39 | #include <net/netfilter/nf_conntrack.h> | 39 | #include <net/netfilter/nf_conntrack.h> |
40 | #include <net/netfilter/nf_conntrack_l4proto.h> | 40 | #include <net/netfilter/nf_conntrack_l4proto.h> |
41 | #include <net/netfilter/nf_conntrack_expect.h> | 41 | #include <net/netfilter/nf_conntrack_expect.h> |
42 | #include <net/netfilter/nf_conntrack_helper.h> | 42 | #include <net/netfilter/nf_conntrack_helper.h> |
43 | #include <net/netfilter/nf_conntrack_seqadj.h> | 43 | #include <net/netfilter/nf_conntrack_seqadj.h> |
44 | #include <net/netfilter/nf_conntrack_core.h> | 44 | #include <net/netfilter/nf_conntrack_core.h> |
45 | #include <net/netfilter/nf_conntrack_extend.h> | 45 | #include <net/netfilter/nf_conntrack_extend.h> |
46 | #include <net/netfilter/nf_conntrack_acct.h> | 46 | #include <net/netfilter/nf_conntrack_acct.h> |
47 | #include <net/netfilter/nf_conntrack_ecache.h> | 47 | #include <net/netfilter/nf_conntrack_ecache.h> |
48 | #include <net/netfilter/nf_conntrack_zones.h> | 48 | #include <net/netfilter/nf_conntrack_zones.h> |
49 | #include <net/netfilter/nf_conntrack_timestamp.h> | 49 | #include <net/netfilter/nf_conntrack_timestamp.h> |
50 | #include <net/netfilter/nf_conntrack_timeout.h> | 50 | #include <net/netfilter/nf_conntrack_timeout.h> |
51 | #include <net/netfilter/nf_conntrack_labels.h> | 51 | #include <net/netfilter/nf_conntrack_labels.h> |
52 | #include <net/netfilter/nf_conntrack_synproxy.h> | 52 | #include <net/netfilter/nf_conntrack_synproxy.h> |
53 | #include <net/netfilter/nf_nat.h> | 53 | #include <net/netfilter/nf_nat.h> |
54 | #include <net/netfilter/nf_nat_core.h> | 54 | #include <net/netfilter/nf_nat_core.h> |
55 | #include <net/netfilter/nf_nat_helper.h> | 55 | #include <net/netfilter/nf_nat_helper.h> |
56 | #include <net/netns/hash.h> | 56 | #include <net/netns/hash.h> |
57 | #include <net/ip.h> | 57 | #include <net/ip.h> |
58 | 58 | ||
59 | #include "nf_internals.h" | 59 | #include "nf_internals.h" |
60 | 60 | ||
61 | __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; | 61 | __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; |
62 | EXPORT_SYMBOL_GPL(nf_conntrack_locks); | 62 | EXPORT_SYMBOL_GPL(nf_conntrack_locks); |
63 | 63 | ||
64 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); | 64 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); |
65 | EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); | 65 | EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); |
66 | 66 | ||
67 | struct hlist_nulls_head *nf_conntrack_hash __read_mostly; | 67 | struct hlist_nulls_head *nf_conntrack_hash __read_mostly; |
68 | EXPORT_SYMBOL_GPL(nf_conntrack_hash); | 68 | EXPORT_SYMBOL_GPL(nf_conntrack_hash); |
69 | 69 | ||
70 | struct conntrack_gc_work { | 70 | struct conntrack_gc_work { |
71 | struct delayed_work dwork; | 71 | struct delayed_work dwork; |
72 | u32 last_bucket; | 72 | u32 last_bucket; |
73 | bool exiting; | 73 | bool exiting; |
74 | bool early_drop; | 74 | bool early_drop; |
75 | long next_gc_run; | 75 | long next_gc_run; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static __read_mostly struct kmem_cache *nf_conntrack_cachep; | 78 | static __read_mostly struct kmem_cache *nf_conntrack_cachep; |
79 | static __read_mostly spinlock_t nf_conntrack_locks_all_lock; | 79 | static __read_mostly spinlock_t nf_conntrack_locks_all_lock; |
80 | static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); | 80 | static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); |
81 | static __read_mostly bool nf_conntrack_locks_all; | 81 | static __read_mostly bool nf_conntrack_locks_all; |
82 | 82 | ||
83 | /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ | 83 | /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ |
84 | #define GC_MAX_BUCKETS_DIV 128u | 84 | #define GC_MAX_BUCKETS_DIV 128u |
85 | /* upper bound of full table scan */ | 85 | /* upper bound of full table scan */ |
86 | #define GC_MAX_SCAN_JIFFIES (16u * HZ) | 86 | #define GC_MAX_SCAN_JIFFIES (16u * HZ) |
87 | /* desired ratio of entries found to be expired */ | 87 | /* desired ratio of entries found to be expired */ |
88 | #define GC_EVICT_RATIO 50u | 88 | #define GC_EVICT_RATIO 50u |
89 | 89 | ||
90 | static struct conntrack_gc_work conntrack_gc_work; | 90 | static struct conntrack_gc_work conntrack_gc_work; |
91 | 91 | ||
92 | void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) | 92 | void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) |
93 | { | 93 | { |
94 | /* 1) Acquire the lock */ | 94 | /* 1) Acquire the lock */ |
95 | spin_lock(lock); | 95 | spin_lock(lock); |
96 | 96 | ||
97 | /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics | 97 | /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics |
98 | * It pairs with the smp_store_release() in nf_conntrack_all_unlock() | 98 | * It pairs with the smp_store_release() in nf_conntrack_all_unlock() |
99 | */ | 99 | */ |
100 | if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) | 100 | if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) |
101 | return; | 101 | return; |
102 | 102 | ||
103 | /* fast path failed, unlock */ | 103 | /* fast path failed, unlock */ |
104 | spin_unlock(lock); | 104 | spin_unlock(lock); |
105 | 105 | ||
106 | /* Slow path 1) get global lock */ | 106 | /* Slow path 1) get global lock */ |
107 | spin_lock(&nf_conntrack_locks_all_lock); | 107 | spin_lock(&nf_conntrack_locks_all_lock); |
108 | 108 | ||
109 | /* Slow path 2) get the lock we want */ | 109 | /* Slow path 2) get the lock we want */ |
110 | spin_lock(lock); | 110 | spin_lock(lock); |
111 | 111 | ||
112 | /* Slow path 3) release the global lock */ | 112 | /* Slow path 3) release the global lock */ |
113 | spin_unlock(&nf_conntrack_locks_all_lock); | 113 | spin_unlock(&nf_conntrack_locks_all_lock); |
114 | } | 114 | } |
115 | EXPORT_SYMBOL_GPL(nf_conntrack_lock); | 115 | EXPORT_SYMBOL_GPL(nf_conntrack_lock); |
116 | 116 | ||
117 | static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) | 117 | static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) |
118 | { | 118 | { |
119 | h1 %= CONNTRACK_LOCKS; | 119 | h1 %= CONNTRACK_LOCKS; |
120 | h2 %= CONNTRACK_LOCKS; | 120 | h2 %= CONNTRACK_LOCKS; |
121 | spin_unlock(&nf_conntrack_locks[h1]); | 121 | spin_unlock(&nf_conntrack_locks[h1]); |
122 | if (h1 != h2) | 122 | if (h1 != h2) |
123 | spin_unlock(&nf_conntrack_locks[h2]); | 123 | spin_unlock(&nf_conntrack_locks[h2]); |
124 | } | 124 | } |
125 | 125 | ||
126 | /* return true if we need to recompute hashes (in case hash table was resized) */ | 126 | /* return true if we need to recompute hashes (in case hash table was resized) */ |
127 | static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, | 127 | static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, |
128 | unsigned int h2, unsigned int sequence) | 128 | unsigned int h2, unsigned int sequence) |
129 | { | 129 | { |
130 | h1 %= CONNTRACK_LOCKS; | 130 | h1 %= CONNTRACK_LOCKS; |
131 | h2 %= CONNTRACK_LOCKS; | 131 | h2 %= CONNTRACK_LOCKS; |
132 | if (h1 <= h2) { | 132 | if (h1 <= h2) { |
133 | nf_conntrack_lock(&nf_conntrack_locks[h1]); | 133 | nf_conntrack_lock(&nf_conntrack_locks[h1]); |
134 | if (h1 != h2) | 134 | if (h1 != h2) |
135 | spin_lock_nested(&nf_conntrack_locks[h2], | 135 | spin_lock_nested(&nf_conntrack_locks[h2], |
136 | SINGLE_DEPTH_NESTING); | 136 | SINGLE_DEPTH_NESTING); |
137 | } else { | 137 | } else { |
138 | nf_conntrack_lock(&nf_conntrack_locks[h2]); | 138 | nf_conntrack_lock(&nf_conntrack_locks[h2]); |
139 | spin_lock_nested(&nf_conntrack_locks[h1], | 139 | spin_lock_nested(&nf_conntrack_locks[h1], |
140 | SINGLE_DEPTH_NESTING); | 140 | SINGLE_DEPTH_NESTING); |
141 | } | 141 | } |
142 | if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { | 142 | if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { |
143 | nf_conntrack_double_unlock(h1, h2); | 143 | nf_conntrack_double_unlock(h1, h2); |
144 | return true; | 144 | return true; |
145 | } | 145 | } |
146 | return false; | 146 | return false; |
147 | } | 147 | } |
148 | 148 | ||
149 | static void nf_conntrack_all_lock(void) | 149 | static void nf_conntrack_all_lock(void) |
150 | { | 150 | { |
151 | int i; | 151 | int i; |
152 | 152 | ||
153 | spin_lock(&nf_conntrack_locks_all_lock); | 153 | spin_lock(&nf_conntrack_locks_all_lock); |
154 | 154 | ||
155 | nf_conntrack_locks_all = true; | 155 | nf_conntrack_locks_all = true; |
156 | 156 | ||
157 | for (i = 0; i < CONNTRACK_LOCKS; i++) { | 157 | for (i = 0; i < CONNTRACK_LOCKS; i++) { |
158 | spin_lock(&nf_conntrack_locks[i]); | 158 | spin_lock(&nf_conntrack_locks[i]); |
159 | 159 | ||
160 | /* This spin_unlock provides the "release" to ensure that | 160 | /* This spin_unlock provides the "release" to ensure that |
161 | * nf_conntrack_locks_all==true is visible to everyone that | 161 | * nf_conntrack_locks_all==true is visible to everyone that |
162 | * acquired spin_lock(&nf_conntrack_locks[]). | 162 | * acquired spin_lock(&nf_conntrack_locks[]). |
163 | */ | 163 | */ |
164 | spin_unlock(&nf_conntrack_locks[i]); | 164 | spin_unlock(&nf_conntrack_locks[i]); |
165 | } | 165 | } |
166 | } | 166 | } |
167 | 167 | ||
168 | static void nf_conntrack_all_unlock(void) | 168 | static void nf_conntrack_all_unlock(void) |
169 | { | 169 | { |
170 | /* All prior stores must be complete before we clear | 170 | /* All prior stores must be complete before we clear |
171 | * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() | 171 | * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() |
172 | * might observe the false value but not the entire | 172 | * might observe the false value but not the entire |
173 | * critical section. | 173 | * critical section. |
174 | * It pairs with the smp_load_acquire() in nf_conntrack_lock() | 174 | * It pairs with the smp_load_acquire() in nf_conntrack_lock() |
175 | */ | 175 | */ |
176 | smp_store_release(&nf_conntrack_locks_all, false); | 176 | smp_store_release(&nf_conntrack_locks_all, false); |
177 | spin_unlock(&nf_conntrack_locks_all_lock); | 177 | spin_unlock(&nf_conntrack_locks_all_lock); |
178 | } | 178 | } |
179 | 179 | ||
180 | unsigned int nf_conntrack_htable_size __read_mostly; | 180 | unsigned int nf_conntrack_htable_size __read_mostly; |
181 | EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); | 181 | EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); |
182 | 182 | ||
183 | unsigned int nf_conntrack_max __read_mostly; | 183 | unsigned int nf_conntrack_max __read_mostly; |
184 | EXPORT_SYMBOL_GPL(nf_conntrack_max); | 184 | EXPORT_SYMBOL_GPL(nf_conntrack_max); |
185 | seqcount_t nf_conntrack_generation __read_mostly; | 185 | seqcount_t nf_conntrack_generation __read_mostly; |
186 | static unsigned int nf_conntrack_hash_rnd __read_mostly; | 186 | static unsigned int nf_conntrack_hash_rnd __read_mostly; |
187 | 187 | ||
188 | static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, | 188 | static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, |
189 | const struct net *net) | 189 | const struct net *net) |
190 | { | 190 | { |
191 | unsigned int n; | 191 | unsigned int n; |
192 | u32 seed; | 192 | u32 seed; |
193 | 193 | ||
194 | get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); | 194 | get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); |
195 | 195 | ||
196 | /* The direction must be ignored, so we hash everything up to the | 196 | /* The direction must be ignored, so we hash everything up to the |
197 | * destination ports (which is a multiple of 4) and treat the last | 197 | * destination ports (which is a multiple of 4) and treat the last |
198 | * three bytes manually. | 198 | * three bytes manually. |
199 | */ | 199 | */ |
200 | seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); | 200 | seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); |
201 | n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); | 201 | n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); |
202 | return jhash2((u32 *)tuple, n, seed ^ | 202 | return jhash2((u32 *)tuple, n, seed ^ |
203 | (((__force __u16)tuple->dst.u.all << 16) | | 203 | (((__force __u16)tuple->dst.u.all << 16) | |
204 | tuple->dst.protonum)); | 204 | tuple->dst.protonum)); |
205 | } | 205 | } |
206 | 206 | ||
207 | static u32 scale_hash(u32 hash) | 207 | static u32 scale_hash(u32 hash) |
208 | { | 208 | { |
209 | return reciprocal_scale(hash, nf_conntrack_htable_size); | 209 | return reciprocal_scale(hash, nf_conntrack_htable_size); |
210 | } | 210 | } |
211 | 211 | ||
212 | static u32 __hash_conntrack(const struct net *net, | 212 | static u32 __hash_conntrack(const struct net *net, |
213 | const struct nf_conntrack_tuple *tuple, | 213 | const struct nf_conntrack_tuple *tuple, |
214 | unsigned int size) | 214 | unsigned int size) |
215 | { | 215 | { |
216 | return reciprocal_scale(hash_conntrack_raw(tuple, net), size); | 216 | return reciprocal_scale(hash_conntrack_raw(tuple, net), size); |
217 | } | 217 | } |
218 | 218 | ||
219 | static u32 hash_conntrack(const struct net *net, | 219 | static u32 hash_conntrack(const struct net *net, |
220 | const struct nf_conntrack_tuple *tuple) | 220 | const struct nf_conntrack_tuple *tuple) |
221 | { | 221 | { |
222 | return scale_hash(hash_conntrack_raw(tuple, net)); | 222 | return scale_hash(hash_conntrack_raw(tuple, net)); |
223 | } | 223 | } |
224 | 224 | ||
225 | static bool | 225 | static bool |
226 | nf_ct_get_tuple(const struct sk_buff *skb, | 226 | nf_ct_get_tuple(const struct sk_buff *skb, |
227 | unsigned int nhoff, | 227 | unsigned int nhoff, |
228 | unsigned int dataoff, | 228 | unsigned int dataoff, |
229 | u_int16_t l3num, | 229 | u_int16_t l3num, |
230 | u_int8_t protonum, | 230 | u_int8_t protonum, |
231 | struct net *net, | 231 | struct net *net, |
232 | struct nf_conntrack_tuple *tuple, | 232 | struct nf_conntrack_tuple *tuple, |
233 | const struct nf_conntrack_l4proto *l4proto) | 233 | const struct nf_conntrack_l4proto *l4proto) |
234 | { | 234 | { |
235 | unsigned int size; | 235 | unsigned int size; |
236 | const __be32 *ap; | 236 | const __be32 *ap; |
237 | __be32 _addrs[8]; | 237 | __be32 _addrs[8]; |
238 | struct { | 238 | struct { |
239 | __be16 sport; | 239 | __be16 sport; |
240 | __be16 dport; | 240 | __be16 dport; |
241 | } _inet_hdr, *inet_hdr; | 241 | } _inet_hdr, *inet_hdr; |
242 | 242 | ||
243 | memset(tuple, 0, sizeof(*tuple)); | 243 | memset(tuple, 0, sizeof(*tuple)); |
244 | 244 | ||
245 | tuple->src.l3num = l3num; | 245 | tuple->src.l3num = l3num; |
246 | switch (l3num) { | 246 | switch (l3num) { |
247 | case NFPROTO_IPV4: | 247 | case NFPROTO_IPV4: |
248 | nhoff += offsetof(struct iphdr, saddr); | 248 | nhoff += offsetof(struct iphdr, saddr); |
249 | size = 2 * sizeof(__be32); | 249 | size = 2 * sizeof(__be32); |
250 | break; | 250 | break; |
251 | case NFPROTO_IPV6: | 251 | case NFPROTO_IPV6: |
252 | nhoff += offsetof(struct ipv6hdr, saddr); | 252 | nhoff += offsetof(struct ipv6hdr, saddr); |
253 | size = sizeof(_addrs); | 253 | size = sizeof(_addrs); |
254 | break; | 254 | break; |
255 | default: | 255 | default: |
256 | return true; | 256 | return true; |
257 | } | 257 | } |
258 | 258 | ||
259 | ap = skb_header_pointer(skb, nhoff, size, _addrs); | 259 | ap = skb_header_pointer(skb, nhoff, size, _addrs); |
260 | if (!ap) | 260 | if (!ap) |
261 | return false; | 261 | return false; |
262 | 262 | ||
263 | switch (l3num) { | 263 | switch (l3num) { |
264 | case NFPROTO_IPV4: | 264 | case NFPROTO_IPV4: |
265 | tuple->src.u3.ip = ap[0]; | 265 | tuple->src.u3.ip = ap[0]; |
266 | tuple->dst.u3.ip = ap[1]; | 266 | tuple->dst.u3.ip = ap[1]; |
267 | break; | 267 | break; |
268 | case NFPROTO_IPV6: | 268 | case NFPROTO_IPV6: |
269 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); | 269 | memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); |
270 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); | 270 | memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); |
271 | break; | 271 | break; |
272 | } | 272 | } |
273 | 273 | ||
274 | tuple->dst.protonum = protonum; | 274 | tuple->dst.protonum = protonum; |
275 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; | 275 | tuple->dst.dir = IP_CT_DIR_ORIGINAL; |
276 | 276 | ||
277 | if (unlikely(l4proto->pkt_to_tuple)) | 277 | if (unlikely(l4proto->pkt_to_tuple)) |
278 | return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); | 278 | return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); |
279 | 279 | ||
280 | /* Actually only need first 4 bytes to get ports. */ | 280 | /* Actually only need first 4 bytes to get ports. */ |
281 | inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); | 281 | inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); |
282 | if (!inet_hdr) | 282 | if (!inet_hdr) |
283 | return false; | 283 | return false; |
284 | 284 | ||
285 | tuple->src.u.udp.port = inet_hdr->sport; | 285 | tuple->src.u.udp.port = inet_hdr->sport; |
286 | tuple->dst.u.udp.port = inet_hdr->dport; | 286 | tuple->dst.u.udp.port = inet_hdr->dport; |
287 | return true; | 287 | return true; |
288 | } | 288 | } |
289 | 289 | ||
290 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 290 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
291 | u_int8_t *protonum) | 291 | u_int8_t *protonum) |
292 | { | 292 | { |
293 | int dataoff = -1; | 293 | int dataoff = -1; |
294 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) | 294 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) |
295 | const struct iphdr *iph; | 295 | const struct iphdr *iph; |
296 | struct iphdr _iph; | 296 | struct iphdr _iph; |
297 | 297 | ||
298 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | 298 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); |
299 | if (!iph) | 299 | if (!iph) |
300 | return -1; | 300 | return -1; |
301 | 301 | ||
302 | /* Conntrack defragments packets, we might still see fragments | 302 | /* Conntrack defragments packets, we might still see fragments |
303 | * inside ICMP packets though. | 303 | * inside ICMP packets though. |
304 | */ | 304 | */ |
305 | if (iph->frag_off & htons(IP_OFFSET)) | 305 | if (iph->frag_off & htons(IP_OFFSET)) |
306 | return -1; | 306 | return -1; |
307 | 307 | ||
308 | dataoff = nhoff + (iph->ihl << 2); | 308 | dataoff = nhoff + (iph->ihl << 2); |
309 | *protonum = iph->protocol; | 309 | *protonum = iph->protocol; |
310 | 310 | ||
311 | /* Check bogus IP headers */ | 311 | /* Check bogus IP headers */ |
312 | if (dataoff > skb->len) { | 312 | if (dataoff > skb->len) { |
313 | pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", | 313 | pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", |
314 | nhoff, iph->ihl << 2, skb->len); | 314 | nhoff, iph->ihl << 2, skb->len); |
315 | return -1; | 315 | return -1; |
316 | } | 316 | } |
317 | #endif | 317 | #endif |
318 | return dataoff; | 318 | return dataoff; |
319 | } | 319 | } |
320 | 320 | ||
321 | static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 321 | static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
322 | u8 *protonum) | 322 | u8 *protonum) |
323 | { | 323 | { |
324 | int protoff = -1; | 324 | int protoff = -1; |
325 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | 325 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) |
326 | unsigned int extoff = nhoff + sizeof(struct ipv6hdr); | 326 | unsigned int extoff = nhoff + sizeof(struct ipv6hdr); |
327 | __be16 frag_off; | 327 | __be16 frag_off; |
328 | u8 nexthdr; | 328 | u8 nexthdr; |
329 | 329 | ||
330 | if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), | 330 | if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), |
331 | &nexthdr, sizeof(nexthdr)) != 0) { | 331 | &nexthdr, sizeof(nexthdr)) != 0) { |
332 | pr_debug("can't get nexthdr\n"); | 332 | pr_debug("can't get nexthdr\n"); |
333 | return -1; | 333 | return -1; |
334 | } | 334 | } |
335 | protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); | 335 | protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); |
336 | /* | 336 | /* |
337 | * (protoff == skb->len) means the packet has no data, just | 337 | * (protoff == skb->len) means the packet has no data, just |
338 | * IPv6 and possibly extension headers, but it is tracked anyway | 338 | * IPv6 and possibly extension headers, but it is tracked anyway |
339 | */ | 339 | */ |
340 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { | 340 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { |
341 | pr_debug("can't find proto in pkt\n"); | 341 | pr_debug("can't find proto in pkt\n"); |
342 | return -1; | 342 | return -1; |
343 | } | 343 | } |
344 | 344 | ||
345 | *protonum = nexthdr; | 345 | *protonum = nexthdr; |
346 | #endif | 346 | #endif |
347 | return protoff; | 347 | return protoff; |
348 | } | 348 | } |
349 | 349 | ||
350 | static int get_l4proto(const struct sk_buff *skb, | 350 | static int get_l4proto(const struct sk_buff *skb, |
351 | unsigned int nhoff, u8 pf, u8 *l4num) | 351 | unsigned int nhoff, u8 pf, u8 *l4num) |
352 | { | 352 | { |
353 | switch (pf) { | 353 | switch (pf) { |
354 | case NFPROTO_IPV4: | 354 | case NFPROTO_IPV4: |
355 | return ipv4_get_l4proto(skb, nhoff, l4num); | 355 | return ipv4_get_l4proto(skb, nhoff, l4num); |
356 | case NFPROTO_IPV6: | 356 | case NFPROTO_IPV6: |
357 | return ipv6_get_l4proto(skb, nhoff, l4num); | 357 | return ipv6_get_l4proto(skb, nhoff, l4num); |
358 | default: | 358 | default: |
359 | *l4num = 0; | 359 | *l4num = 0; |
360 | break; | 360 | break; |
361 | } | 361 | } |
362 | return -1; | 362 | return -1; |
363 | } | 363 | } |
364 | 364 | ||
/* Build a conntrack tuple from a packet: parse the network header at
 * @nhoff to find the transport protocol, then have the matching l4
 * tracker fill in the tuple.  Returns false if the headers cannot be
 * parsed.
 */
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num,
		       struct net *net, struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_l4proto *l4proto;
	u8 protonum;
	int protoff;
	int ret;

	/* RCU read side keeps the l4proto tracker pointer valid below. */
	rcu_read_lock();

	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
	if (protoff <= 0) {
		rcu_read_unlock();
		return false;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
			      l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
391 | 391 | ||
392 | bool | 392 | bool |
393 | nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, | 393 | nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, |
394 | const struct nf_conntrack_tuple *orig, | 394 | const struct nf_conntrack_tuple *orig, |
395 | const struct nf_conntrack_l4proto *l4proto) | 395 | const struct nf_conntrack_l4proto *l4proto) |
396 | { | 396 | { |
397 | memset(inverse, 0, sizeof(*inverse)); | 397 | memset(inverse, 0, sizeof(*inverse)); |
398 | 398 | ||
399 | inverse->src.l3num = orig->src.l3num; | 399 | inverse->src.l3num = orig->src.l3num; |
400 | 400 | ||
401 | switch (orig->src.l3num) { | 401 | switch (orig->src.l3num) { |
402 | case NFPROTO_IPV4: | 402 | case NFPROTO_IPV4: |
403 | inverse->src.u3.ip = orig->dst.u3.ip; | 403 | inverse->src.u3.ip = orig->dst.u3.ip; |
404 | inverse->dst.u3.ip = orig->src.u3.ip; | 404 | inverse->dst.u3.ip = orig->src.u3.ip; |
405 | break; | 405 | break; |
406 | case NFPROTO_IPV6: | 406 | case NFPROTO_IPV6: |
407 | inverse->src.u3.in6 = orig->dst.u3.in6; | 407 | inverse->src.u3.in6 = orig->dst.u3.in6; |
408 | inverse->dst.u3.in6 = orig->src.u3.in6; | 408 | inverse->dst.u3.in6 = orig->src.u3.in6; |
409 | break; | 409 | break; |
410 | default: | 410 | default: |
411 | break; | 411 | break; |
412 | } | 412 | } |
413 | 413 | ||
414 | inverse->dst.dir = !orig->dst.dir; | 414 | inverse->dst.dir = !orig->dst.dir; |
415 | 415 | ||
416 | inverse->dst.protonum = orig->dst.protonum; | 416 | inverse->dst.protonum = orig->dst.protonum; |
417 | 417 | ||
418 | if (unlikely(l4proto->invert_tuple)) | 418 | if (unlikely(l4proto->invert_tuple)) |
419 | return l4proto->invert_tuple(inverse, orig); | 419 | return l4proto->invert_tuple(inverse, orig); |
420 | 420 | ||
421 | inverse->src.u.all = orig->dst.u.all; | 421 | inverse->src.u.all = orig->dst.u.all; |
422 | inverse->dst.u.all = orig->src.u.all; | 422 | inverse->dst.u.all = orig->src.u.all; |
423 | return true; | 423 | return true; |
424 | } | 424 | } |
425 | EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); | 425 | EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); |
426 | 426 | ||
/* Unlink a confirmed conntrack from both hash chains and drop any
 * expectations it still owns.  Caller holds the relevant bucket locks.
 */
static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
437 | 437 | ||
/* must be called with local_bh_disable */
static void nf_ct_add_to_dying_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) dying list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	/* ct->cpu records which CPU's list holds us, for later removal */
	spin_lock(&pcpu->lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->dying);
	spin_unlock(&pcpu->lock);
}
452 | 452 | ||
/* must be called with local_bh_disable */
static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) unconfirmed list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	/* ct->cpu records which CPU's list holds us, for later removal */
	spin_lock(&pcpu->lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->unconfirmed);
	spin_unlock(&pcpu->lock);
}
467 | 467 | ||
/* must be called with local_bh_disable */
static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* We overload first tuple to link into unconfirmed or dying list.*/
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	/* The entry must be on one of the two lists; being unhashed here
	 * would indicate a refcounting/list bug.
	 */
	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&pcpu->lock);
}
481 | 481 | ||
482 | #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) | 482 | #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) |
483 | 483 | ||
484 | /* Released via destroy_conntrack() */ | 484 | /* Released via destroy_conntrack() */ |
485 | struct nf_conn *nf_ct_tmpl_alloc(struct net *net, | 485 | struct nf_conn *nf_ct_tmpl_alloc(struct net *net, |
486 | const struct nf_conntrack_zone *zone, | 486 | const struct nf_conntrack_zone *zone, |
487 | gfp_t flags) | 487 | gfp_t flags) |
488 | { | 488 | { |
489 | struct nf_conn *tmpl, *p; | 489 | struct nf_conn *tmpl, *p; |
490 | 490 | ||
491 | if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { | 491 | if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { |
492 | tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); | 492 | tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); |
493 | if (!tmpl) | 493 | if (!tmpl) |
494 | return NULL; | 494 | return NULL; |
495 | 495 | ||
496 | p = tmpl; | 496 | p = tmpl; |
497 | tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); | 497 | tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); |
498 | if (tmpl != p) { | 498 | if (tmpl != p) { |
499 | tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); | 499 | tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); |
500 | tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; | 500 | tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; |
501 | } | 501 | } |
502 | } else { | 502 | } else { |
503 | tmpl = kzalloc(sizeof(*tmpl), flags); | 503 | tmpl = kzalloc(sizeof(*tmpl), flags); |
504 | if (!tmpl) | 504 | if (!tmpl) |
505 | return NULL; | 505 | return NULL; |
506 | } | 506 | } |
507 | 507 | ||
508 | tmpl->status = IPS_TEMPLATE; | 508 | tmpl->status = IPS_TEMPLATE; |
509 | write_pnet(&tmpl->ct_net, net); | 509 | write_pnet(&tmpl->ct_net, net); |
510 | nf_ct_zone_add(tmpl, zone); | 510 | nf_ct_zone_add(tmpl, zone); |
511 | atomic_set(&tmpl->ct_general.use, 0); | 511 | atomic_set(&tmpl->ct_general.use, 0); |
512 | 512 | ||
513 | return tmpl; | 513 | return tmpl; |
514 | } | 514 | } |
515 | EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); | 515 | EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); |
516 | 516 | ||
/* Free a template allocated by nf_ct_tmpl_alloc(), undoing the manual
 * alignment padding when the allocator could not guarantee alignment.
 */
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
	nf_ct_ext_destroy(tmpl);
	nf_ct_ext_free(tmpl);

	/* Mirror the allocation path: kfree() needs the original pointer,
	 * which sits tmpl_padto bytes before the aligned one.
	 */
	if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
		kfree((char *)tmpl - tmpl->proto.tmpl_padto);
	else
		kfree(tmpl);
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
528 | 528 | ||
/* Final release once the refcount reached zero: run the l4 tracker's
 * destroy hook, drop remaining expectations, unlink from the per-cpu
 * unconfirmed/dying list, release the master reference and free.
 */
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	const struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	WARN_ON(atomic_read(&nfct->use) != 0);

	/* Templates never went through the normal lists/hash. */
	if (unlikely(nf_ct_is_template(ct))) {
		nf_ct_tmpl_free(ct);
		return;
	}
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->destroy)
		l4proto->destroy(ct);

	local_bh_disable();
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too.
	 */
	nf_ct_remove_expectations(ct);

	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	local_bh_enable();

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
564 | 564 | ||
/* Move a confirmed conntrack from the hash table onto the per-cpu dying
 * list.  The seqcount loop retries hash computation if a table resize
 * raced with us before both bucket locks were taken.
 */
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	unsigned int sequence;

	nf_ct_helper_destroy(ct);

	local_bh_disable();
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	clean_from_lists(ct);
	nf_conntrack_double_unlock(hash, reply_hash);

	nf_ct_add_to_dying_list(ct);

	local_bh_enable();
}
589 | 589 | ||
/* Kill a conntrack: mark it dying, stamp the stop time, deliver the
 * destroy event and remove it from the lists.  Returns false if it was
 * already dying or if event delivery failed (in which case the final
 * nf_ct_put is deferred to the ecache worker on redelivery).
 */
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
	struct nf_conn_tstamp *tstamp;

	/* Only one caller wins the right to tear this entry down. */
	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
		return false;

	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp && tstamp->stop == 0)
		tstamp->stop = ktime_get_real_ns();

	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
				    portid, report) < 0) {
		/* destroy event was not delivered. nf_ct_put will
		 * be done by event cache worker on redelivery.
		 */
		nf_ct_delete_from_lists(ct);
		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
		return false;
	}

	nf_conntrack_ecache_work(nf_ct_net(ct));
	nf_ct_delete_from_lists(ct);
	nf_ct_put(ct);
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
617 | 617 | ||
618 | static inline bool | 618 | static inline bool |
619 | nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, | 619 | nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, |
620 | const struct nf_conntrack_tuple *tuple, | 620 | const struct nf_conntrack_tuple *tuple, |
621 | const struct nf_conntrack_zone *zone, | 621 | const struct nf_conntrack_zone *zone, |
622 | const struct net *net) | 622 | const struct net *net) |
623 | { | 623 | { |
624 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | 624 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); |
625 | 625 | ||
626 | /* A conntrack can be recreated with the equal tuple, | 626 | /* A conntrack can be recreated with the equal tuple, |
627 | * so we need to check that the conntrack is confirmed | 627 | * so we need to check that the conntrack is confirmed |
628 | */ | 628 | */ |
629 | return nf_ct_tuple_equal(tuple, &h->tuple) && | 629 | return nf_ct_tuple_equal(tuple, &h->tuple) && |
630 | nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && | 630 | nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && |
631 | nf_ct_is_confirmed(ct) && | 631 | nf_ct_is_confirmed(ct) && |
632 | net_eq(net, nf_ct_net(ct)); | 632 | net_eq(net, nf_ct_net(ct)); |
633 | } | 633 | } |
634 | 634 | ||
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
	/* Take a reference only if the entry is still live; if the
	 * refcount already dropped to zero someone else is freeing it.
	 */
	if (!atomic_inc_not_zero(&ct->ct_general.use))
		return;

	/* Re-check under our reference: another CPU may have refreshed
	 * the timeout in the meantime.
	 */
	if (nf_ct_should_gc(ct))
		nf_ct_kill(ct);

	nf_ct_put(ct);
}
646 | 646 | ||
/*
 * Warning :
 * - Caller must take a reference on returned object
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 *
 * Lockless hash walk: expired entries encountered along the way are
 * garbage-collected in place; dying entries are skipped.
 */
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	struct hlist_nulls_node *n;
	unsigned int bucket, hsize;

begin:
	/* Re-read table pointer and size each pass: a resize may swap them. */
	nf_conntrack_get_ht(&ct_hash, &hsize);
	bucket = reciprocal_scale(hash, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
		struct nf_conn *ct;

		ct = nf_ct_tuplehash_to_ctrack(h);
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_is_dying(ct))
			continue;

		if (nf_ct_key_equal(h, tuple, zone, net))
			return h;
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(n) != bucket) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	return NULL;
}
692 | 692 | ||
/* Find a connection corresponding to a tuple and take a reference on it.
 * After acquiring the reference the key is re-checked: the entry may
 * have been recycled for a different tuple in the meantime, in which
 * case the reference is dropped and the lookup restarts.
 */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
			const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
begin:
	h = ____nf_conntrack_find(net, zone, tuple, hash);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(nf_ct_is_dying(ct) ||
			     !atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
		else {
			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
				nf_ct_put(ct);
				goto begin;
			}
		}
	}
	rcu_read_unlock();

	return h;
}
720 | 720 | ||
721 | struct nf_conntrack_tuple_hash * | 721 | struct nf_conntrack_tuple_hash * |
722 | nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, | 722 | nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, |
723 | const struct nf_conntrack_tuple *tuple) | 723 | const struct nf_conntrack_tuple *tuple) |
724 | { | 724 | { |
725 | return __nf_conntrack_find_get(net, zone, tuple, | 725 | return __nf_conntrack_find_get(net, zone, tuple, |
726 | hash_conntrack_raw(tuple, net)); | 726 | hash_conntrack_raw(tuple, net)); |
727 | } | 727 | } |
728 | EXPORT_SYMBOL_GPL(nf_conntrack_find_get); | 728 | EXPORT_SYMBOL_GPL(nf_conntrack_find_get); |
729 | 729 | ||
/* Link both directions of a conntrack into the hash table at the given
 * precomputed bucket positions.  Caller holds the bucket locks.
 */
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int reply_hash)
{
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			   &nf_conntrack_hash[hash]);
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
			   &nf_conntrack_hash[reply_hash]);
}
739 | 739 | ||
/* Insert a conntrack into the hash table unless an entry with the same
 * original or reply tuple already exists.  Returns 0 on success or
 * -EEXIST on a duplicate.  The seqcount loop retries hash computation
 * if a table resize raced before both bucket locks were held.
 */
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int sequence;

	zone = nf_ct_zone(ct);

	local_bh_disable();
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	/* Publish initializing stores before the entry becomes visible. */
	smp_wmb();
	/* The caller holds a reference to this object */
	atomic_set(&ct->ct_general.use, 2);
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert);
	local_bh_enable();
	return 0;

out:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
788 | 788 | ||
789 | static inline void nf_ct_acct_update(struct nf_conn *ct, | 789 | static inline void nf_ct_acct_update(struct nf_conn *ct, |
790 | enum ip_conntrack_info ctinfo, | 790 | enum ip_conntrack_info ctinfo, |
791 | unsigned int len) | 791 | unsigned int len) |
792 | { | 792 | { |
793 | struct nf_conn_acct *acct; | 793 | struct nf_conn_acct *acct; |
794 | 794 | ||
795 | acct = nf_conn_acct_find(ct); | 795 | acct = nf_conn_acct_find(ct); |
796 | if (acct) { | 796 | if (acct) { |
797 | struct nf_conn_counter *counter = acct->counter; | 797 | struct nf_conn_counter *counter = acct->counter; |
798 | 798 | ||
799 | atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); | 799 | atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); |
800 | atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); | 800 | atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); |
801 | } | 801 | } |
802 | } | 802 | } |
803 | 803 | ||
804 | static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, | 804 | static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, |
805 | const struct nf_conn *loser_ct) | 805 | const struct nf_conn *loser_ct) |
806 | { | 806 | { |
807 | struct nf_conn_acct *acct; | 807 | struct nf_conn_acct *acct; |
808 | 808 | ||
809 | acct = nf_conn_acct_find(loser_ct); | 809 | acct = nf_conn_acct_find(loser_ct); |
810 | if (acct) { | 810 | if (acct) { |
811 | struct nf_conn_counter *counter = acct->counter; | 811 | struct nf_conn_counter *counter = acct->counter; |
812 | unsigned int bytes; | 812 | unsigned int bytes; |
813 | 813 | ||
814 | /* u32 should be fine since we must have seen one packet. */ | 814 | /* u32 should be fine since we must have seen one packet. */ |
815 | bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); | 815 | bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); |
816 | nf_ct_acct_update(ct, ctinfo, bytes); | 816 | nf_ct_acct_update(ct, ctinfo, bytes); |
817 | } | 817 | } |
818 | } | 818 | } |
819 | 819 | ||
/* Resolve race on insertion if this protocol allows this.
 * Two packets of the same flow raced to create a conntrack; @h is the
 * entry that won.  If the l4 tracker allows clashes and the winner had
 * no NAT applied, merge the loser's accounting into the winner, attach
 * the winner to the skb and accept; otherwise drop the packet.
 */
static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
			       enum ip_conntrack_info ctinfo,
			       struct nf_conntrack_tuple_hash *h)
{
	/* This is the conntrack entry already in hashes that won race. */
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
	const struct nf_conntrack_l4proto *l4proto;

	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->allow_clash &&
	    ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
	    !nf_ct_is_dying(ct) &&
	    atomic_inc_not_zero(&ct->ct_general.use)) {
		enum ip_conntrack_info oldinfo;
		struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);

		nf_ct_acct_merge(ct, ctinfo, loser_ct);
		/* Drop the skb's reference on the losing entry and point
		 * the skb at the winner instead.
		 */
		nf_conntrack_put(&loser_ct->ct_general);
		nf_ct_set(skb, ct, oldinfo);
		return NF_ACCEPT;
	}
	NF_CT_STAT_INC(net, drop);
	return NF_DROP;
}
845 | 845 | ||
/* Confirm a connection given skb; places it in hash table.
 *
 * Returns NF_ACCEPT on success.  On an insertion clash the outcome is
 * decided by nf_ct_resolve_clash(); any other failure returns NF_DROP.
 */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	const struct nf_conntrack_zone *zone;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conn_tstamp *tstamp;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	unsigned int sequence;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	local_bh_disable();

	/* Recompute both bucket indices and retry until the double lock is
	 * taken against an unchanged generation seqcount, i.e. no table
	 * resize raced with us.
	 */
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		/* reuse the hash saved before */
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
	 * connections for unconfirmed conns. But packet copies and
	 * REJECT will give spurious warnings here.
	 */

	/* No external references means no one else could have
	 * confirmed us.
	 */
	WARN_ON(nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);
	/* We have to check the DYING flag after unlink to prevent
	 * a race against nf_ct_get_next_corpse() possibly called from
	 * user context, else we insert an already 'dead' hash, blocking
	 * further use of that particular connection -JM.
	 */
	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	if (unlikely(nf_ct_is_dying(ct))) {
		nf_ct_add_to_dying_list(ct);
		goto dying;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash. If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout += nfct_time_stamp;
	atomic_inc(&ct->ct_general.use);
	ct->status |= IPS_CONFIRMED;

	/* set conntrack timestamp, if enabled. */
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp) {
		if (skb->tstamp == 0)
			__net_timestamp(skb);

		tstamp->start = ktime_to_ns(skb->tstamp);
	}
	/* Since the lookup is lockless, hash insertion must be done after
	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
	 * guarantee that no other CPU can find the conntrack before the above
	 * stores are visible.
	 */
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();

	/* Locks are dropped; event delivery below may sleep or defer. */
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	nf_ct_add_to_dying_list(ct);
	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
dying:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
962 | 962 | ||
/* Returns true if a connection correspondings to the tuple (required
 * for NAT).
 *
 * Lockless: walks the RCU-protected nulls list; expired entries found
 * along the way are reaped.  ignored_conntrack itself never counts as
 * a match.
 */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	unsigned int hash, hsize;
	struct hlist_nulls_node *n;
	struct nf_conn *ct;

	zone = nf_ct_zone(ignored_conntrack);

	rcu_read_lock();
begin:
	nf_conntrack_get_ht(&ct_hash, &hsize);
	hash = __hash_conntrack(net, tuple, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);

		if (ct == ignored_conntrack)
			continue;

		/* Reap stale entries as we go. */
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_key_equal(h, tuple, zone, net)) {
			NF_CT_STAT_INC_ATOMIC(net, found);
			rcu_read_unlock();
			return 1;
		}
	}

	/* The nulls value encodes the bucket we started in; a mismatch
	 * means the entry moved to another chain (object recycled under
	 * SLAB_TYPESAFE_BY_RCU), so restart the walk.
	 */
	if (get_nulls_value(n) != hash) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
1012 | 1012 | ||
1013 | #define NF_CT_EVICTION_RANGE 8 | 1013 | #define NF_CT_EVICTION_RANGE 8 |
1014 | 1014 | ||
/* There's a small race here where we may free a just-assured
   connection. Too bad: we're in trouble anyway. */
static unsigned int early_drop_list(struct net *net,
				    struct hlist_nulls_head *head)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int drops = 0;
	struct nf_conn *tmp;

	/* Walk one hash chain under RCU, killing unassured entries that
	 * belong to this netns.  Returns the number of entries dropped.
	 */
	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
		tmp = nf_ct_tuplehash_to_ctrack(h);

		/* Offloaded flows are never candidates for early drop. */
		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
			continue;

		if (nf_ct_is_expired(tmp)) {
			nf_ct_gc_expired(tmp);
			continue;
		}

		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
		    !net_eq(nf_ct_net(tmp), net) ||
		    nf_ct_is_dying(tmp))
			continue;

		/* Refcount may already be zero if the object is being
		 * recycled (SLAB_TYPESAFE_BY_RCU); skip it in that case.
		 */
		if (!atomic_inc_not_zero(&tmp->ct_general.use))
			continue;

		/* kill only if still in same netns -- might have moved due to
		 * SLAB_TYPESAFE_BY_RCU rules.
		 *
		 * We steal the timer reference. If that fails timer has
		 * already fired or someone else deleted it. Just drop ref
		 * and move to next entry.
		 */
		if (net_eq(nf_ct_net(tmp), net) &&
		    nf_ct_is_confirmed(tmp) &&
		    nf_ct_delete(tmp, 0, 0))
			drops++;

		nf_ct_put(tmp);
	}

	return drops;
}
1061 | 1061 | ||
1062 | static noinline int early_drop(struct net *net, unsigned int _hash) | 1062 | static noinline int early_drop(struct net *net, unsigned int _hash) |
1063 | { | 1063 | { |
1064 | unsigned int i; | 1064 | unsigned int i; |
1065 | 1065 | ||
1066 | for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { | 1066 | for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { |
1067 | struct hlist_nulls_head *ct_hash; | 1067 | struct hlist_nulls_head *ct_hash; |
1068 | unsigned int hash, hsize, drops; | 1068 | unsigned int hash, hsize, drops; |
1069 | 1069 | ||
1070 | rcu_read_lock(); | 1070 | rcu_read_lock(); |
1071 | nf_conntrack_get_ht(&ct_hash, &hsize); | 1071 | nf_conntrack_get_ht(&ct_hash, &hsize); |
1072 | hash = reciprocal_scale(_hash++, hsize); | 1072 | hash = reciprocal_scale(_hash++, hsize); |
1073 | 1073 | ||
1074 | drops = early_drop_list(net, &ct_hash[hash]); | 1074 | drops = early_drop_list(net, &ct_hash[hash]); |
1075 | rcu_read_unlock(); | 1075 | rcu_read_unlock(); |
1076 | 1076 | ||
1077 | if (drops) { | 1077 | if (drops) { |
1078 | NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); | 1078 | NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); |
1079 | return true; | 1079 | return true; |
1080 | } | 1080 | } |
1081 | } | 1081 | } |
1082 | 1082 | ||
1083 | return false; | 1083 | return false; |
1084 | } | 1084 | } |
1085 | 1085 | ||
1086 | static bool gc_worker_skip_ct(const struct nf_conn *ct) | 1086 | static bool gc_worker_skip_ct(const struct nf_conn *ct) |
1087 | { | 1087 | { |
1088 | return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); | 1088 | return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | static bool gc_worker_can_early_drop(const struct nf_conn *ct) | 1091 | static bool gc_worker_can_early_drop(const struct nf_conn *ct) |
1092 | { | 1092 | { |
1093 | const struct nf_conntrack_l4proto *l4proto; | 1093 | const struct nf_conntrack_l4proto *l4proto; |
1094 | 1094 | ||
1095 | if (!test_bit(IPS_ASSURED_BIT, &ct->status)) | 1095 | if (!test_bit(IPS_ASSURED_BIT, &ct->status)) |
1096 | return true; | 1096 | return true; |
1097 | 1097 | ||
1098 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); | 1098 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); |
1099 | if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) | 1099 | if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) |
1100 | return true; | 1100 | return true; |
1101 | 1101 | ||
1102 | return false; | 1102 | return false; |
1103 | } | 1103 | } |
1104 | 1104 | ||
1105 | #define DAY (86400 * HZ) | 1105 | #define DAY (86400 * HZ) |
1106 | 1106 | ||
1107 | /* Set an arbitrary timeout large enough not to ever expire, this save | 1107 | /* Set an arbitrary timeout large enough not to ever expire, this save |
1108 | * us a check for the IPS_OFFLOAD_BIT from the packet path via | 1108 | * us a check for the IPS_OFFLOAD_BIT from the packet path via |
1109 | * nf_ct_is_expired(). | 1109 | * nf_ct_is_expired(). |
1110 | */ | 1110 | */ |
1111 | static void nf_ct_offload_timeout(struct nf_conn *ct) | 1111 | static void nf_ct_offload_timeout(struct nf_conn *ct) |
1112 | { | 1112 | { |
1113 | if (nf_ct_expires(ct) < DAY / 2) | 1113 | if (nf_ct_expires(ct) < DAY / 2) |
1114 | ct->timeout = nfct_time_stamp + DAY; | 1114 | ct->timeout = nfct_time_stamp + DAY; |
1115 | } | 1115 | } |
1116 | 1116 | ||
/* Periodic garbage collector: scans a slice of the conntrack table,
 * reaping expired entries, refreshing offloaded ones and (when the
 * table is nearly full) early-dropping unassured entries.  Re-arms
 * itself with an interval adapted to how much it found to do.
 */
static void gc_worker(struct work_struct *work)
{
	unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
	unsigned int i, goal, buckets = 0, expired_count = 0;
	unsigned int nf_conntrack_max95 = 0;
	struct conntrack_gc_work *gc_work;
	unsigned int ratio, scanned = 0;
	unsigned long next_run;

	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

	/* Resume from the bucket where the previous run stopped. */
	goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
	i = gc_work->last_bucket;
	if (gc_work->early_drop)
		nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;

	do {
		struct nf_conntrack_tuple_hash *h;
		struct hlist_nulls_head *ct_hash;
		struct hlist_nulls_node *n;
		unsigned int hashsz;
		struct nf_conn *tmp;

		i++;
		rcu_read_lock();

		/* Table may have been resized; re-read size and wrap. */
		nf_conntrack_get_ht(&ct_hash, &hashsz);
		if (i >= hashsz)
			i = 0;

		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
			struct net *net;

			tmp = nf_ct_tuplehash_to_ctrack(h);

			scanned++;
			/* Offloaded flows are kept alive instead of aged. */
			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
				nf_ct_offload_timeout(tmp);
				continue;
			}

			if (nf_ct_is_expired(tmp)) {
				nf_ct_gc_expired(tmp);
				expired_count++;
				continue;
			}

			/* Early drop applies only when enabled and the
			 * entry's netns is above 95% of the table limit.
			 */
			if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
				continue;

			net = nf_ct_net(tmp);
			if (atomic_read(&net->ct.count) < nf_conntrack_max95)
				continue;

			/* need to take reference to avoid possible races */
			if (!atomic_inc_not_zero(&tmp->ct_general.use))
				continue;

			/* Re-check after taking the reference; the object
			 * may have been recycled (SLAB_TYPESAFE_BY_RCU).
			 */
			if (gc_worker_skip_ct(tmp)) {
				nf_ct_put(tmp);
				continue;
			}

			if (gc_worker_can_early_drop(tmp))
				nf_ct_kill(tmp);

			nf_ct_put(tmp);
		}

		/* could check get_nulls_value() here and restart if ct
		 * was moved to another chain. But given gc is best-effort
		 * we will just continue with next hash slot.
		 */
		rcu_read_unlock();
		cond_resched();
	} while (++buckets < goal);

	if (gc_work->exiting)
		return;

	/*
	 * Eviction will normally happen from the packet path, and not
	 * from this gc worker.
	 *
	 * This worker is only here to reap expired entries when system went
	 * idle after a busy period.
	 *
	 * The heuristics below are supposed to balance conflicting goals:
	 *
	 * 1. Minimize time until we notice a stale entry
	 * 2. Maximize scan intervals to not waste cycles
	 *
	 * Normally, expire ratio will be close to 0.
	 *
	 * As soon as a sizeable fraction of the entries have expired
	 * increase scan frequency.
	 */
	ratio = scanned ? expired_count * 100 / scanned : 0;
	if (ratio > GC_EVICT_RATIO) {
		gc_work->next_gc_run = min_interval;
	} else {
		unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;

		BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);

		gc_work->next_gc_run += min_interval;
		if (gc_work->next_gc_run > max)
			gc_work->next_gc_run = max;
	}

	next_run = gc_work->next_gc_run;
	gc_work->last_bucket = i;
	/* early_drop is one-shot; packet path re-arms it on table pressure */
	gc_work->early_drop = false;
	queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
1232 | 1232 | ||
1233 | static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) | 1233 | static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) |
1234 | { | 1234 | { |
1235 | INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); | 1235 | INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); |
1236 | gc_work->next_gc_run = HZ; | 1236 | gc_work->next_gc_run = HZ; |
1237 | gc_work->exiting = false; | 1237 | gc_work->exiting = false; |
1238 | } | 1238 | } |
1239 | 1239 | ||
/* Allocate and minimally initialize a conntrack entry for the given
 * tuple pair.  hash is the precomputed original-direction bucket,
 * stashed in the entry for reuse at confirmation time.
 *
 * Returns the new entry, or ERR_PTR(-ENOMEM) when the table is full
 * (and early drop failed) or the slab allocation failed.
 */
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
		     const struct nf_conntrack_zone *zone,
		     const struct nf_conntrack_tuple *orig,
		     const struct nf_conntrack_tuple *repl,
		     gfp_t gfp, u32 hash)
{
	struct nf_conn *ct;

	/* We don't want any race condition at early drop stage */
	atomic_inc(&net->ct.count);

	if (nf_conntrack_max &&
	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
		if (!early_drop(net, hash)) {
			/* Ask the gc worker to early-drop on its next run. */
			if (!conntrack_gc_work.early_drop)
				conntrack_gc_work.early_drop = true;
			atomic_dec(&net->ct.count);
			net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	/*
	 * Do not use kmem_cache_zalloc(), as this cache uses
	 * SLAB_TYPESAFE_BY_RCU.
	 */
	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
	if (ct == NULL)
		goto out;

	spin_lock_init(&ct->lock);
	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* save hash for reusing when confirming */
	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
	ct->status = 0;
	write_pnet(&ct->ct_net, net);
	/* Zero only the region between __nfct_init_offset and proto;
	 * earlier fields were set above or must survive slab reuse.
	 */
	memset(&ct->__nfct_init_offset[0], 0,
	       offsetof(struct nf_conn, proto) -
	       offsetof(struct nf_conn, __nfct_init_offset[0]));

	nf_ct_zone_add(ct, zone);

	/* Because we use RCU lookups, we set ct_general.use to zero before
	 * this is inserted in any list.
	 */
	atomic_set(&ct->ct_general.use, 0);
	return ct;
out:
	atomic_dec(&net->ct.count);
	return ERR_PTR(-ENOMEM);
}
1294 | 1294 | ||
/* Public allocation entry point: same as __nf_conntrack_alloc() but
 * without a caller-supplied precomputed hash (passes 0).
 */
struct nf_conn *nf_conntrack_alloc(struct net *net,
				   const struct nf_conntrack_zone *zone,
				   const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl,
				   gfp_t gfp)
{
	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
1304 | 1304 | ||
/* Release a conntrack entry whose refcount has dropped to zero:
 * destroy extensions, return the object to the slab and decrement the
 * per-netns count.
 */
void nf_conntrack_free(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);

	/* A freed object has refcnt == 0, that's
	 * the golden rule for SLAB_TYPESAFE_BY_RCU
	 */
	WARN_ON(atomic_read(&ct->ct_general.use) != 0);

	nf_ct_ext_destroy(ct);
	nf_ct_ext_free(ct);
	kmem_cache_free(nf_conntrack_cachep, ct);
	/* Order the free before the count decrement so concurrent
	 * allocators don't observe a lowered count while the object is
	 * still being torn down.
	 */
	smp_mb__before_atomic();
	atomic_dec(&net->ct.count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
1321 | 1321 | ||
1322 | 1322 | ||
1323 | /* Allocate a new conntrack: we return -ENOMEM if classification | 1323 | /* Allocate a new conntrack: we return -ENOMEM if classification |
1324 | failed due to stress. Otherwise it really is unclassifiable. */ | 1324 | failed due to stress. Otherwise it really is unclassifiable. */ |
1325 | static noinline struct nf_conntrack_tuple_hash * | 1325 | static noinline struct nf_conntrack_tuple_hash * |
1326 | init_conntrack(struct net *net, struct nf_conn *tmpl, | 1326 | init_conntrack(struct net *net, struct nf_conn *tmpl, |
1327 | const struct nf_conntrack_tuple *tuple, | 1327 | const struct nf_conntrack_tuple *tuple, |
1328 | const struct nf_conntrack_l4proto *l4proto, | 1328 | const struct nf_conntrack_l4proto *l4proto, |
1329 | struct sk_buff *skb, | 1329 | struct sk_buff *skb, |
1330 | unsigned int dataoff, u32 hash) | 1330 | unsigned int dataoff, u32 hash) |
1331 | { | 1331 | { |
1332 | struct nf_conn *ct; | 1332 | struct nf_conn *ct; |
1333 | struct nf_conn_help *help; | 1333 | struct nf_conn_help *help; |
1334 | struct nf_conntrack_tuple repl_tuple; | 1334 | struct nf_conntrack_tuple repl_tuple; |
1335 | struct nf_conntrack_ecache *ecache; | 1335 | struct nf_conntrack_ecache *ecache; |
1336 | struct nf_conntrack_expect *exp = NULL; | 1336 | struct nf_conntrack_expect *exp = NULL; |
1337 | const struct nf_conntrack_zone *zone; | 1337 | const struct nf_conntrack_zone *zone; |
1338 | struct nf_conn_timeout *timeout_ext; | 1338 | struct nf_conn_timeout *timeout_ext; |
1339 | struct nf_conntrack_zone tmp; | 1339 | struct nf_conntrack_zone tmp; |
1340 | unsigned int *timeouts; | ||
1341 | 1340 | ||
1342 | if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { | 1341 | if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { |
1343 | pr_debug("Can't invert tuple.\n"); | 1342 | pr_debug("Can't invert tuple.\n"); |
1344 | return NULL; | 1343 | return NULL; |
1345 | } | 1344 | } |
1346 | 1345 | ||
1347 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); | 1346 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); |
1348 | ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, | 1347 | ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, |
1349 | hash); | 1348 | hash); |
1350 | if (IS_ERR(ct)) | 1349 | if (IS_ERR(ct)) |
1351 | return (struct nf_conntrack_tuple_hash *)ct; | 1350 | return (struct nf_conntrack_tuple_hash *)ct; |
1352 | 1351 | ||
1353 | if (!nf_ct_add_synproxy(ct, tmpl)) { | 1352 | if (!nf_ct_add_synproxy(ct, tmpl)) { |
1354 | nf_conntrack_free(ct); | 1353 | nf_conntrack_free(ct); |
1355 | return ERR_PTR(-ENOMEM); | 1354 | return ERR_PTR(-ENOMEM); |
1356 | } | 1355 | } |
1357 | 1356 | ||
1358 | timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; | 1357 | timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; |
1359 | if (timeout_ext) { | ||
1360 | timeouts = nf_ct_timeout_data(timeout_ext); | ||
1361 | if (unlikely(!timeouts)) | ||
1362 | timeouts = l4proto->get_timeouts(net); | ||
1363 | } else { | ||
1364 | timeouts = l4proto->get_timeouts(net); | ||
1365 | } | ||
1366 | 1358 | ||
1367 | if (!l4proto->new(ct, skb, dataoff, timeouts)) { | 1359 | if (!l4proto->new(ct, skb, dataoff)) { |
1368 | nf_conntrack_free(ct); | 1360 | nf_conntrack_free(ct); |
1369 | pr_debug("can't track with proto module\n"); | 1361 | pr_debug("can't track with proto module\n"); |
1370 | return NULL; | 1362 | return NULL; |
1371 | } | 1363 | } |
1372 | 1364 | ||
1373 | if (timeout_ext) | 1365 | if (timeout_ext) |
1374 | nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), | 1366 | nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), |
1375 | GFP_ATOMIC); | 1367 | GFP_ATOMIC); |
1376 | 1368 | ||
1377 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); | 1369 | nf_ct_acct_ext_add(ct, GFP_ATOMIC); |
1378 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | 1370 | nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); |
1379 | nf_ct_labels_ext_add(ct); | 1371 | nf_ct_labels_ext_add(ct); |
1380 | 1372 | ||
1381 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; | 1373 | ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; |
1382 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, | 1374 | nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, |
1383 | ecache ? ecache->expmask : 0, | 1375 | ecache ? ecache->expmask : 0, |
1384 | GFP_ATOMIC); | 1376 | GFP_ATOMIC); |
1385 | 1377 | ||
1386 | local_bh_disable(); | 1378 | local_bh_disable(); |
1387 | if (net->ct.expect_count) { | 1379 | if (net->ct.expect_count) { |
1388 | spin_lock(&nf_conntrack_expect_lock); | 1380 | spin_lock(&nf_conntrack_expect_lock); |
1389 | exp = nf_ct_find_expectation(net, zone, tuple); | 1381 | exp = nf_ct_find_expectation(net, zone, tuple); |
1390 | if (exp) { | 1382 | if (exp) { |
1391 | pr_debug("expectation arrives ct=%p exp=%p\n", | 1383 | pr_debug("expectation arrives ct=%p exp=%p\n", |
1392 | ct, exp); | 1384 | ct, exp); |
1393 | /* Welcome, Mr. Bond. We've been expecting you... */ | 1385 | /* Welcome, Mr. Bond. We've been expecting you... */ |
1394 | __set_bit(IPS_EXPECTED_BIT, &ct->status); | 1386 | __set_bit(IPS_EXPECTED_BIT, &ct->status); |
1395 | /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ | 1387 | /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ |
1396 | ct->master = exp->master; | 1388 | ct->master = exp->master; |
1397 | if (exp->helper) { | 1389 | if (exp->helper) { |
1398 | help = nf_ct_helper_ext_add(ct, exp->helper, | 1390 | help = nf_ct_helper_ext_add(ct, exp->helper, |
1399 | GFP_ATOMIC); | 1391 | GFP_ATOMIC); |
1400 | if (help) | 1392 | if (help) |
1401 | rcu_assign_pointer(help->helper, exp->helper); | 1393 | rcu_assign_pointer(help->helper, exp->helper); |
1402 | } | 1394 | } |
1403 | 1395 | ||
1404 | #ifdef CONFIG_NF_CONNTRACK_MARK | 1396 | #ifdef CONFIG_NF_CONNTRACK_MARK |
1405 | ct->mark = exp->master->mark; | 1397 | ct->mark = exp->master->mark; |
1406 | #endif | 1398 | #endif |
1407 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 1399 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
1408 | ct->secmark = exp->master->secmark; | 1400 | ct->secmark = exp->master->secmark; |
1409 | #endif | 1401 | #endif |
1410 | NF_CT_STAT_INC(net, expect_new); | 1402 | NF_CT_STAT_INC(net, expect_new); |
1411 | } | 1403 | } |
1412 | spin_unlock(&nf_conntrack_expect_lock); | 1404 | spin_unlock(&nf_conntrack_expect_lock); |
1413 | } | 1405 | } |
1414 | if (!exp) | 1406 | if (!exp) |
1415 | __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); | 1407 | __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); |
1416 | 1408 | ||
1417 | /* Now it is inserted into the unconfirmed list, bump refcount */ | 1409 | /* Now it is inserted into the unconfirmed list, bump refcount */ |
1418 | nf_conntrack_get(&ct->ct_general); | 1410 | nf_conntrack_get(&ct->ct_general); |
1419 | nf_ct_add_to_unconfirmed_list(ct); | 1411 | nf_ct_add_to_unconfirmed_list(ct); |
1420 | 1412 | ||
1421 | local_bh_enable(); | 1413 | local_bh_enable(); |
1422 | 1414 | ||
1423 | if (exp) { | 1415 | if (exp) { |
1424 | if (exp->expectfn) | 1416 | if (exp->expectfn) |
1425 | exp->expectfn(ct, exp); | 1417 | exp->expectfn(ct, exp); |
1426 | nf_ct_expect_put(exp); | 1418 | nf_ct_expect_put(exp); |
1427 | } | 1419 | } |
1428 | 1420 | ||
1429 | return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; | 1421 | return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; |
1430 | } | 1422 | } |
1431 | 1423 | ||
1432 | /* On success, returns 0, sets skb->_nfct | ctinfo */ | 1424 | /* On success, returns 0, sets skb->_nfct | ctinfo */ |
1433 | static int | 1425 | static int |
1434 | resolve_normal_ct(struct net *net, struct nf_conn *tmpl, | 1426 | resolve_normal_ct(struct net *net, struct nf_conn *tmpl, |
1435 | struct sk_buff *skb, | 1427 | struct sk_buff *skb, |
1436 | unsigned int dataoff, | 1428 | unsigned int dataoff, |
1437 | u_int16_t l3num, | 1429 | u_int16_t l3num, |
1438 | u_int8_t protonum, | 1430 | u_int8_t protonum, |
1439 | const struct nf_conntrack_l4proto *l4proto) | 1431 | const struct nf_conntrack_l4proto *l4proto) |
1440 | { | 1432 | { |
1441 | const struct nf_conntrack_zone *zone; | 1433 | const struct nf_conntrack_zone *zone; |
1442 | struct nf_conntrack_tuple tuple; | 1434 | struct nf_conntrack_tuple tuple; |
1443 | struct nf_conntrack_tuple_hash *h; | 1435 | struct nf_conntrack_tuple_hash *h; |
1444 | enum ip_conntrack_info ctinfo; | 1436 | enum ip_conntrack_info ctinfo; |
1445 | struct nf_conntrack_zone tmp; | 1437 | struct nf_conntrack_zone tmp; |
1446 | struct nf_conn *ct; | 1438 | struct nf_conn *ct; |
1447 | u32 hash; | 1439 | u32 hash; |
1448 | 1440 | ||
1449 | if (!nf_ct_get_tuple(skb, skb_network_offset(skb), | 1441 | if (!nf_ct_get_tuple(skb, skb_network_offset(skb), |
1450 | dataoff, l3num, protonum, net, &tuple, l4proto)) { | 1442 | dataoff, l3num, protonum, net, &tuple, l4proto)) { |
1451 | pr_debug("Can't get tuple\n"); | 1443 | pr_debug("Can't get tuple\n"); |
1452 | return 0; | 1444 | return 0; |
1453 | } | 1445 | } |
1454 | 1446 | ||
1455 | /* look for tuple match */ | 1447 | /* look for tuple match */ |
1456 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); | 1448 | zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); |
1457 | hash = hash_conntrack_raw(&tuple, net); | 1449 | hash = hash_conntrack_raw(&tuple, net); |
1458 | h = __nf_conntrack_find_get(net, zone, &tuple, hash); | 1450 | h = __nf_conntrack_find_get(net, zone, &tuple, hash); |
1459 | if (!h) { | 1451 | if (!h) { |
1460 | h = init_conntrack(net, tmpl, &tuple, l4proto, | 1452 | h = init_conntrack(net, tmpl, &tuple, l4proto, |
1461 | skb, dataoff, hash); | 1453 | skb, dataoff, hash); |
1462 | if (!h) | 1454 | if (!h) |
1463 | return 0; | 1455 | return 0; |
1464 | if (IS_ERR(h)) | 1456 | if (IS_ERR(h)) |
1465 | return PTR_ERR(h); | 1457 | return PTR_ERR(h); |
1466 | } | 1458 | } |
1467 | ct = nf_ct_tuplehash_to_ctrack(h); | 1459 | ct = nf_ct_tuplehash_to_ctrack(h); |
1468 | 1460 | ||
1469 | /* It exists; we have (non-exclusive) reference. */ | 1461 | /* It exists; we have (non-exclusive) reference. */ |
1470 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { | 1462 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { |
1471 | ctinfo = IP_CT_ESTABLISHED_REPLY; | 1463 | ctinfo = IP_CT_ESTABLISHED_REPLY; |
1472 | } else { | 1464 | } else { |
1473 | /* Once we've had two way comms, always ESTABLISHED. */ | 1465 | /* Once we've had two way comms, always ESTABLISHED. */ |
1474 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | 1466 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { |
1475 | pr_debug("normal packet for %p\n", ct); | 1467 | pr_debug("normal packet for %p\n", ct); |
1476 | ctinfo = IP_CT_ESTABLISHED; | 1468 | ctinfo = IP_CT_ESTABLISHED; |
1477 | } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { | 1469 | } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { |
1478 | pr_debug("related packet for %p\n", ct); | 1470 | pr_debug("related packet for %p\n", ct); |
1479 | ctinfo = IP_CT_RELATED; | 1471 | ctinfo = IP_CT_RELATED; |
1480 | } else { | 1472 | } else { |
1481 | pr_debug("new packet for %p\n", ct); | 1473 | pr_debug("new packet for %p\n", ct); |
1482 | ctinfo = IP_CT_NEW; | 1474 | ctinfo = IP_CT_NEW; |
1483 | } | 1475 | } |
1484 | } | 1476 | } |
1485 | nf_ct_set(skb, ct, ctinfo); | 1477 | nf_ct_set(skb, ct, ctinfo); |
1486 | return 0; | 1478 | return 0; |
1487 | } | 1479 | } |
1488 | 1480 | ||
1489 | unsigned int | 1481 | unsigned int |
1490 | nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, | 1482 | nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, |
1491 | struct sk_buff *skb) | 1483 | struct sk_buff *skb) |
1492 | { | 1484 | { |
1493 | const struct nf_conntrack_l4proto *l4proto; | 1485 | const struct nf_conntrack_l4proto *l4proto; |
1494 | struct nf_conn *ct, *tmpl; | 1486 | struct nf_conn *ct, *tmpl; |
1495 | enum ip_conntrack_info ctinfo; | 1487 | enum ip_conntrack_info ctinfo; |
1496 | unsigned int *timeouts; | ||
1497 | u_int8_t protonum; | 1488 | u_int8_t protonum; |
1498 | int dataoff, ret; | 1489 | int dataoff, ret; |
1499 | 1490 | ||
1500 | tmpl = nf_ct_get(skb, &ctinfo); | 1491 | tmpl = nf_ct_get(skb, &ctinfo); |
1501 | if (tmpl || ctinfo == IP_CT_UNTRACKED) { | 1492 | if (tmpl || ctinfo == IP_CT_UNTRACKED) { |
1502 | /* Previously seen (loopback or untracked)? Ignore. */ | 1493 | /* Previously seen (loopback or untracked)? Ignore. */ |
1503 | if ((tmpl && !nf_ct_is_template(tmpl)) || | 1494 | if ((tmpl && !nf_ct_is_template(tmpl)) || |
1504 | ctinfo == IP_CT_UNTRACKED) { | 1495 | ctinfo == IP_CT_UNTRACKED) { |
1505 | NF_CT_STAT_INC_ATOMIC(net, ignore); | 1496 | NF_CT_STAT_INC_ATOMIC(net, ignore); |
1506 | return NF_ACCEPT; | 1497 | return NF_ACCEPT; |
1507 | } | 1498 | } |
1508 | skb->_nfct = 0; | 1499 | skb->_nfct = 0; |
1509 | } | 1500 | } |
1510 | 1501 | ||
1511 | /* rcu_read_lock()ed by nf_hook_thresh */ | 1502 | /* rcu_read_lock()ed by nf_hook_thresh */ |
1512 | dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum); | 1503 | dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum); |
1513 | if (dataoff <= 0) { | 1504 | if (dataoff <= 0) { |
1514 | pr_debug("not prepared to track yet or error occurred\n"); | 1505 | pr_debug("not prepared to track yet or error occurred\n"); |
1515 | NF_CT_STAT_INC_ATOMIC(net, error); | 1506 | NF_CT_STAT_INC_ATOMIC(net, error); |
1516 | NF_CT_STAT_INC_ATOMIC(net, invalid); | 1507 | NF_CT_STAT_INC_ATOMIC(net, invalid); |
1517 | ret = NF_ACCEPT; | 1508 | ret = NF_ACCEPT; |
1518 | goto out; | 1509 | goto out; |
1519 | } | 1510 | } |
1520 | 1511 | ||
1521 | l4proto = __nf_ct_l4proto_find(pf, protonum); | 1512 | l4proto = __nf_ct_l4proto_find(pf, protonum); |
1522 | 1513 | ||
1523 | /* It may be an special packet, error, unclean... | 1514 | /* It may be an special packet, error, unclean... |
1524 | * inverse of the return code tells to the netfilter | 1515 | * inverse of the return code tells to the netfilter |
1525 | * core what to do with the packet. */ | 1516 | * core what to do with the packet. */ |
1526 | if (l4proto->error != NULL) { | 1517 | if (l4proto->error != NULL) { |
1527 | ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); | 1518 | ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); |
1528 | if (ret <= 0) { | 1519 | if (ret <= 0) { |
1529 | NF_CT_STAT_INC_ATOMIC(net, error); | 1520 | NF_CT_STAT_INC_ATOMIC(net, error); |
1530 | NF_CT_STAT_INC_ATOMIC(net, invalid); | 1521 | NF_CT_STAT_INC_ATOMIC(net, invalid); |
1531 | ret = -ret; | 1522 | ret = -ret; |
1532 | goto out; | 1523 | goto out; |
1533 | } | 1524 | } |
1534 | /* ICMP[v6] protocol trackers may assign one conntrack. */ | 1525 | /* ICMP[v6] protocol trackers may assign one conntrack. */ |
1535 | if (skb->_nfct) | 1526 | if (skb->_nfct) |
1536 | goto out; | 1527 | goto out; |
1537 | } | 1528 | } |
1538 | repeat: | 1529 | repeat: |
1539 | ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto); | 1530 | ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto); |
1540 | if (ret < 0) { | 1531 | if (ret < 0) { |
1541 | /* Too stressed to deal. */ | 1532 | /* Too stressed to deal. */ |
1542 | NF_CT_STAT_INC_ATOMIC(net, drop); | 1533 | NF_CT_STAT_INC_ATOMIC(net, drop); |
1543 | ret = NF_DROP; | 1534 | ret = NF_DROP; |
1544 | goto out; | 1535 | goto out; |
1545 | } | 1536 | } |
1546 | 1537 | ||
1547 | ct = nf_ct_get(skb, &ctinfo); | 1538 | ct = nf_ct_get(skb, &ctinfo); |
1548 | if (!ct) { | 1539 | if (!ct) { |
1549 | /* Not valid part of a connection */ | 1540 | /* Not valid part of a connection */ |
1550 | NF_CT_STAT_INC_ATOMIC(net, invalid); | 1541 | NF_CT_STAT_INC_ATOMIC(net, invalid); |
1551 | ret = NF_ACCEPT; | 1542 | ret = NF_ACCEPT; |
1552 | goto out; | 1543 | goto out; |
1553 | } | 1544 | } |
1554 | 1545 | ||
1555 | /* Decide what timeout policy we want to apply to this flow. */ | 1546 | ret = l4proto->packet(ct, skb, dataoff, ctinfo); |
1556 | timeouts = nf_ct_timeout_lookup(net, ct, l4proto); | ||
1557 | |||
1558 | ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); | ||
1559 | if (ret <= 0) { | 1547 | if (ret <= 0) { |
1560 | /* Invalid: inverse of the return code tells | 1548 | /* Invalid: inverse of the return code tells |
1561 | * the netfilter core what to do */ | 1549 | * the netfilter core what to do */ |
1562 | pr_debug("nf_conntrack_in: Can't track with proto module\n"); | 1550 | pr_debug("nf_conntrack_in: Can't track with proto module\n"); |
1563 | nf_conntrack_put(&ct->ct_general); | 1551 | nf_conntrack_put(&ct->ct_general); |
1564 | skb->_nfct = 0; | 1552 | skb->_nfct = 0; |
1565 | NF_CT_STAT_INC_ATOMIC(net, invalid); | 1553 | NF_CT_STAT_INC_ATOMIC(net, invalid); |
1566 | if (ret == -NF_DROP) | 1554 | if (ret == -NF_DROP) |
1567 | NF_CT_STAT_INC_ATOMIC(net, drop); | 1555 | NF_CT_STAT_INC_ATOMIC(net, drop); |
1568 | /* Special case: TCP tracker reports an attempt to reopen a | 1556 | /* Special case: TCP tracker reports an attempt to reopen a |
1569 | * closed/aborted connection. We have to go back and create a | 1557 | * closed/aborted connection. We have to go back and create a |
1570 | * fresh conntrack. | 1558 | * fresh conntrack. |
1571 | */ | 1559 | */ |
1572 | if (ret == -NF_REPEAT) | 1560 | if (ret == -NF_REPEAT) |
1573 | goto repeat; | 1561 | goto repeat; |
1574 | ret = -ret; | 1562 | ret = -ret; |
1575 | goto out; | 1563 | goto out; |
1576 | } | 1564 | } |
1577 | 1565 | ||
1578 | if (ctinfo == IP_CT_ESTABLISHED_REPLY && | 1566 | if (ctinfo == IP_CT_ESTABLISHED_REPLY && |
1579 | !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) | 1567 | !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) |
1580 | nf_conntrack_event_cache(IPCT_REPLY, ct); | 1568 | nf_conntrack_event_cache(IPCT_REPLY, ct); |
1581 | out: | 1569 | out: |
1582 | if (tmpl) | 1570 | if (tmpl) |
1583 | nf_ct_put(tmpl); | 1571 | nf_ct_put(tmpl); |
1584 | 1572 | ||
1585 | return ret; | 1573 | return ret; |
1586 | } | 1574 | } |
1587 | EXPORT_SYMBOL_GPL(nf_conntrack_in); | 1575 | EXPORT_SYMBOL_GPL(nf_conntrack_in); |
1588 | 1576 | ||
1589 | bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, | 1577 | bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, |
1590 | const struct nf_conntrack_tuple *orig) | 1578 | const struct nf_conntrack_tuple *orig) |
1591 | { | 1579 | { |
1592 | bool ret; | 1580 | bool ret; |
1593 | 1581 | ||
1594 | rcu_read_lock(); | 1582 | rcu_read_lock(); |
1595 | ret = nf_ct_invert_tuple(inverse, orig, | 1583 | ret = nf_ct_invert_tuple(inverse, orig, |
1596 | __nf_ct_l4proto_find(orig->src.l3num, | 1584 | __nf_ct_l4proto_find(orig->src.l3num, |
1597 | orig->dst.protonum)); | 1585 | orig->dst.protonum)); |
1598 | rcu_read_unlock(); | 1586 | rcu_read_unlock(); |
1599 | return ret; | 1587 | return ret; |
1600 | } | 1588 | } |
1601 | EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); | 1589 | EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); |
1602 | 1590 | ||
1603 | /* Alter reply tuple (maybe alter helper). This is for NAT, and is | 1591 | /* Alter reply tuple (maybe alter helper). This is for NAT, and is |
1604 | implicitly racy: see __nf_conntrack_confirm */ | 1592 | implicitly racy: see __nf_conntrack_confirm */ |
1605 | void nf_conntrack_alter_reply(struct nf_conn *ct, | 1593 | void nf_conntrack_alter_reply(struct nf_conn *ct, |
1606 | const struct nf_conntrack_tuple *newreply) | 1594 | const struct nf_conntrack_tuple *newreply) |
1607 | { | 1595 | { |
1608 | struct nf_conn_help *help = nfct_help(ct); | 1596 | struct nf_conn_help *help = nfct_help(ct); |
1609 | 1597 | ||
1610 | /* Should be unconfirmed, so not in hash table yet */ | 1598 | /* Should be unconfirmed, so not in hash table yet */ |
1611 | WARN_ON(nf_ct_is_confirmed(ct)); | 1599 | WARN_ON(nf_ct_is_confirmed(ct)); |
1612 | 1600 | ||
1613 | pr_debug("Altering reply tuple of %p to ", ct); | 1601 | pr_debug("Altering reply tuple of %p to ", ct); |
1614 | nf_ct_dump_tuple(newreply); | 1602 | nf_ct_dump_tuple(newreply); |
1615 | 1603 | ||
1616 | ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; | 1604 | ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; |
1617 | if (ct->master || (help && !hlist_empty(&help->expectations))) | 1605 | if (ct->master || (help && !hlist_empty(&help->expectations))) |
1618 | return; | 1606 | return; |
1619 | 1607 | ||
1620 | rcu_read_lock(); | 1608 | rcu_read_lock(); |
1621 | __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); | 1609 | __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); |
1622 | rcu_read_unlock(); | 1610 | rcu_read_unlock(); |
1623 | } | 1611 | } |
1624 | EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); | 1612 | EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); |
1625 | 1613 | ||
1626 | /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ | 1614 | /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ |
1627 | void __nf_ct_refresh_acct(struct nf_conn *ct, | 1615 | void __nf_ct_refresh_acct(struct nf_conn *ct, |
1628 | enum ip_conntrack_info ctinfo, | 1616 | enum ip_conntrack_info ctinfo, |
1629 | const struct sk_buff *skb, | 1617 | const struct sk_buff *skb, |
1630 | unsigned long extra_jiffies, | 1618 | unsigned long extra_jiffies, |
1631 | int do_acct) | 1619 | int do_acct) |
1632 | { | 1620 | { |
1633 | WARN_ON(!skb); | 1621 | WARN_ON(!skb); |
1634 | 1622 | ||
1635 | /* Only update if this is not a fixed timeout */ | 1623 | /* Only update if this is not a fixed timeout */ |
1636 | if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) | 1624 | if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) |
1637 | goto acct; | 1625 | goto acct; |
1638 | 1626 | ||
1639 | /* If not in hash table, timer will not be active yet */ | 1627 | /* If not in hash table, timer will not be active yet */ |
1640 | if (nf_ct_is_confirmed(ct)) | 1628 | if (nf_ct_is_confirmed(ct)) |
1641 | extra_jiffies += nfct_time_stamp; | 1629 | extra_jiffies += nfct_time_stamp; |
1642 | 1630 | ||
1643 | ct->timeout = extra_jiffies; | 1631 | ct->timeout = extra_jiffies; |
1644 | acct: | 1632 | acct: |
1645 | if (do_acct) | 1633 | if (do_acct) |
1646 | nf_ct_acct_update(ct, ctinfo, skb->len); | 1634 | nf_ct_acct_update(ct, ctinfo, skb->len); |
1647 | } | 1635 | } |
1648 | EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); | 1636 | EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); |
1649 | 1637 | ||
1650 | bool nf_ct_kill_acct(struct nf_conn *ct, | 1638 | bool nf_ct_kill_acct(struct nf_conn *ct, |
1651 | enum ip_conntrack_info ctinfo, | 1639 | enum ip_conntrack_info ctinfo, |
1652 | const struct sk_buff *skb) | 1640 | const struct sk_buff *skb) |
1653 | { | 1641 | { |
1654 | nf_ct_acct_update(ct, ctinfo, skb->len); | 1642 | nf_ct_acct_update(ct, ctinfo, skb->len); |
1655 | 1643 | ||
1656 | return nf_ct_delete(ct, 0, 0); | 1644 | return nf_ct_delete(ct, 0, 0); |
1657 | } | 1645 | } |
1658 | EXPORT_SYMBOL_GPL(nf_ct_kill_acct); | 1646 | EXPORT_SYMBOL_GPL(nf_ct_kill_acct); |
1659 | 1647 | ||
1660 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 1648 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
1661 | 1649 | ||
1662 | #include <linux/netfilter/nfnetlink.h> | 1650 | #include <linux/netfilter/nfnetlink.h> |
1663 | #include <linux/netfilter/nfnetlink_conntrack.h> | 1651 | #include <linux/netfilter/nfnetlink_conntrack.h> |
1664 | #include <linux/mutex.h> | 1652 | #include <linux/mutex.h> |
1665 | 1653 | ||
1666 | /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be | 1654 | /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be |
1667 | * in ip_conntrack_core, since we don't want the protocols to autoload | 1655 | * in ip_conntrack_core, since we don't want the protocols to autoload |
1668 | * or depend on ctnetlink */ | 1656 | * or depend on ctnetlink */ |
1669 | int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, | 1657 | int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, |
1670 | const struct nf_conntrack_tuple *tuple) | 1658 | const struct nf_conntrack_tuple *tuple) |
1671 | { | 1659 | { |
1672 | if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || | 1660 | if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || |
1673 | nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) | 1661 | nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) |
1674 | goto nla_put_failure; | 1662 | goto nla_put_failure; |
1675 | return 0; | 1663 | return 0; |
1676 | 1664 | ||
1677 | nla_put_failure: | 1665 | nla_put_failure: |
1678 | return -1; | 1666 | return -1; |
1679 | } | 1667 | } |
1680 | EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); | 1668 | EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); |
1681 | 1669 | ||
1682 | const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { | 1670 | const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { |
1683 | [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, | 1671 | [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, |
1684 | [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, | 1672 | [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, |
1685 | }; | 1673 | }; |
1686 | EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); | 1674 | EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); |
1687 | 1675 | ||
1688 | int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], | 1676 | int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], |
1689 | struct nf_conntrack_tuple *t) | 1677 | struct nf_conntrack_tuple *t) |
1690 | { | 1678 | { |
1691 | if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) | 1679 | if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) |
1692 | return -EINVAL; | 1680 | return -EINVAL; |
1693 | 1681 | ||
1694 | t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); | 1682 | t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); |
1695 | t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); | 1683 | t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); |
1696 | 1684 | ||
1697 | return 0; | 1685 | return 0; |
1698 | } | 1686 | } |
1699 | EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); | 1687 | EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); |
1700 | 1688 | ||
1701 | unsigned int nf_ct_port_nlattr_tuple_size(void) | 1689 | unsigned int nf_ct_port_nlattr_tuple_size(void) |
1702 | { | 1690 | { |
1703 | static unsigned int size __read_mostly; | 1691 | static unsigned int size __read_mostly; |
1704 | 1692 | ||
1705 | if (!size) | 1693 | if (!size) |
1706 | size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); | 1694 | size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); |
1707 | 1695 | ||
1708 | return size; | 1696 | return size; |
1709 | } | 1697 | } |
1710 | EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); | 1698 | EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); |
1711 | #endif | 1699 | #endif |
1712 | 1700 | ||
1713 | /* Used by ipt_REJECT and ip6t_REJECT. */ | 1701 | /* Used by ipt_REJECT and ip6t_REJECT. */ |
1714 | static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) | 1702 | static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) |
1715 | { | 1703 | { |
1716 | struct nf_conn *ct; | 1704 | struct nf_conn *ct; |
1717 | enum ip_conntrack_info ctinfo; | 1705 | enum ip_conntrack_info ctinfo; |
1718 | 1706 | ||
1719 | /* This ICMP is in reverse direction to the packet which caused it */ | 1707 | /* This ICMP is in reverse direction to the packet which caused it */ |
1720 | ct = nf_ct_get(skb, &ctinfo); | 1708 | ct = nf_ct_get(skb, &ctinfo); |
1721 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) | 1709 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) |
1722 | ctinfo = IP_CT_RELATED_REPLY; | 1710 | ctinfo = IP_CT_RELATED_REPLY; |
1723 | else | 1711 | else |
1724 | ctinfo = IP_CT_RELATED; | 1712 | ctinfo = IP_CT_RELATED; |
1725 | 1713 | ||
1726 | /* Attach to new skbuff, and increment count */ | 1714 | /* Attach to new skbuff, and increment count */ |
1727 | nf_ct_set(nskb, ct, ctinfo); | 1715 | nf_ct_set(nskb, ct, ctinfo); |
1728 | nf_conntrack_get(skb_nfct(nskb)); | 1716 | nf_conntrack_get(skb_nfct(nskb)); |
1729 | } | 1717 | } |
1730 | 1718 | ||
1731 | static int nf_conntrack_update(struct net *net, struct sk_buff *skb) | 1719 | static int nf_conntrack_update(struct net *net, struct sk_buff *skb) |
1732 | { | 1720 | { |
1733 | const struct nf_conntrack_l4proto *l4proto; | 1721 | const struct nf_conntrack_l4proto *l4proto; |
1734 | struct nf_conntrack_tuple_hash *h; | 1722 | struct nf_conntrack_tuple_hash *h; |
1735 | struct nf_conntrack_tuple tuple; | 1723 | struct nf_conntrack_tuple tuple; |
1736 | enum ip_conntrack_info ctinfo; | 1724 | enum ip_conntrack_info ctinfo; |
1737 | struct nf_nat_hook *nat_hook; | 1725 | struct nf_nat_hook *nat_hook; |
1738 | unsigned int status; | 1726 | unsigned int status; |
1739 | struct nf_conn *ct; | 1727 | struct nf_conn *ct; |
1740 | int dataoff; | 1728 | int dataoff; |
1741 | u16 l3num; | 1729 | u16 l3num; |
1742 | u8 l4num; | 1730 | u8 l4num; |
1743 | 1731 | ||
1744 | ct = nf_ct_get(skb, &ctinfo); | 1732 | ct = nf_ct_get(skb, &ctinfo); |
1745 | if (!ct || nf_ct_is_confirmed(ct)) | 1733 | if (!ct || nf_ct_is_confirmed(ct)) |
1746 | return 0; | 1734 | return 0; |
1747 | 1735 | ||
1748 | l3num = nf_ct_l3num(ct); | 1736 | l3num = nf_ct_l3num(ct); |
1749 | 1737 | ||
1750 | dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); | 1738 | dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); |
1751 | if (dataoff <= 0) | 1739 | if (dataoff <= 0) |
1752 | return -1; | 1740 | return -1; |
1753 | 1741 | ||
1754 | l4proto = nf_ct_l4proto_find_get(l3num, l4num); | 1742 | l4proto = nf_ct_l4proto_find_get(l3num, l4num); |
1755 | 1743 | ||
1756 | if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, | 1744 | if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, |
1757 | l4num, net, &tuple, l4proto)) | 1745 | l4num, net, &tuple, l4proto)) |
1758 | return -1; | 1746 | return -1; |
1759 | 1747 | ||
1760 | if (ct->status & IPS_SRC_NAT) { | 1748 | if (ct->status & IPS_SRC_NAT) { |
1761 | memcpy(tuple.src.u3.all, | 1749 | memcpy(tuple.src.u3.all, |
1762 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all, | 1750 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all, |
1763 | sizeof(tuple.src.u3.all)); | 1751 | sizeof(tuple.src.u3.all)); |
1764 | tuple.src.u.all = | 1752 | tuple.src.u.all = |
1765 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all; | 1753 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all; |
1766 | } | 1754 | } |
1767 | 1755 | ||
1768 | if (ct->status & IPS_DST_NAT) { | 1756 | if (ct->status & IPS_DST_NAT) { |
1769 | memcpy(tuple.dst.u3.all, | 1757 | memcpy(tuple.dst.u3.all, |
1770 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all, | 1758 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all, |
1771 | sizeof(tuple.dst.u3.all)); | 1759 | sizeof(tuple.dst.u3.all)); |
1772 | tuple.dst.u.all = | 1760 | tuple.dst.u.all = |
1773 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all; | 1761 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all; |
1774 | } | 1762 | } |
1775 | 1763 | ||
1776 | h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); | 1764 | h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); |
1777 | if (!h) | 1765 | if (!h) |
1778 | return 0; | 1766 | return 0; |
1779 | 1767 | ||
1780 | /* Store status bits of the conntrack that is clashing to re-do NAT | 1768 | /* Store status bits of the conntrack that is clashing to re-do NAT |
1781 | * mangling according to what it has been done already to this packet. | 1769 | * mangling according to what it has been done already to this packet. |
1782 | */ | 1770 | */ |
1783 | status = ct->status; | 1771 | status = ct->status; |
1784 | 1772 | ||
1785 | nf_ct_put(ct); | 1773 | nf_ct_put(ct); |
1786 | ct = nf_ct_tuplehash_to_ctrack(h); | 1774 | ct = nf_ct_tuplehash_to_ctrack(h); |
1787 | nf_ct_set(skb, ct, ctinfo); | 1775 | nf_ct_set(skb, ct, ctinfo); |
1788 | 1776 | ||
1789 | nat_hook = rcu_dereference(nf_nat_hook); | 1777 | nat_hook = rcu_dereference(nf_nat_hook); |
1790 | if (!nat_hook) | 1778 | if (!nat_hook) |
1791 | return 0; | 1779 | return 0; |
1792 | 1780 | ||
1793 | if (status & IPS_SRC_NAT && | 1781 | if (status & IPS_SRC_NAT && |
1794 | nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, | 1782 | nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, |
1795 | IP_CT_DIR_ORIGINAL) == NF_DROP) | 1783 | IP_CT_DIR_ORIGINAL) == NF_DROP) |
1796 | return -1; | 1784 | return -1; |
1797 | 1785 | ||
1798 | if (status & IPS_DST_NAT && | 1786 | if (status & IPS_DST_NAT && |
1799 | nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, | 1787 | nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, |
1800 | IP_CT_DIR_ORIGINAL) == NF_DROP) | 1788 | IP_CT_DIR_ORIGINAL) == NF_DROP) |
1801 | return -1; | 1789 | return -1; |
1802 | 1790 | ||
1803 | return 0; | 1791 | return 0; |
1804 | } | 1792 | } |
1805 | 1793 | ||
1806 | static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, | 1794 | static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, |
1807 | const struct sk_buff *skb) | 1795 | const struct sk_buff *skb) |
1808 | { | 1796 | { |
1809 | const struct nf_conntrack_tuple *src_tuple; | 1797 | const struct nf_conntrack_tuple *src_tuple; |
1810 | const struct nf_conntrack_tuple_hash *hash; | 1798 | const struct nf_conntrack_tuple_hash *hash; |
1811 | struct nf_conntrack_tuple srctuple; | 1799 | struct nf_conntrack_tuple srctuple; |
1812 | enum ip_conntrack_info ctinfo; | 1800 | enum ip_conntrack_info ctinfo; |
1813 | struct nf_conn *ct; | 1801 | struct nf_conn *ct; |
1814 | 1802 | ||
1815 | ct = nf_ct_get(skb, &ctinfo); | 1803 | ct = nf_ct_get(skb, &ctinfo); |
1816 | if (ct) { | 1804 | if (ct) { |
1817 | src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo)); | 1805 | src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo)); |
1818 | memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); | 1806 | memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); |
1819 | return true; | 1807 | return true; |
1820 | } | 1808 | } |
1821 | 1809 | ||
1822 | if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), | 1810 | if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), |
1823 | NFPROTO_IPV4, dev_net(skb->dev), | 1811 | NFPROTO_IPV4, dev_net(skb->dev), |
1824 | &srctuple)) | 1812 | &srctuple)) |
1825 | return false; | 1813 | return false; |
1826 | 1814 | ||
1827 | hash = nf_conntrack_find_get(dev_net(skb->dev), | 1815 | hash = nf_conntrack_find_get(dev_net(skb->dev), |
1828 | &nf_ct_zone_dflt, | 1816 | &nf_ct_zone_dflt, |
1829 | &srctuple); | 1817 | &srctuple); |
1830 | if (!hash) | 1818 | if (!hash) |
1831 | return false; | 1819 | return false; |
1832 | 1820 | ||
1833 | ct = nf_ct_tuplehash_to_ctrack(hash); | 1821 | ct = nf_ct_tuplehash_to_ctrack(hash); |
1834 | src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir); | 1822 | src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir); |
1835 | memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); | 1823 | memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); |
1836 | nf_ct_put(ct); | 1824 | nf_ct_put(ct); |
1837 | 1825 | ||
1838 | return true; | 1826 | return true; |
1839 | } | 1827 | } |
1840 | 1828 | ||
/* Bring out ya dead! */
/* Scan the conntrack hash starting at *bucket for the first
 * original-direction entry for which @iter returns non-zero.
 *
 * On a match, returns the entry with an extra reference held (taken
 * under the bucket lock, so the entry cannot go away); the caller must
 * nf_ct_put() it.  Returns NULL once the end of the table is reached.
 * *bucket is updated in place so the caller can resume the walk.
 */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_nulls_node *n;
	spinlock_t *lockp;

	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
		local_bh_disable();
		nf_conntrack_lock(lockp);
		/* Re-check the bound under the lock: the table may have
		 * been resized (shrunk) since the loop condition ran.
		 */
		if (*bucket < nf_conntrack_htable_size) {
			hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
				/* Each entry is hashed twice (once per
				 * direction); visit it only once.
				 */
				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
					continue;
				ct = nf_ct_tuplehash_to_ctrack(h);
				if (iter(ct, data))
					goto found;
			}
		}
		spin_unlock(lockp);
		local_bh_enable();
		cond_resched();
	}

	return NULL;
found:
	/* Pin the entry before dropping the bucket lock. */
	atomic_inc(&ct->ct_general.use);
	spin_unlock(lockp);
	local_bh_enable();
	return ct;
}
1876 | 1864 | ||
/* Walk the whole conntrack table and delete every entry for which
 * @iter returns non-zero, reporting the deletions to ctnetlink using
 * @portid/@report.
 *
 * If the hash table is replaced concurrently (detected via
 * nf_conntrack_generation), the walk restarts from bucket 0 so no
 * entry is missed.  May sleep between buckets.
 */
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
				  void *data, u32 portid, int report)
{
	unsigned int bucket = 0, sequence;
	struct nf_conn *ct;

	might_sleep();

	for (;;) {
		sequence = read_seqcount_begin(&nf_conntrack_generation);

		while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
			/* Time to push up daises... */

			nf_ct_delete(ct, portid, report);
			nf_ct_put(ct);	/* ref taken by get_next_corpse() */
			cond_resched();
		}

		if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
			break;
		/* table was resized under us -- start over */
		bucket = 0;
	}
}
1901 | 1889 | ||
/* Closure handed to nf_ct_iterate_cleanup() via iter_net_only() so a
 * per-netns walk can reuse the global-table iterator.
 */
struct iter_data {
	int (*iter)(struct nf_conn *i, void *data);	/* user callback */
	void *data;					/* user cookie */
	struct net *net;				/* netns filter */
};
1907 | 1895 | ||
1908 | static int iter_net_only(struct nf_conn *i, void *data) | 1896 | static int iter_net_only(struct nf_conn *i, void *data) |
1909 | { | 1897 | { |
1910 | struct iter_data *d = data; | 1898 | struct iter_data *d = data; |
1911 | 1899 | ||
1912 | if (!net_eq(d->net, nf_ct_net(i))) | 1900 | if (!net_eq(d->net, nf_ct_net(i))) |
1913 | return 0; | 1901 | return 0; |
1914 | 1902 | ||
1915 | return d->iter(i, d->data); | 1903 | return d->iter(i, d->data); |
1916 | } | 1904 | } |
1917 | 1905 | ||
/* Mark every conntrack on each CPU's unconfirmed list of @net as dying
 * so it is dropped instead of being confirmed into the main table.
 * Takes each pcpu lock in turn; may sleep between CPUs.
 */
static void
__nf_ct_unconfirmed_destroy(struct net *net)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct nf_conntrack_tuple_hash *h;
		struct hlist_nulls_node *n;
		struct ct_pcpu *pcpu;

		pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);

		spin_lock_bh(&pcpu->lock);
		hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
			struct nf_conn *ct;

			ct = nf_ct_tuplehash_to_ctrack(h);

			/* we cannot call iter() on unconfirmed list, the
			 * owning cpu can reallocate ct->ext at any time.
			 */
			set_bit(IPS_DYING_BIT, &ct->status);
		}
		spin_unlock_bh(&pcpu->lock);
		cond_resched();
	}
}
1945 | 1933 | ||
1946 | void nf_ct_unconfirmed_destroy(struct net *net) | 1934 | void nf_ct_unconfirmed_destroy(struct net *net) |
1947 | { | 1935 | { |
1948 | might_sleep(); | 1936 | might_sleep(); |
1949 | 1937 | ||
1950 | if (atomic_read(&net->ct.count) > 0) { | 1938 | if (atomic_read(&net->ct.count) > 0) { |
1951 | __nf_ct_unconfirmed_destroy(net); | 1939 | __nf_ct_unconfirmed_destroy(net); |
1952 | nf_queue_nf_hook_drop(net); | 1940 | nf_queue_nf_hook_drop(net); |
1953 | synchronize_net(); | 1941 | synchronize_net(); |
1954 | } | 1942 | } |
1955 | } | 1943 | } |
1956 | EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); | 1944 | EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); |
1957 | 1945 | ||
1958 | void nf_ct_iterate_cleanup_net(struct net *net, | 1946 | void nf_ct_iterate_cleanup_net(struct net *net, |
1959 | int (*iter)(struct nf_conn *i, void *data), | 1947 | int (*iter)(struct nf_conn *i, void *data), |
1960 | void *data, u32 portid, int report) | 1948 | void *data, u32 portid, int report) |
1961 | { | 1949 | { |
1962 | struct iter_data d; | 1950 | struct iter_data d; |
1963 | 1951 | ||
1964 | might_sleep(); | 1952 | might_sleep(); |
1965 | 1953 | ||
1966 | if (atomic_read(&net->ct.count) == 0) | 1954 | if (atomic_read(&net->ct.count) == 0) |
1967 | return; | 1955 | return; |
1968 | 1956 | ||
1969 | d.iter = iter; | 1957 | d.iter = iter; |
1970 | d.data = data; | 1958 | d.data = data; |
1971 | d.net = net; | 1959 | d.net = net; |
1972 | 1960 | ||
1973 | nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); | 1961 | nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); |
1974 | } | 1962 | } |
1975 | EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); | 1963 | EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); |
1976 | 1964 | ||
/**
 * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
 * @iter: callback to invoke for each conntrack
 * @data: data to pass to @iter
 *
 * Like nf_ct_iterate_cleanup, but first marks conntracks on the
 * unconfirmed list as dying (so they will not be inserted into
 * main table).
 *
 * Can only be called in module exit path.  May sleep.
 */
void
nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct net *net;

	down_read(&net_rwsem);
	for_each_net(net) {
		if (atomic_read(&net->ct.count) == 0)
			continue;
		__nf_ct_unconfirmed_destroy(net);
		nf_queue_nf_hook_drop(net);
	}
	up_read(&net_rwsem);

	/* Need to wait for netns cleanup worker to finish, if its
	 * running -- it might have deleted a net namespace from
	 * the global list, so our __nf_ct_unconfirmed_destroy() might
	 * not have affected all namespaces.
	 */
	net_ns_barrier();

	/* a conntrack could have been unlinked from unconfirmed list
	 * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
	 * This makes sure its inserted into conntrack table.
	 */
	synchronize_net();

	nf_ct_iterate_cleanup(iter, data, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
2018 | 2006 | ||
/* Iterator callback: match every conntrack owned by the netns passed
 * in @data (used by the per-netns teardown path).
 */
static int kill_all(struct nf_conn *i, void *data)
{
	struct net *net = data;

	return net_eq(nf_ct_net(i), net);
}
2023 | 2011 | ||
2024 | void nf_ct_free_hashtable(void *hash, unsigned int size) | 2012 | void nf_ct_free_hashtable(void *hash, unsigned int size) |
2025 | { | 2013 | { |
2026 | if (is_vmalloc_addr(hash)) | 2014 | if (is_vmalloc_addr(hash)) |
2027 | vfree(hash); | 2015 | vfree(hash); |
2028 | else | 2016 | else |
2029 | free_pages((unsigned long)hash, | 2017 | free_pages((unsigned long)hash, |
2030 | get_order(sizeof(struct hlist_head) * size)); | 2018 | get_order(sizeof(struct hlist_head) * size)); |
2031 | } | 2019 | } |
2032 | EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); | 2020 | EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); |
2033 | 2021 | ||
/* First stage of conntrack teardown: tell the gc worker to stop
 * re-arming itself and unpublish the global attach hook.
 */
void nf_conntrack_cleanup_start(void)
{
	conntrack_gc_work.exiting = true;
	RCU_INIT_POINTER(ip_ct_attach, NULL);
}
2039 | 2027 | ||
/* Final stage of conntrack teardown: unhook nf_ct_hook, stop the gc
 * worker, free the global hash table, shut down every extension
 * subsystem and finally destroy the conntrack slab cache.
 */
void nf_conntrack_cleanup_end(void)
{
	RCU_INIT_POINTER(nf_ct_hook, NULL);
	/* the gc worker must be fully stopped before its table goes away */
	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);

	nf_conntrack_proto_fini();
	nf_conntrack_seqadj_fini();
	nf_conntrack_labels_fini();
	nf_conntrack_helper_fini();
	nf_conntrack_timeout_fini();
	nf_conntrack_ecache_fini();
	nf_conntrack_tstamp_fini();
	nf_conntrack_acct_fini();
	nf_conntrack_expect_fini();

	kmem_cache_destroy(nf_conntrack_cachep);
}
2058 | 2046 | ||
/*
 * Mishearing the voices in his head, our hero wonders how he's
 * supposed to kill the mall.
 */
/* Tear down conntrack state for a single netns by funnelling it
 * through the list-based batch teardown path.
 */
void nf_conntrack_cleanup_net(struct net *net)
{
	LIST_HEAD(single);

	list_add(&net->exit_list, &single);
	nf_conntrack_cleanup_net_list(&single);
}
2070 | 2058 | ||
/* Batched per-netns teardown: kill every remaining conntrack in each
 * netns on @net_exit_list, retrying until the per-netns counts reach
 * zero, then release the per-netns subsystems and percpu data.
 */
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
	int busy;
	struct net *net;

	/*
	 * This makes sure all current packets have passed through
	 * netfilter framework.  Roll on, two-stage module
	 * delete...
	 */
	synchronize_net();
i_see_dead_people:
	busy = 0;
	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_ct_iterate_cleanup(kill_all, net, 0, 0);
		/* entries still referenced elsewhere survive the sweep;
		 * remember that and try again.
		 */
		if (atomic_read(&net->ct.count) != 0)
			busy = 1;
	}
	if (busy) {
		schedule();	/* yield, then re-sweep */
		goto i_see_dead_people;
	}

	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_conntrack_proto_pernet_fini(net);
		nf_conntrack_helper_pernet_fini(net);
		nf_conntrack_ecache_pernet_fini(net);
		nf_conntrack_tstamp_pernet_fini(net);
		nf_conntrack_acct_pernet_fini(net);
		nf_conntrack_expect_pernet_fini(net);
		free_percpu(net->ct.stat);
		free_percpu(net->ct.pcpu_lists);
	}
}
2105 | 2093 | ||
/* Allocate a conntrack hash table with at least *sizep buckets.
 *
 * *sizep is rounded up to a whole number of pages worth of buckets and
 * updated in place.  Tries page allocation first, falling back to
 * vzalloc().  When @nulls is set, every chain head is initialized as an
 * hlist_nulls list with its bucket index as the nulls marker.
 *
 * Returns the zeroed table, or NULL on overflow or allocation failure.
 */
void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
{
	struct hlist_nulls_head *hash;
	unsigned int nr_slots, i;
	size_t sz;

	/* reject sizes whose byte count would overflow an unsigned int */
	if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
		return NULL;

	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));

	/* re-check: the roundup above may have pushed us past the limit */
	if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
		return NULL;

	sz = nr_slots * sizeof(struct hlist_nulls_head);
	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
					get_order(sz));
	if (!hash)
		hash = vzalloc(sz);

	if (hash && nulls)
		for (i = 0; i < nr_slots; i++)
			INIT_HLIST_NULLS_HEAD(&hash[i], i);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
2134 | 2122 | ||
/* Replace the global conntrack hash table with one of @hashsize
 * buckets, rehashing every existing entry.  The swap happens with all
 * conntrack locks held and inside a write section of
 * nf_conntrack_generation so concurrent table walkers restart.
 *
 * Returns 0 on success (including the no-op same-size case) or a
 * negative errno.
 */
int nf_conntrack_hash_resize(unsigned int hashsize)
{
	int i, bucket;
	unsigned int old_size;
	struct hlist_nulls_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!hashsize)
		return -EINVAL;

	/* allocate before taking any locks; may round hashsize up */
	hash = nf_ct_alloc_hashtable(&hashsize, 1);
	if (!hash)
		return -ENOMEM;

	old_size = nf_conntrack_htable_size;
	if (old_size == hashsize) {
		nf_ct_free_hashtable(hash, hashsize);
		return 0;
	}

	local_bh_disable();
	nf_conntrack_all_lock();
	write_seqcount_begin(&nf_conntrack_generation);

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that required taking the locks.
	 */

	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
					      struct nf_conntrack_tuple_hash, hnnode);
			ct = nf_ct_tuplehash_to_ctrack(h);
			hlist_nulls_del_rcu(&h->hnnode);
			bucket = __hash_conntrack(nf_ct_net(ct),
						  &h->tuple, hashsize);
			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
		}
	}
	/* re-read under the locks before publishing the new table */
	old_size = nf_conntrack_htable_size;
	old_hash = nf_conntrack_hash;

	nf_conntrack_hash = hash;
	nf_conntrack_htable_size = hashsize;

	write_seqcount_end(&nf_conntrack_generation);
	nf_conntrack_all_unlock();
	local_bh_enable();

	/* wait out RCU readers still walking the old table */
	synchronize_net();
	nf_ct_free_hashtable(old_hash, old_size);
	return 0;
}
2191 | 2179 | ||
2192 | int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) | 2180 | int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) |
2193 | { | 2181 | { |
2194 | unsigned int hashsize; | 2182 | unsigned int hashsize; |
2195 | int rc; | 2183 | int rc; |
2196 | 2184 | ||
2197 | if (current->nsproxy->net_ns != &init_net) | 2185 | if (current->nsproxy->net_ns != &init_net) |
2198 | return -EOPNOTSUPP; | 2186 | return -EOPNOTSUPP; |
2199 | 2187 | ||
2200 | /* On boot, we can set this without any fancy locking. */ | 2188 | /* On boot, we can set this without any fancy locking. */ |
2201 | if (!nf_conntrack_htable_size) | 2189 | if (!nf_conntrack_htable_size) |
2202 | return param_set_uint(val, kp); | 2190 | return param_set_uint(val, kp); |
2203 | 2191 | ||
2204 | rc = kstrtouint(val, 0, &hashsize); | 2192 | rc = kstrtouint(val, 0, &hashsize); |
2205 | if (rc) | 2193 | if (rc) |
2206 | return rc; | 2194 | return rc; |
2207 | 2195 | ||
2208 | return nf_conntrack_hash_resize(hashsize); | 2196 | return nf_conntrack_hash_resize(hashsize); |
2209 | } | 2197 | } |
2210 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); | 2198 | EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); |
2211 | 2199 | ||
2212 | module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, | 2200 | module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, |
2213 | &nf_conntrack_htable_size, 0600); | 2201 | &nf_conntrack_htable_size, 0600); |
2214 | 2202 | ||
2215 | static __always_inline unsigned int total_extension_size(void) | 2203 | static __always_inline unsigned int total_extension_size(void) |
2216 | { | 2204 | { |
2217 | /* remember to add new extensions below */ | 2205 | /* remember to add new extensions below */ |
2218 | BUILD_BUG_ON(NF_CT_EXT_NUM > 9); | 2206 | BUILD_BUG_ON(NF_CT_EXT_NUM > 9); |
2219 | 2207 | ||
2220 | return sizeof(struct nf_ct_ext) + | 2208 | return sizeof(struct nf_ct_ext) + |
2221 | sizeof(struct nf_conn_help) | 2209 | sizeof(struct nf_conn_help) |
2222 | #if IS_ENABLED(CONFIG_NF_NAT) | 2210 | #if IS_ENABLED(CONFIG_NF_NAT) |
2223 | + sizeof(struct nf_conn_nat) | 2211 | + sizeof(struct nf_conn_nat) |
2224 | #endif | 2212 | #endif |
2225 | + sizeof(struct nf_conn_seqadj) | 2213 | + sizeof(struct nf_conn_seqadj) |
2226 | + sizeof(struct nf_conn_acct) | 2214 | + sizeof(struct nf_conn_acct) |
2227 | #ifdef CONFIG_NF_CONNTRACK_EVENTS | 2215 | #ifdef CONFIG_NF_CONNTRACK_EVENTS |
2228 | + sizeof(struct nf_conntrack_ecache) | 2216 | + sizeof(struct nf_conntrack_ecache) |
2229 | #endif | 2217 | #endif |
2230 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP | 2218 | #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP |
2231 | + sizeof(struct nf_conn_tstamp) | 2219 | + sizeof(struct nf_conn_tstamp) |
2232 | #endif | 2220 | #endif |
2233 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 2221 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
2234 | + sizeof(struct nf_conn_timeout) | 2222 | + sizeof(struct nf_conn_timeout) |
2235 | #endif | 2223 | #endif |
2236 | #ifdef CONFIG_NF_CONNTRACK_LABELS | 2224 | #ifdef CONFIG_NF_CONNTRACK_LABELS |
2237 | + sizeof(struct nf_conn_labels) | 2225 | + sizeof(struct nf_conn_labels) |
2238 | #endif | 2226 | #endif |
2239 | #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) | 2227 | #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) |
2240 | + sizeof(struct nf_conn_synproxy) | 2228 | + sizeof(struct nf_conn_synproxy) |
2241 | #endif | 2229 | #endif |
2242 | ; | 2230 | ; |
2243 | }; | 2231 | }; |
2244 | 2232 | ||
2245 | int nf_conntrack_init_start(void) | 2233 | int nf_conntrack_init_start(void) |
2246 | { | 2234 | { |
2247 | int max_factor = 8; | 2235 | int max_factor = 8; |
2248 | int ret = -ENOMEM; | 2236 | int ret = -ENOMEM; |
2249 | int i; | 2237 | int i; |
2250 | 2238 | ||
2251 | /* struct nf_ct_ext uses u8 to store offsets/size */ | 2239 | /* struct nf_ct_ext uses u8 to store offsets/size */ |
2252 | BUILD_BUG_ON(total_extension_size() > 255u); | 2240 | BUILD_BUG_ON(total_extension_size() > 255u); |
2253 | 2241 | ||
2254 | seqcount_init(&nf_conntrack_generation); | 2242 | seqcount_init(&nf_conntrack_generation); |
2255 | 2243 | ||
2256 | for (i = 0; i < CONNTRACK_LOCKS; i++) | 2244 | for (i = 0; i < CONNTRACK_LOCKS; i++) |
2257 | spin_lock_init(&nf_conntrack_locks[i]); | 2245 | spin_lock_init(&nf_conntrack_locks[i]); |
2258 | 2246 | ||
2259 | if (!nf_conntrack_htable_size) { | 2247 | if (!nf_conntrack_htable_size) { |
2260 | /* Idea from tcp.c: use 1/16384 of memory. | 2248 | /* Idea from tcp.c: use 1/16384 of memory. |
2261 | * On i386: 32MB machine has 512 buckets. | 2249 | * On i386: 32MB machine has 512 buckets. |
2262 | * >= 1GB machines have 16384 buckets. | 2250 | * >= 1GB machines have 16384 buckets. |
2263 | * >= 4GB machines have 65536 buckets. | 2251 | * >= 4GB machines have 65536 buckets. |
2264 | */ | 2252 | */ |
2265 | nf_conntrack_htable_size | 2253 | nf_conntrack_htable_size |
2266 | = (((totalram_pages << PAGE_SHIFT) / 16384) | 2254 | = (((totalram_pages << PAGE_SHIFT) / 16384) |
2267 | / sizeof(struct hlist_head)); | 2255 | / sizeof(struct hlist_head)); |
2268 | if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) | 2256 | if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) |
2269 | nf_conntrack_htable_size = 65536; | 2257 | nf_conntrack_htable_size = 65536; |
2270 | else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) | 2258 | else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) |
2271 | nf_conntrack_htable_size = 16384; | 2259 | nf_conntrack_htable_size = 16384; |
2272 | if (nf_conntrack_htable_size < 32) | 2260 | if (nf_conntrack_htable_size < 32) |
2273 | nf_conntrack_htable_size = 32; | 2261 | nf_conntrack_htable_size = 32; |
2274 | 2262 | ||
2275 | /* Use a max. factor of four by default to get the same max as | 2263 | /* Use a max. factor of four by default to get the same max as |
2276 | * with the old struct list_heads. When a table size is given | 2264 | * with the old struct list_heads. When a table size is given |
2277 | * we use the old value of 8 to avoid reducing the max. | 2265 | * we use the old value of 8 to avoid reducing the max. |
2278 | * entries. */ | 2266 | * entries. */ |
2279 | max_factor = 4; | 2267 | max_factor = 4; |
2280 | } | 2268 | } |
2281 | 2269 | ||
2282 | nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); | 2270 | nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); |
2283 | if (!nf_conntrack_hash) | 2271 | if (!nf_conntrack_hash) |
2284 | return -ENOMEM; | 2272 | return -ENOMEM; |
2285 | 2273 | ||
2286 | nf_conntrack_max = max_factor * nf_conntrack_htable_size; | 2274 | nf_conntrack_max = max_factor * nf_conntrack_htable_size; |
2287 | 2275 | ||
2288 | nf_conntrack_cachep = kmem_cache_create("nf_conntrack", | 2276 | nf_conntrack_cachep = kmem_cache_create("nf_conntrack", |
2289 | sizeof(struct nf_conn), | 2277 | sizeof(struct nf_conn), |
2290 | NFCT_INFOMASK + 1, | 2278 | NFCT_INFOMASK + 1, |
2291 | SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); | 2279 | SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); |
2292 | if (!nf_conntrack_cachep) | 2280 | if (!nf_conntrack_cachep) |
2293 | goto err_cachep; | 2281 | goto err_cachep; |
2294 | 2282 | ||
2295 | ret = nf_conntrack_expect_init(); | 2283 | ret = nf_conntrack_expect_init(); |
2296 | if (ret < 0) | 2284 | if (ret < 0) |
2297 | goto err_expect; | 2285 | goto err_expect; |
2298 | 2286 | ||
2299 | ret = nf_conntrack_acct_init(); | 2287 | ret = nf_conntrack_acct_init(); |
2300 | if (ret < 0) | 2288 | if (ret < 0) |
2301 | goto err_acct; | 2289 | goto err_acct; |
2302 | 2290 | ||
2303 | ret = nf_conntrack_tstamp_init(); | 2291 | ret = nf_conntrack_tstamp_init(); |
2304 | if (ret < 0) | 2292 | if (ret < 0) |
2305 | goto err_tstamp; | 2293 | goto err_tstamp; |
2306 | 2294 | ||
2307 | ret = nf_conntrack_ecache_init(); | 2295 | ret = nf_conntrack_ecache_init(); |
2308 | if (ret < 0) | 2296 | if (ret < 0) |
2309 | goto err_ecache; | 2297 | goto err_ecache; |
2310 | 2298 | ||
2311 | ret = nf_conntrack_timeout_init(); | 2299 | ret = nf_conntrack_timeout_init(); |
2312 | if (ret < 0) | 2300 | if (ret < 0) |
2313 | goto err_timeout; | 2301 | goto err_timeout; |
2314 | 2302 | ||
2315 | ret = nf_conntrack_helper_init(); | 2303 | ret = nf_conntrack_helper_init(); |
2316 | if (ret < 0) | 2304 | if (ret < 0) |
2317 | goto err_helper; | 2305 | goto err_helper; |
2318 | 2306 | ||
2319 | ret = nf_conntrack_labels_init(); | 2307 | ret = nf_conntrack_labels_init(); |
2320 | if (ret < 0) | 2308 | if (ret < 0) |
2321 | goto err_labels; | 2309 | goto err_labels; |
2322 | 2310 | ||
2323 | ret = nf_conntrack_seqadj_init(); | 2311 | ret = nf_conntrack_seqadj_init(); |
2324 | if (ret < 0) | 2312 | if (ret < 0) |
2325 | goto err_seqadj; | 2313 | goto err_seqadj; |
2326 | 2314 | ||
2327 | ret = nf_conntrack_proto_init(); | 2315 | ret = nf_conntrack_proto_init(); |
2328 | if (ret < 0) | 2316 | if (ret < 0) |
2329 | goto err_proto; | 2317 | goto err_proto; |
2330 | 2318 | ||
2331 | conntrack_gc_work_init(&conntrack_gc_work); | 2319 | conntrack_gc_work_init(&conntrack_gc_work); |
2332 | queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); | 2320 | queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); |
2333 | 2321 | ||
2334 | return 0; | 2322 | return 0; |
2335 | 2323 | ||
2336 | err_proto: | 2324 | err_proto: |
2337 | nf_conntrack_seqadj_fini(); | 2325 | nf_conntrack_seqadj_fini(); |
2338 | err_seqadj: | 2326 | err_seqadj: |
2339 | nf_conntrack_labels_fini(); | 2327 | nf_conntrack_labels_fini(); |
2340 | err_labels: | 2328 | err_labels: |
2341 | nf_conntrack_helper_fini(); | 2329 | nf_conntrack_helper_fini(); |
2342 | err_helper: | 2330 | err_helper: |
2343 | nf_conntrack_timeout_fini(); | 2331 | nf_conntrack_timeout_fini(); |
2344 | err_timeout: | 2332 | err_timeout: |
2345 | nf_conntrack_ecache_fini(); | 2333 | nf_conntrack_ecache_fini(); |
2346 | err_ecache: | 2334 | err_ecache: |
2347 | nf_conntrack_tstamp_fini(); | 2335 | nf_conntrack_tstamp_fini(); |
2348 | err_tstamp: | 2336 | err_tstamp: |
2349 | nf_conntrack_acct_fini(); | 2337 | nf_conntrack_acct_fini(); |
2350 | err_acct: | 2338 | err_acct: |
2351 | nf_conntrack_expect_fini(); | 2339 | nf_conntrack_expect_fini(); |
2352 | err_expect: | 2340 | err_expect: |
2353 | kmem_cache_destroy(nf_conntrack_cachep); | 2341 | kmem_cache_destroy(nf_conntrack_cachep); |
2354 | err_cachep: | 2342 | err_cachep: |
2355 | nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); | 2343 | nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); |
2356 | return ret; | 2344 | return ret; |
2357 | } | 2345 | } |
2358 | 2346 | ||
/* Hook ops published via nf_ct_hook (see nf_conntrack_init_end() below)
 * so code outside this module can update/destroy conntrack entries and
 * extract tuples from an skb without a hard symbol dependency.
 */
static struct nf_ct_hook nf_conntrack_hook = {
	.update		= nf_conntrack_update,
	.destroy	= destroy_conntrack,
	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
};
2364 | 2352 | ||
2365 | void nf_conntrack_init_end(void) | 2353 | void nf_conntrack_init_end(void) |
2366 | { | 2354 | { |
2367 | /* For use by REJECT target */ | 2355 | /* For use by REJECT target */ |
2368 | RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); | 2356 | RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); |
2369 | RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); | 2357 | RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); |
2370 | } | 2358 | } |
2371 | 2359 | ||
2372 | /* | 2360 | /* |
2373 | * We need to use special "null" values, not used in hash table | 2361 | * We need to use special "null" values, not used in hash table |
2374 | */ | 2362 | */ |
2375 | #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) | 2363 | #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) |
2376 | #define DYING_NULLS_VAL ((1<<30)+1) | 2364 | #define DYING_NULLS_VAL ((1<<30)+1) |
2377 | #define TEMPLATE_NULLS_VAL ((1<<30)+2) | 2365 | #define TEMPLATE_NULLS_VAL ((1<<30)+2) |
2378 | 2366 | ||
/* Per-network-namespace conntrack initialization: allocate the per-cpu
 * lists and statistics, then run the pernet init of every conntrack
 * extension (expect, acct, tstamp, ecache, helper, proto).
 *
 * Returns 0 on success or a negative errno; on failure everything that
 * was already initialized is unwound, in reverse order, by the goto
 * chain at the bottom.
 */
int nf_conntrack_init_net(struct net *net)
{
	int ret = -ENOMEM;
	int cpu;

	BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
	atomic_set(&net->ct.count, 0);

	/* Per-cpu unconfirmed/dying lists for entries not (or no longer)
	 * in the main hash table.
	 */
	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
	if (!net->ct.pcpu_lists)
		goto err_stat;

	for_each_possible_cpu(cpu) {
		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);

		spin_lock_init(&pcpu->lock);
		/* Special "nulls" end-marker values, distinct from those
		 * used in the hash table (see *_NULLS_VAL above).
		 */
		INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
		INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
	}

	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
	if (!net->ct.stat)
		goto err_pcpu_lists;

	ret = nf_conntrack_expect_pernet_init(net);
	if (ret < 0)
		goto err_expect;
	ret = nf_conntrack_acct_pernet_init(net);
	if (ret < 0)
		goto err_acct;
	ret = nf_conntrack_tstamp_pernet_init(net);
	if (ret < 0)
		goto err_tstamp;
	ret = nf_conntrack_ecache_pernet_init(net);
	if (ret < 0)
		goto err_ecache;
	ret = nf_conntrack_helper_pernet_init(net);
	if (ret < 0)
		goto err_helper;
	ret = nf_conntrack_proto_pernet_init(net);
	if (ret < 0)
		goto err_proto;
	return 0;

	/* Error unwind: each label undoes the inits that succeeded before
	 * the failing one, i.e. reverse order of the calls above.
	 */
err_proto:
	nf_conntrack_helper_pernet_fini(net);
err_helper:
	nf_conntrack_ecache_pernet_fini(net);
err_ecache:
	nf_conntrack_tstamp_pernet_fini(net);
err_tstamp:
	nf_conntrack_acct_pernet_fini(net);
err_acct:
	nf_conntrack_expect_pernet_fini(net);
err_expect:
	free_percpu(net->ct.stat);
err_pcpu_lists:
	free_percpu(net->ct.pcpu_lists);
err_stat:
	return ret;
}
2440 | 2428 |
net/netfilter/nf_conntrack_proto_dccp.c
1 | /* | 1 | /* |
2 | * DCCP connection tracking protocol helper | 2 | * DCCP connection tracking protocol helper |
3 | * | 3 | * |
4 | * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> | 4 | * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | * | 9 | * |
10 | */ | 10 | */ |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
14 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
15 | #include <linux/skbuff.h> | 15 | #include <linux/skbuff.h> |
16 | #include <linux/dccp.h> | 16 | #include <linux/dccp.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | 18 | ||
19 | #include <net/net_namespace.h> | 19 | #include <net/net_namespace.h> |
20 | #include <net/netns/generic.h> | 20 | #include <net/netns/generic.h> |
21 | 21 | ||
22 | #include <linux/netfilter/nfnetlink_conntrack.h> | 22 | #include <linux/netfilter/nfnetlink_conntrack.h> |
23 | #include <net/netfilter/nf_conntrack.h> | 23 | #include <net/netfilter/nf_conntrack.h> |
24 | #include <net/netfilter/nf_conntrack_l4proto.h> | 24 | #include <net/netfilter/nf_conntrack_l4proto.h> |
25 | #include <net/netfilter/nf_conntrack_ecache.h> | 25 | #include <net/netfilter/nf_conntrack_ecache.h> |
26 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
26 | #include <net/netfilter/nf_log.h> | 27 | #include <net/netfilter/nf_log.h> |
27 | 28 | ||
28 | /* Timeouts are based on values from RFC4340: | 29 | /* Timeouts are based on values from RFC4340: |
29 | * | 30 | * |
30 | * - REQUEST: | 31 | * - REQUEST: |
31 | * | 32 | * |
32 | * 8.1.2. Client Request | 33 | * 8.1.2. Client Request |
33 | * | 34 | * |
34 | * A client MAY give up on its DCCP-Requests after some time | 35 | * A client MAY give up on its DCCP-Requests after some time |
35 | * (3 minutes, for example). | 36 | * (3 minutes, for example). |
36 | * | 37 | * |
37 | * - RESPOND: | 38 | * - RESPOND: |
38 | * | 39 | * |
39 | * 8.1.3. Server Response | 40 | * 8.1.3. Server Response |
40 | * | 41 | * |
41 | * It MAY also leave the RESPOND state for CLOSED after a timeout of | 42 | * It MAY also leave the RESPOND state for CLOSED after a timeout of |
42 | * not less than 4MSL (8 minutes); | 43 | * not less than 4MSL (8 minutes); |
43 | * | 44 | * |
44 | * - PARTOPEN: | 45 | * - PARTOPEN: |
45 | * | 46 | * |
46 | * 8.1.5. Handshake Completion | 47 | * 8.1.5. Handshake Completion |
47 | * | 48 | * |
48 | * If the client remains in PARTOPEN for more than 4MSL (8 minutes), | 49 | * If the client remains in PARTOPEN for more than 4MSL (8 minutes), |
49 | * it SHOULD reset the connection with Reset Code 2, "Aborted". | 50 | * it SHOULD reset the connection with Reset Code 2, "Aborted". |
50 | * | 51 | * |
51 | * - OPEN: | 52 | * - OPEN: |
52 | * | 53 | * |
53 | * The DCCP timestamp overflows after 11.9 hours. If the connection | 54 | * The DCCP timestamp overflows after 11.9 hours. If the connection |
54 | * stays idle this long the sequence number won't be recognized | 55 | * stays idle this long the sequence number won't be recognized |
55 | * as valid anymore. | 56 | * as valid anymore. |
56 | * | 57 | * |
57 | * - CLOSEREQ/CLOSING: | 58 | * - CLOSEREQ/CLOSING: |
58 | * | 59 | * |
59 | * 8.3. Termination | 60 | * 8.3. Termination |
60 | * | 61 | * |
61 | * The retransmission timer should initially be set to go off in two | 62 | * The retransmission timer should initially be set to go off in two |
62 | * round-trip times and should back off to not less than once every | 63 | * round-trip times and should back off to not less than once every |
63 | * 64 seconds ... | 64 | * 64 seconds ... |
64 | * | 65 | * |
65 | * - TIMEWAIT: | 66 | * - TIMEWAIT: |
66 | * | 67 | * |
67 | * 4.3. States | 68 | * 4.3. States |
68 | * | 69 | * |
69 | * A server or client socket remains in this state for 2MSL (4 minutes) | 70 | * A server or client socket remains in this state for 2MSL (4 minutes) |
70 | * after the connection has been town down, ... | 71 | * after the connection has been town down, ... |
71 | */ | 72 | */ |
72 | 73 | ||
73 | #define DCCP_MSL (2 * 60 * HZ) | 74 | #define DCCP_MSL (2 * 60 * HZ) |
74 | 75 | ||
/* Human-readable names, indexed by CT_DCCP_* state. */
static const char * const dccp_state_names[] = {
	[CT_DCCP_NONE]		= "NONE",
	[CT_DCCP_REQUEST]	= "REQUEST",
	[CT_DCCP_RESPOND]	= "RESPOND",
	[CT_DCCP_PARTOPEN]	= "PARTOPEN",
	[CT_DCCP_OPEN]		= "OPEN",
	[CT_DCCP_CLOSEREQ]	= "CLOSEREQ",
	[CT_DCCP_CLOSING]	= "CLOSING",
	[CT_DCCP_TIMEWAIT]	= "TIMEWAIT",
	[CT_DCCP_IGNORE]	= "IGNORE",
	[CT_DCCP_INVALID]	= "INVALID",
};
87 | 88 | ||
/* Two-letter aliases for the CT_DCCP_* states, used only to keep the
 * rows of the state transition table below readable.
 */
#define sNO	CT_DCCP_NONE
#define sRQ	CT_DCCP_REQUEST
#define sRS	CT_DCCP_RESPOND
#define sPO	CT_DCCP_PARTOPEN
#define sOP	CT_DCCP_OPEN
#define sCR	CT_DCCP_CLOSEREQ
#define sCG	CT_DCCP_CLOSING
#define sTW	CT_DCCP_TIMEWAIT
#define sIG	CT_DCCP_IGNORE
#define sIV	CT_DCCP_INVALID
98 | 99 | ||
99 | /* | 100 | /* |
100 | * DCCP state transition table | 101 | * DCCP state transition table |
101 | * | 102 | * |
102 | * The assumption is the same as for TCP tracking: | 103 | * The assumption is the same as for TCP tracking: |
103 | * | 104 | * |
104 | * We are the man in the middle. All the packets go through us but might | 105 | * We are the man in the middle. All the packets go through us but might |
105 | * get lost in transit to the destination. It is assumed that the destination | 106 | * get lost in transit to the destination. It is assumed that the destination |
106 | * can't receive segments we haven't seen. | 107 | * can't receive segments we haven't seen. |
107 | * | 108 | * |
108 | * The following states exist: | 109 | * The following states exist: |
109 | * | 110 | * |
110 | * NONE: Initial state, expecting Request | 111 | * NONE: Initial state, expecting Request |
111 | * REQUEST: Request seen, waiting for Response from server | 112 | * REQUEST: Request seen, waiting for Response from server |
112 | * RESPOND: Response from server seen, waiting for Ack from client | 113 | * RESPOND: Response from server seen, waiting for Ack from client |
113 | * PARTOPEN: Ack after Response seen, waiting for packet other than Response, | 114 | * PARTOPEN: Ack after Response seen, waiting for packet other than Response, |
114 | * Reset or Sync from server | 115 | * Reset or Sync from server |
115 | * OPEN: Packet other than Response, Reset or Sync seen | 116 | * OPEN: Packet other than Response, Reset or Sync seen |
116 | * CLOSEREQ: CloseReq from server seen, expecting Close from client | 117 | * CLOSEREQ: CloseReq from server seen, expecting Close from client |
117 | * CLOSING: Close seen, expecting Reset | 118 | * CLOSING: Close seen, expecting Reset |
118 | * TIMEWAIT: Reset seen | 119 | * TIMEWAIT: Reset seen |
119 | * IGNORE: Not determinable whether packet is valid | 120 | * IGNORE: Not determinable whether packet is valid |
120 | * | 121 | * |
121 | * Some states exist only on one side of the connection: REQUEST, RESPOND, | 122 | * Some states exist only on one side of the connection: REQUEST, RESPOND, |
122 | * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to | 123 | * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to |
123 | * the one it was in before. | 124 | * the one it was in before. |
124 | * | 125 | * |
125 | * Packets are marked as ignored (sIG) if we don't know if they're valid | 126 | * Packets are marked as ignored (sIG) if we don't know if they're valid |
126 | * (for example a reincarnation of a connection we didn't notice is dead | 127 | * (for example a reincarnation of a connection we didn't notice is dead |
127 | * already) and the server may send back a connection closing Reset or a | 128 | * already) and the server may send back a connection closing Reset or a |
128 | * Response. They're also used for Sync/SyncAck packets, which we don't | 129 | * Response. They're also used for Sync/SyncAck packets, which we don't |
129 | * care about. | 130 | * care about. |
130 | */ | 131 | */ |
131 | static const u_int8_t | 132 | static const u_int8_t |
132 | dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { | 133 | dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { |
133 | [CT_DCCP_ROLE_CLIENT] = { | 134 | [CT_DCCP_ROLE_CLIENT] = { |
134 | [DCCP_PKT_REQUEST] = { | 135 | [DCCP_PKT_REQUEST] = { |
135 | /* | 136 | /* |
136 | * sNO -> sRQ Regular Request | 137 | * sNO -> sRQ Regular Request |
137 | * sRQ -> sRQ Retransmitted Request or reincarnation | 138 | * sRQ -> sRQ Retransmitted Request or reincarnation |
138 | * sRS -> sRS Retransmitted Request (apparently Response | 139 | * sRS -> sRS Retransmitted Request (apparently Response |
139 | * got lost after we saw it) or reincarnation | 140 | * got lost after we saw it) or reincarnation |
140 | * sPO -> sIG Ignore, conntrack might be out of sync | 141 | * sPO -> sIG Ignore, conntrack might be out of sync |
141 | * sOP -> sIG Ignore, conntrack might be out of sync | 142 | * sOP -> sIG Ignore, conntrack might be out of sync |
142 | * sCR -> sIG Ignore, conntrack might be out of sync | 143 | * sCR -> sIG Ignore, conntrack might be out of sync |
143 | * sCG -> sIG Ignore, conntrack might be out of sync | 144 | * sCG -> sIG Ignore, conntrack might be out of sync |
144 | * sTW -> sRQ Reincarnation | 145 | * sTW -> sRQ Reincarnation |
145 | * | 146 | * |
146 | * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ | 147 | * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ |
147 | sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, | 148 | sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, |
148 | }, | 149 | }, |
149 | [DCCP_PKT_RESPONSE] = { | 150 | [DCCP_PKT_RESPONSE] = { |
150 | /* | 151 | /* |
151 | * sNO -> sIV Invalid | 152 | * sNO -> sIV Invalid |
152 | * sRQ -> sIG Ignore, might be response to ignored Request | 153 | * sRQ -> sIG Ignore, might be response to ignored Request |
153 | * sRS -> sIG Ignore, might be response to ignored Request | 154 | * sRS -> sIG Ignore, might be response to ignored Request |
154 | * sPO -> sIG Ignore, might be response to ignored Request | 155 | * sPO -> sIG Ignore, might be response to ignored Request |
155 | * sOP -> sIG Ignore, might be response to ignored Request | 156 | * sOP -> sIG Ignore, might be response to ignored Request |
156 | * sCR -> sIG Ignore, might be response to ignored Request | 157 | * sCR -> sIG Ignore, might be response to ignored Request |
157 | * sCG -> sIG Ignore, might be response to ignored Request | 158 | * sCG -> sIG Ignore, might be response to ignored Request |
158 | * sTW -> sIV Invalid, reincarnation in reverse direction | 159 | * sTW -> sIV Invalid, reincarnation in reverse direction |
159 | * goes through sRQ | 160 | * goes through sRQ |
160 | * | 161 | * |
161 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 162 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
162 | sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, | 163 | sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, |
163 | }, | 164 | }, |
164 | [DCCP_PKT_ACK] = { | 165 | [DCCP_PKT_ACK] = { |
165 | /* | 166 | /* |
166 | * sNO -> sIV No connection | 167 | * sNO -> sIV No connection |
167 | * sRQ -> sIV No connection | 168 | * sRQ -> sIV No connection |
168 | * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) | 169 | * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) |
169 | * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN | 170 | * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN |
170 | * sOP -> sOP Regular ACK, remain in OPEN | 171 | * sOP -> sOP Regular ACK, remain in OPEN |
171 | * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) | 172 | * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) |
172 | * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) | 173 | * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) |
173 | * sTW -> sIV | 174 | * sTW -> sIV |
174 | * | 175 | * |
175 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 176 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
176 | sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV | 177 | sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV |
177 | }, | 178 | }, |
178 | [DCCP_PKT_DATA] = { | 179 | [DCCP_PKT_DATA] = { |
179 | /* | 180 | /* |
180 | * sNO -> sIV No connection | 181 | * sNO -> sIV No connection |
181 | * sRQ -> sIV No connection | 182 | * sRQ -> sIV No connection |
182 | * sRS -> sIV No connection | 183 | * sRS -> sIV No connection |
183 | * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) | 184 | * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) |
184 | * sOP -> sOP Regular Data packet | 185 | * sOP -> sOP Regular Data packet |
185 | * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) | 186 | * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) |
186 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) | 187 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) |
187 | * sTW -> sIV | 188 | * sTW -> sIV |
188 | * | 189 | * |
189 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 190 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
190 | sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, | 191 | sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, |
191 | }, | 192 | }, |
192 | [DCCP_PKT_DATAACK] = { | 193 | [DCCP_PKT_DATAACK] = { |
193 | /* | 194 | /* |
194 | * sNO -> sIV No connection | 195 | * sNO -> sIV No connection |
195 | * sRQ -> sIV No connection | 196 | * sRQ -> sIV No connection |
196 | * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) | 197 | * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) |
197 | * sPO -> sPO Remain in PARTOPEN state | 198 | * sPO -> sPO Remain in PARTOPEN state |
198 | * sOP -> sOP Regular DataAck packet in OPEN state | 199 | * sOP -> sOP Regular DataAck packet in OPEN state |
199 | * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) | 200 | * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) |
200 | * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) | 201 | * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) |
201 | * sTW -> sIV | 202 | * sTW -> sIV |
202 | * | 203 | * |
203 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 204 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
204 | sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV | 205 | sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV |
205 | }, | 206 | }, |
206 | [DCCP_PKT_CLOSEREQ] = { | 207 | [DCCP_PKT_CLOSEREQ] = { |
207 | /* | 208 | /* |
208 | * CLOSEREQ may only be sent by the server. | 209 | * CLOSEREQ may only be sent by the server. |
209 | * | 210 | * |
210 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 211 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
211 | sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV | 212 | sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV |
212 | }, | 213 | }, |
213 | [DCCP_PKT_CLOSE] = { | 214 | [DCCP_PKT_CLOSE] = { |
214 | /* | 215 | /* |
215 | * sNO -> sIV No connection | 216 | * sNO -> sIV No connection |
216 | * sRQ -> sIV No connection | 217 | * sRQ -> sIV No connection |
217 | * sRS -> sIV No connection | 218 | * sRS -> sIV No connection |
218 | * sPO -> sCG Client-initiated close | 219 | * sPO -> sCG Client-initiated close |
219 | * sOP -> sCG Client-initiated close | 220 | * sOP -> sCG Client-initiated close |
220 | * sCR -> sCG Close in response to CloseReq (8.3.) | 221 | * sCR -> sCG Close in response to CloseReq (8.3.) |
221 | * sCG -> sCG Retransmit | 222 | * sCG -> sCG Retransmit |
222 | * sTW -> sIV Late retransmit, already in TIME_WAIT | 223 | * sTW -> sIV Late retransmit, already in TIME_WAIT |
223 | * | 224 | * |
224 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 225 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
225 | sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV | 226 | sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV |
226 | }, | 227 | }, |
227 | [DCCP_PKT_RESET] = { | 228 | [DCCP_PKT_RESET] = { |
228 | /* | 229 | /* |
229 | * sNO -> sIV No connection | 230 | * sNO -> sIV No connection |
230 | * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) | 231 | * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) |
231 | * sRS -> sTW Response received without Request | 232 | * sRS -> sTW Response received without Request |
232 | * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) | 233 | * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) |
233 | * sOP -> sTW Connection reset | 234 | * sOP -> sTW Connection reset |
234 | * sCR -> sTW Connection reset | 235 | * sCR -> sTW Connection reset |
235 | * sCG -> sTW Connection reset | 236 | * sCG -> sTW Connection reset |
236 | * sTW -> sIG Ignore (don't refresh timer) | 237 | * sTW -> sIG Ignore (don't refresh timer) |
237 | * | 238 | * |
238 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 239 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
239 | sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG | 240 | sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG |
240 | }, | 241 | }, |
241 | [DCCP_PKT_SYNC] = { | 242 | [DCCP_PKT_SYNC] = { |
242 | /* | 243 | /* |
243 | * We currently ignore Sync packets | 244 | * We currently ignore Sync packets |
244 | * | 245 | * |
245 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 246 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
246 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, | 247 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, |
247 | }, | 248 | }, |
248 | [DCCP_PKT_SYNCACK] = { | 249 | [DCCP_PKT_SYNCACK] = { |
249 | /* | 250 | /* |
250 | * We currently ignore SyncAck packets | 251 | * We currently ignore SyncAck packets |
251 | * | 252 | * |
252 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 253 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
253 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, | 254 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, |
254 | }, | 255 | }, |
255 | }, | 256 | }, |
256 | [CT_DCCP_ROLE_SERVER] = { | 257 | [CT_DCCP_ROLE_SERVER] = { |
257 | [DCCP_PKT_REQUEST] = { | 258 | [DCCP_PKT_REQUEST] = { |
258 | /* | 259 | /* |
259 | * sNO -> sIV Invalid | 260 | * sNO -> sIV Invalid |
260 | * sRQ -> sIG Ignore, conntrack might be out of sync | 261 | * sRQ -> sIG Ignore, conntrack might be out of sync |
261 | * sRS -> sIG Ignore, conntrack might be out of sync | 262 | * sRS -> sIG Ignore, conntrack might be out of sync |
262 | * sPO -> sIG Ignore, conntrack might be out of sync | 263 | * sPO -> sIG Ignore, conntrack might be out of sync |
263 | * sOP -> sIG Ignore, conntrack might be out of sync | 264 | * sOP -> sIG Ignore, conntrack might be out of sync |
264 | * sCR -> sIG Ignore, conntrack might be out of sync | 265 | * sCR -> sIG Ignore, conntrack might be out of sync |
265 | * sCG -> sIG Ignore, conntrack might be out of sync | 266 | * sCG -> sIG Ignore, conntrack might be out of sync |
266 | * sTW -> sRQ Reincarnation, must reverse roles | 267 | * sTW -> sRQ Reincarnation, must reverse roles |
267 | * | 268 | * |
268 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 269 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
269 | sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ | 270 | sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ |
270 | }, | 271 | }, |
271 | [DCCP_PKT_RESPONSE] = { | 272 | [DCCP_PKT_RESPONSE] = { |
272 | /* | 273 | /* |
273 | * sNO -> sIV Response without Request | 274 | * sNO -> sIV Response without Request |
274 | * sRQ -> sRS Response to clients Request | 275 | * sRQ -> sRS Response to clients Request |
275 | * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) | 276 | * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) |
276 | * sPO -> sIG Response to an ignored Request or late retransmit | 277 | * sPO -> sIG Response to an ignored Request or late retransmit |
277 | * sOP -> sIG Ignore, might be response to ignored Request | 278 | * sOP -> sIG Ignore, might be response to ignored Request |
278 | * sCR -> sIG Ignore, might be response to ignored Request | 279 | * sCR -> sIG Ignore, might be response to ignored Request |
279 | * sCG -> sIG Ignore, might be response to ignored Request | 280 | * sCG -> sIG Ignore, might be response to ignored Request |
280 | * sTW -> sIV Invalid, Request from client in sTW moves to sRQ | 281 | * sTW -> sIV Invalid, Request from client in sTW moves to sRQ |
281 | * | 282 | * |
282 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 283 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
283 | sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV | 284 | sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV |
284 | }, | 285 | }, |
285 | [DCCP_PKT_ACK] = { | 286 | [DCCP_PKT_ACK] = { |
286 | /* | 287 | /* |
287 | * sNO -> sIV No connection | 288 | * sNO -> sIV No connection |
288 | * sRQ -> sIV No connection | 289 | * sRQ -> sIV No connection |
289 | * sRS -> sIV No connection | 290 | * sRS -> sIV No connection |
290 | * sPO -> sOP Enter OPEN state (8.1.5.) | 291 | * sPO -> sOP Enter OPEN state (8.1.5.) |
291 | * sOP -> sOP Regular Ack in OPEN state | 292 | * sOP -> sOP Regular Ack in OPEN state |
292 | * sCR -> sIV Waiting for Close from client | 293 | * sCR -> sIV Waiting for Close from client |
293 | * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) | 294 | * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) |
294 | * sTW -> sIV | 295 | * sTW -> sIV |
295 | * | 296 | * |
296 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 297 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
297 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV | 298 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV |
298 | }, | 299 | }, |
299 | [DCCP_PKT_DATA] = { | 300 | [DCCP_PKT_DATA] = { |
300 | /* | 301 | /* |
301 | * sNO -> sIV No connection | 302 | * sNO -> sIV No connection |
302 | * sRQ -> sIV No connection | 303 | * sRQ -> sIV No connection |
303 | * sRS -> sIV No connection | 304 | * sRS -> sIV No connection |
304 | * sPO -> sOP Enter OPEN state (8.1.5.) | 305 | * sPO -> sOP Enter OPEN state (8.1.5.) |
305 | * sOP -> sOP Regular Data packet in OPEN state | 306 | * sOP -> sOP Regular Data packet in OPEN state |
306 | * sCR -> sIV Waiting for Close from client | 307 | * sCR -> sIV Waiting for Close from client |
307 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) | 308 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) |
308 | * sTW -> sIV | 309 | * sTW -> sIV |
309 | * | 310 | * |
310 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 311 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
311 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV | 312 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV |
312 | }, | 313 | }, |
313 | [DCCP_PKT_DATAACK] = { | 314 | [DCCP_PKT_DATAACK] = { |
314 | /* | 315 | /* |
315 | * sNO -> sIV No connection | 316 | * sNO -> sIV No connection |
316 | * sRQ -> sIV No connection | 317 | * sRQ -> sIV No connection |
317 | * sRS -> sIV No connection | 318 | * sRS -> sIV No connection |
318 | * sPO -> sOP Enter OPEN state (8.1.5.) | 319 | * sPO -> sOP Enter OPEN state (8.1.5.) |
319 | * sOP -> sOP Regular DataAck in OPEN state | 320 | * sOP -> sOP Regular DataAck in OPEN state |
320 | * sCR -> sIV Waiting for Close from client | 321 | * sCR -> sIV Waiting for Close from client |
321 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) | 322 | * sCG -> sCG Data in CLOSING MAY be processed (8.3.) |
322 | * sTW -> sIV | 323 | * sTW -> sIV |
323 | * | 324 | * |
324 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 325 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
325 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV | 326 | sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV |
326 | }, | 327 | }, |
327 | [DCCP_PKT_CLOSEREQ] = { | 328 | [DCCP_PKT_CLOSEREQ] = { |
328 | /* | 329 | /* |
329 | * sNO -> sIV No connection | 330 | * sNO -> sIV No connection |
330 | * sRQ -> sIV No connection | 331 | * sRQ -> sIV No connection |
331 | * sRS -> sIV No connection | 332 | * sRS -> sIV No connection |
332 | * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) | 333 | * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) |
333 | * sOP -> sCR CloseReq in OPEN state | 334 | * sOP -> sCR CloseReq in OPEN state |
334 | * sCR -> sCR Retransmit | 335 | * sCR -> sCR Retransmit |
335 | * sCG -> sCR Simultaneous close, client sends another Close | 336 | * sCG -> sCR Simultaneous close, client sends another Close |
336 | * sTW -> sIV Already closed | 337 | * sTW -> sIV Already closed |
337 | * | 338 | * |
338 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 339 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
339 | sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV | 340 | sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV |
340 | }, | 341 | }, |
341 | [DCCP_PKT_CLOSE] = { | 342 | [DCCP_PKT_CLOSE] = { |
342 | /* | 343 | /* |
343 | * sNO -> sIV No connection | 344 | * sNO -> sIV No connection |
344 | * sRQ -> sIV No connection | 345 | * sRQ -> sIV No connection |
345 | * sRS -> sIV No connection | 346 | * sRS -> sIV No connection |
346 | * sPO -> sOP -> sCG Move direcly to CLOSING | 347 | * sPO -> sOP -> sCG Move direcly to CLOSING |
347 | * sOP -> sCG Move to CLOSING | 348 | * sOP -> sCG Move to CLOSING |
348 | * sCR -> sIV Close after CloseReq is invalid | 349 | * sCR -> sIV Close after CloseReq is invalid |
349 | * sCG -> sCG Retransmit | 350 | * sCG -> sCG Retransmit |
350 | * sTW -> sIV Already closed | 351 | * sTW -> sIV Already closed |
351 | * | 352 | * |
352 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 353 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
353 | sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV | 354 | sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV |
354 | }, | 355 | }, |
355 | [DCCP_PKT_RESET] = { | 356 | [DCCP_PKT_RESET] = { |
356 | /* | 357 | /* |
357 | * sNO -> sIV No connection | 358 | * sNO -> sIV No connection |
358 | * sRQ -> sTW Reset in response to Request | 359 | * sRQ -> sTW Reset in response to Request |
359 | * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) | 360 | * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) |
360 | * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) | 361 | * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) |
361 | * sOP -> sTW | 362 | * sOP -> sTW |
362 | * sCR -> sTW | 363 | * sCR -> sTW |
363 | * sCG -> sTW | 364 | * sCG -> sTW |
364 | * sTW -> sIG Ignore (don't refresh timer) | 365 | * sTW -> sIG Ignore (don't refresh timer) |
365 | * | 366 | * |
366 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ | 367 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ |
367 | sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG | 368 | sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG |
368 | }, | 369 | }, |
369 | [DCCP_PKT_SYNC] = { | 370 | [DCCP_PKT_SYNC] = { |
370 | /* | 371 | /* |
371 | * We currently ignore Sync packets | 372 | * We currently ignore Sync packets |
372 | * | 373 | * |
373 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 374 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
374 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, | 375 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, |
375 | }, | 376 | }, |
376 | [DCCP_PKT_SYNCACK] = { | 377 | [DCCP_PKT_SYNCACK] = { |
377 | /* | 378 | /* |
378 | * We currently ignore SyncAck packets | 379 | * We currently ignore SyncAck packets |
379 | * | 380 | * |
380 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ | 381 | * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ |
381 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, | 382 | sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, |
382 | }, | 383 | }, |
383 | }, | 384 | }, |
384 | }; | 385 | }; |
385 | 386 | ||
386 | static inline struct nf_dccp_net *dccp_pernet(struct net *net) | 387 | static inline struct nf_dccp_net *dccp_pernet(struct net *net) |
387 | { | 388 | { |
388 | return &net->ct.nf_ct_proto.dccp; | 389 | return &net->ct.nf_ct_proto.dccp; |
389 | } | 390 | } |
390 | 391 | ||
391 | static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, | 392 | static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, |
392 | unsigned int dataoff, unsigned int *timeouts) | 393 | unsigned int dataoff) |
393 | { | 394 | { |
394 | struct net *net = nf_ct_net(ct); | 395 | struct net *net = nf_ct_net(ct); |
395 | struct nf_dccp_net *dn; | 396 | struct nf_dccp_net *dn; |
396 | struct dccp_hdr _dh, *dh; | 397 | struct dccp_hdr _dh, *dh; |
397 | const char *msg; | 398 | const char *msg; |
398 | u_int8_t state; | 399 | u_int8_t state; |
399 | 400 | ||
400 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); | 401 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); |
401 | BUG_ON(dh == NULL); | 402 | BUG_ON(dh == NULL); |
402 | 403 | ||
403 | state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; | 404 | state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; |
404 | switch (state) { | 405 | switch (state) { |
405 | default: | 406 | default: |
406 | dn = dccp_pernet(net); | 407 | dn = dccp_pernet(net); |
407 | if (dn->dccp_loose == 0) { | 408 | if (dn->dccp_loose == 0) { |
408 | msg = "not picking up existing connection "; | 409 | msg = "not picking up existing connection "; |
409 | goto out_invalid; | 410 | goto out_invalid; |
410 | } | 411 | } |
411 | case CT_DCCP_REQUEST: | 412 | case CT_DCCP_REQUEST: |
412 | break; | 413 | break; |
413 | case CT_DCCP_INVALID: | 414 | case CT_DCCP_INVALID: |
414 | msg = "invalid state transition "; | 415 | msg = "invalid state transition "; |
415 | goto out_invalid; | 416 | goto out_invalid; |
416 | } | 417 | } |
417 | 418 | ||
418 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; | 419 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; |
419 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; | 420 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; |
420 | ct->proto.dccp.state = CT_DCCP_NONE; | 421 | ct->proto.dccp.state = CT_DCCP_NONE; |
421 | ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; | 422 | ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; |
422 | ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; | 423 | ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; |
423 | ct->proto.dccp.handshake_seq = 0; | 424 | ct->proto.dccp.handshake_seq = 0; |
424 | return true; | 425 | return true; |
425 | 426 | ||
426 | out_invalid: | 427 | out_invalid: |
427 | nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); | 428 | nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); |
428 | return false; | 429 | return false; |
429 | } | 430 | } |
430 | 431 | ||
431 | static u64 dccp_ack_seq(const struct dccp_hdr *dh) | 432 | static u64 dccp_ack_seq(const struct dccp_hdr *dh) |
432 | { | 433 | { |
433 | const struct dccp_hdr_ack_bits *dhack; | 434 | const struct dccp_hdr_ack_bits *dhack; |
434 | 435 | ||
435 | dhack = (void *)dh + __dccp_basic_hdr_len(dh); | 436 | dhack = (void *)dh + __dccp_basic_hdr_len(dh); |
436 | return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + | 437 | return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + |
437 | ntohl(dhack->dccph_ack_nr_low); | 438 | ntohl(dhack->dccph_ack_nr_low); |
438 | } | 439 | } |
439 | 440 | ||
440 | static unsigned int *dccp_get_timeouts(struct net *net) | ||
441 | { | ||
442 | return dccp_pernet(net)->dccp_timeout; | ||
443 | } | ||
444 | |||
445 | static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, | 441 | static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, |
446 | unsigned int dataoff, enum ip_conntrack_info ctinfo, | 442 | unsigned int dataoff, enum ip_conntrack_info ctinfo) |
447 | unsigned int *timeouts) | ||
448 | { | 443 | { |
449 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 444 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
450 | struct dccp_hdr _dh, *dh; | 445 | struct dccp_hdr _dh, *dh; |
451 | u_int8_t type, old_state, new_state; | 446 | u_int8_t type, old_state, new_state; |
452 | enum ct_dccp_roles role; | 447 | enum ct_dccp_roles role; |
448 | unsigned int *timeouts; | ||
453 | 449 | ||
454 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); | 450 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); |
455 | BUG_ON(dh == NULL); | 451 | BUG_ON(dh == NULL); |
456 | type = dh->dccph_type; | 452 | type = dh->dccph_type; |
457 | 453 | ||
458 | if (type == DCCP_PKT_RESET && | 454 | if (type == DCCP_PKT_RESET && |
459 | !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | 455 | !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { |
460 | /* Tear down connection immediately if only reply is a RESET */ | 456 | /* Tear down connection immediately if only reply is a RESET */ |
461 | nf_ct_kill_acct(ct, ctinfo, skb); | 457 | nf_ct_kill_acct(ct, ctinfo, skb); |
462 | return NF_ACCEPT; | 458 | return NF_ACCEPT; |
463 | } | 459 | } |
464 | 460 | ||
465 | spin_lock_bh(&ct->lock); | 461 | spin_lock_bh(&ct->lock); |
466 | 462 | ||
467 | role = ct->proto.dccp.role[dir]; | 463 | role = ct->proto.dccp.role[dir]; |
468 | old_state = ct->proto.dccp.state; | 464 | old_state = ct->proto.dccp.state; |
469 | new_state = dccp_state_table[role][type][old_state]; | 465 | new_state = dccp_state_table[role][type][old_state]; |
470 | 466 | ||
471 | switch (new_state) { | 467 | switch (new_state) { |
472 | case CT_DCCP_REQUEST: | 468 | case CT_DCCP_REQUEST: |
473 | if (old_state == CT_DCCP_TIMEWAIT && | 469 | if (old_state == CT_DCCP_TIMEWAIT && |
474 | role == CT_DCCP_ROLE_SERVER) { | 470 | role == CT_DCCP_ROLE_SERVER) { |
475 | /* Reincarnation in the reverse direction: reopen and | 471 | /* Reincarnation in the reverse direction: reopen and |
476 | * reverse client/server roles. */ | 472 | * reverse client/server roles. */ |
477 | ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; | 473 | ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT; |
478 | ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; | 474 | ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER; |
479 | } | 475 | } |
480 | break; | 476 | break; |
481 | case CT_DCCP_RESPOND: | 477 | case CT_DCCP_RESPOND: |
482 | if (old_state == CT_DCCP_REQUEST) | 478 | if (old_state == CT_DCCP_REQUEST) |
483 | ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); | 479 | ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); |
484 | break; | 480 | break; |
485 | case CT_DCCP_PARTOPEN: | 481 | case CT_DCCP_PARTOPEN: |
486 | if (old_state == CT_DCCP_RESPOND && | 482 | if (old_state == CT_DCCP_RESPOND && |
487 | type == DCCP_PKT_ACK && | 483 | type == DCCP_PKT_ACK && |
488 | dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) | 484 | dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq) |
489 | set_bit(IPS_ASSURED_BIT, &ct->status); | 485 | set_bit(IPS_ASSURED_BIT, &ct->status); |
490 | break; | 486 | break; |
491 | case CT_DCCP_IGNORE: | 487 | case CT_DCCP_IGNORE: |
492 | /* | 488 | /* |
493 | * Connection tracking might be out of sync, so we ignore | 489 | * Connection tracking might be out of sync, so we ignore |
494 | * packets that might establish a new connection and resync | 490 | * packets that might establish a new connection and resync |
495 | * if the server responds with a valid Response. | 491 | * if the server responds with a valid Response. |
496 | */ | 492 | */ |
497 | if (ct->proto.dccp.last_dir == !dir && | 493 | if (ct->proto.dccp.last_dir == !dir && |
498 | ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && | 494 | ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST && |
499 | type == DCCP_PKT_RESPONSE) { | 495 | type == DCCP_PKT_RESPONSE) { |
500 | ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; | 496 | ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT; |
501 | ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; | 497 | ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER; |
502 | ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); | 498 | ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh); |
503 | new_state = CT_DCCP_RESPOND; | 499 | new_state = CT_DCCP_RESPOND; |
504 | break; | 500 | break; |
505 | } | 501 | } |
506 | ct->proto.dccp.last_dir = dir; | 502 | ct->proto.dccp.last_dir = dir; |
507 | ct->proto.dccp.last_pkt = type; | 503 | ct->proto.dccp.last_pkt = type; |
508 | 504 | ||
509 | spin_unlock_bh(&ct->lock); | 505 | spin_unlock_bh(&ct->lock); |
510 | nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); | 506 | nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); |
511 | return NF_ACCEPT; | 507 | return NF_ACCEPT; |
512 | case CT_DCCP_INVALID: | 508 | case CT_DCCP_INVALID: |
513 | spin_unlock_bh(&ct->lock); | 509 | spin_unlock_bh(&ct->lock); |
514 | nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); | 510 | nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); |
515 | return -NF_ACCEPT; | 511 | return -NF_ACCEPT; |
516 | } | 512 | } |
517 | 513 | ||
518 | ct->proto.dccp.last_dir = dir; | 514 | ct->proto.dccp.last_dir = dir; |
519 | ct->proto.dccp.last_pkt = type; | 515 | ct->proto.dccp.last_pkt = type; |
520 | ct->proto.dccp.state = new_state; | 516 | ct->proto.dccp.state = new_state; |
521 | spin_unlock_bh(&ct->lock); | 517 | spin_unlock_bh(&ct->lock); |
522 | 518 | ||
523 | if (new_state != old_state) | 519 | if (new_state != old_state) |
524 | nf_conntrack_event_cache(IPCT_PROTOINFO, ct); | 520 | nf_conntrack_event_cache(IPCT_PROTOINFO, ct); |
525 | 521 | ||
522 | timeouts = nf_ct_timeout_lookup(ct); | ||
523 | if (!timeouts) | ||
524 | timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout; | ||
526 | nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); | 525 | nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); |
527 | 526 | ||
528 | return NF_ACCEPT; | 527 | return NF_ACCEPT; |
529 | } | 528 | } |
530 | 529 | ||
531 | static int dccp_error(struct net *net, struct nf_conn *tmpl, | 530 | static int dccp_error(struct net *net, struct nf_conn *tmpl, |
532 | struct sk_buff *skb, unsigned int dataoff, | 531 | struct sk_buff *skb, unsigned int dataoff, |
533 | u_int8_t pf, unsigned int hooknum) | 532 | u_int8_t pf, unsigned int hooknum) |
534 | { | 533 | { |
535 | struct dccp_hdr _dh, *dh; | 534 | struct dccp_hdr _dh, *dh; |
536 | unsigned int dccp_len = skb->len - dataoff; | 535 | unsigned int dccp_len = skb->len - dataoff; |
537 | unsigned int cscov; | 536 | unsigned int cscov; |
538 | const char *msg; | 537 | const char *msg; |
539 | 538 | ||
540 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); | 539 | dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); |
541 | if (dh == NULL) { | 540 | if (dh == NULL) { |
542 | msg = "nf_ct_dccp: short packet "; | 541 | msg = "nf_ct_dccp: short packet "; |
543 | goto out_invalid; | 542 | goto out_invalid; |
544 | } | 543 | } |
545 | 544 | ||
546 | if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || | 545 | if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || |
547 | dh->dccph_doff * 4 > dccp_len) { | 546 | dh->dccph_doff * 4 > dccp_len) { |
548 | msg = "nf_ct_dccp: truncated/malformed packet "; | 547 | msg = "nf_ct_dccp: truncated/malformed packet "; |
549 | goto out_invalid; | 548 | goto out_invalid; |
550 | } | 549 | } |
551 | 550 | ||
552 | cscov = dccp_len; | 551 | cscov = dccp_len; |
553 | if (dh->dccph_cscov) { | 552 | if (dh->dccph_cscov) { |
554 | cscov = (dh->dccph_cscov - 1) * 4; | 553 | cscov = (dh->dccph_cscov - 1) * 4; |
555 | if (cscov > dccp_len) { | 554 | if (cscov > dccp_len) { |
556 | msg = "nf_ct_dccp: bad checksum coverage "; | 555 | msg = "nf_ct_dccp: bad checksum coverage "; |
557 | goto out_invalid; | 556 | goto out_invalid; |
558 | } | 557 | } |
559 | } | 558 | } |
560 | 559 | ||
561 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 560 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
562 | nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, | 561 | nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, |
563 | pf)) { | 562 | pf)) { |
564 | msg = "nf_ct_dccp: bad checksum "; | 563 | msg = "nf_ct_dccp: bad checksum "; |
565 | goto out_invalid; | 564 | goto out_invalid; |
566 | } | 565 | } |
567 | 566 | ||
568 | if (dh->dccph_type >= DCCP_PKT_INVALID) { | 567 | if (dh->dccph_type >= DCCP_PKT_INVALID) { |
569 | msg = "nf_ct_dccp: reserved packet type "; | 568 | msg = "nf_ct_dccp: reserved packet type "; |
570 | goto out_invalid; | 569 | goto out_invalid; |
571 | } | 570 | } |
572 | 571 | ||
573 | return NF_ACCEPT; | 572 | return NF_ACCEPT; |
574 | 573 | ||
575 | out_invalid: | 574 | out_invalid: |
576 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg); | 575 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg); |
577 | return -NF_ACCEPT; | 576 | return -NF_ACCEPT; |
578 | } | 577 | } |
579 | 578 | ||
580 | static bool dccp_can_early_drop(const struct nf_conn *ct) | 579 | static bool dccp_can_early_drop(const struct nf_conn *ct) |
581 | { | 580 | { |
582 | switch (ct->proto.dccp.state) { | 581 | switch (ct->proto.dccp.state) { |
583 | case CT_DCCP_CLOSEREQ: | 582 | case CT_DCCP_CLOSEREQ: |
584 | case CT_DCCP_CLOSING: | 583 | case CT_DCCP_CLOSING: |
585 | case CT_DCCP_TIMEWAIT: | 584 | case CT_DCCP_TIMEWAIT: |
586 | return true; | 585 | return true; |
587 | default: | 586 | default: |
588 | break; | 587 | break; |
589 | } | 588 | } |
590 | 589 | ||
591 | return false; | 590 | return false; |
592 | } | 591 | } |
593 | 592 | ||
594 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 593 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
595 | static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) | 594 | static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) |
596 | { | 595 | { |
597 | seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); | 596 | seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); |
598 | } | 597 | } |
599 | #endif | 598 | #endif |
600 | 599 | ||
601 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 600 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
602 | static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, | 601 | static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, |
603 | struct nf_conn *ct) | 602 | struct nf_conn *ct) |
604 | { | 603 | { |
605 | struct nlattr *nest_parms; | 604 | struct nlattr *nest_parms; |
606 | 605 | ||
607 | spin_lock_bh(&ct->lock); | 606 | spin_lock_bh(&ct->lock); |
608 | nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); | 607 | nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); |
609 | if (!nest_parms) | 608 | if (!nest_parms) |
610 | goto nla_put_failure; | 609 | goto nla_put_failure; |
611 | if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) || | 610 | if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) || |
612 | nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, | 611 | nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE, |
613 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || | 612 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) || |
614 | nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, | 613 | nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, |
615 | cpu_to_be64(ct->proto.dccp.handshake_seq), | 614 | cpu_to_be64(ct->proto.dccp.handshake_seq), |
616 | CTA_PROTOINFO_DCCP_PAD)) | 615 | CTA_PROTOINFO_DCCP_PAD)) |
617 | goto nla_put_failure; | 616 | goto nla_put_failure; |
618 | nla_nest_end(skb, nest_parms); | 617 | nla_nest_end(skb, nest_parms); |
619 | spin_unlock_bh(&ct->lock); | 618 | spin_unlock_bh(&ct->lock); |
620 | return 0; | 619 | return 0; |
621 | 620 | ||
622 | nla_put_failure: | 621 | nla_put_failure: |
623 | spin_unlock_bh(&ct->lock); | 622 | spin_unlock_bh(&ct->lock); |
624 | return -1; | 623 | return -1; |
625 | } | 624 | } |
626 | 625 | ||
627 | static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { | 626 | static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { |
628 | [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, | 627 | [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, |
629 | [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, | 628 | [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, |
630 | [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, | 629 | [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, |
631 | [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, | 630 | [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC }, |
632 | }; | 631 | }; |
633 | 632 | ||
634 | #define DCCP_NLATTR_SIZE ( \ | 633 | #define DCCP_NLATTR_SIZE ( \ |
635 | NLA_ALIGN(NLA_HDRLEN + 1) + \ | 634 | NLA_ALIGN(NLA_HDRLEN + 1) + \ |
636 | NLA_ALIGN(NLA_HDRLEN + 1) + \ | 635 | NLA_ALIGN(NLA_HDRLEN + 1) + \ |
637 | NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ | 636 | NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \ |
638 | NLA_ALIGN(NLA_HDRLEN + 0)) | 637 | NLA_ALIGN(NLA_HDRLEN + 0)) |
639 | 638 | ||
640 | static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) | 639 | static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) |
641 | { | 640 | { |
642 | struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; | 641 | struct nlattr *attr = cda[CTA_PROTOINFO_DCCP]; |
643 | struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; | 642 | struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1]; |
644 | int err; | 643 | int err; |
645 | 644 | ||
646 | if (!attr) | 645 | if (!attr) |
647 | return 0; | 646 | return 0; |
648 | 647 | ||
649 | err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, | 648 | err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr, |
650 | dccp_nla_policy, NULL); | 649 | dccp_nla_policy, NULL); |
651 | if (err < 0) | 650 | if (err < 0) |
652 | return err; | 651 | return err; |
653 | 652 | ||
654 | if (!tb[CTA_PROTOINFO_DCCP_STATE] || | 653 | if (!tb[CTA_PROTOINFO_DCCP_STATE] || |
655 | !tb[CTA_PROTOINFO_DCCP_ROLE] || | 654 | !tb[CTA_PROTOINFO_DCCP_ROLE] || |
656 | nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || | 655 | nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || |
657 | nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { | 656 | nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { |
658 | return -EINVAL; | 657 | return -EINVAL; |
659 | } | 658 | } |
660 | 659 | ||
661 | spin_lock_bh(&ct->lock); | 660 | spin_lock_bh(&ct->lock); |
662 | ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); | 661 | ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); |
663 | if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { | 662 | if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { |
664 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; | 663 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; |
665 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; | 664 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; |
666 | } else { | 665 | } else { |
667 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; | 666 | ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; |
668 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; | 667 | ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; |
669 | } | 668 | } |
670 | if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { | 669 | if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { |
671 | ct->proto.dccp.handshake_seq = | 670 | ct->proto.dccp.handshake_seq = |
672 | be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); | 671 | be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); |
673 | } | 672 | } |
674 | spin_unlock_bh(&ct->lock); | 673 | spin_unlock_bh(&ct->lock); |
675 | return 0; | 674 | return 0; |
676 | } | 675 | } |
677 | #endif | 676 | #endif |
678 | 677 | ||
679 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 678 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
680 | 679 | ||
681 | #include <linux/netfilter/nfnetlink.h> | 680 | #include <linux/netfilter/nfnetlink.h> |
682 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 681 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
683 | 682 | ||
684 | static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], | 683 | static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], |
685 | struct net *net, void *data) | 684 | struct net *net, void *data) |
686 | { | 685 | { |
687 | struct nf_dccp_net *dn = dccp_pernet(net); | 686 | struct nf_dccp_net *dn = dccp_pernet(net); |
688 | unsigned int *timeouts = data; | 687 | unsigned int *timeouts = data; |
689 | int i; | 688 | int i; |
690 | 689 | ||
691 | /* set default DCCP timeouts. */ | 690 | /* set default DCCP timeouts. */ |
692 | for (i=0; i<CT_DCCP_MAX; i++) | 691 | for (i=0; i<CT_DCCP_MAX; i++) |
693 | timeouts[i] = dn->dccp_timeout[i]; | 692 | timeouts[i] = dn->dccp_timeout[i]; |
694 | 693 | ||
695 | /* there's a 1:1 mapping between attributes and protocol states. */ | 694 | /* there's a 1:1 mapping between attributes and protocol states. */ |
696 | for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { | 695 | for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { |
697 | if (tb[i]) { | 696 | if (tb[i]) { |
698 | timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; | 697 | timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; |
699 | } | 698 | } |
700 | } | 699 | } |
701 | return 0; | 700 | return 0; |
702 | } | 701 | } |
703 | 702 | ||
704 | static int | 703 | static int |
705 | dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 704 | dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
706 | { | 705 | { |
707 | const unsigned int *timeouts = data; | 706 | const unsigned int *timeouts = data; |
708 | int i; | 707 | int i; |
709 | 708 | ||
710 | for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { | 709 | for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { |
711 | if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) | 710 | if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) |
712 | goto nla_put_failure; | 711 | goto nla_put_failure; |
713 | } | 712 | } |
714 | return 0; | 713 | return 0; |
715 | 714 | ||
716 | nla_put_failure: | 715 | nla_put_failure: |
717 | return -ENOSPC; | 716 | return -ENOSPC; |
718 | } | 717 | } |
719 | 718 | ||
720 | static const struct nla_policy | 719 | static const struct nla_policy |
721 | dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { | 720 | dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { |
722 | [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 }, | 721 | [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 }, |
723 | [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 }, | 722 | [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 }, |
724 | [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 }, | 723 | [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 }, |
725 | [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 }, | 724 | [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 }, |
726 | [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 }, | 725 | [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 }, |
727 | [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, | 726 | [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 }, |
728 | [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, | 727 | [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 }, |
729 | }; | 728 | }; |
730 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 729 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
731 | 730 | ||
732 | #ifdef CONFIG_SYSCTL | 731 | #ifdef CONFIG_SYSCTL |
733 | /* template, data assigned later */ | 732 | /* template, data assigned later */ |
734 | static struct ctl_table dccp_sysctl_table[] = { | 733 | static struct ctl_table dccp_sysctl_table[] = { |
735 | { | 734 | { |
736 | .procname = "nf_conntrack_dccp_timeout_request", | 735 | .procname = "nf_conntrack_dccp_timeout_request", |
737 | .maxlen = sizeof(unsigned int), | 736 | .maxlen = sizeof(unsigned int), |
738 | .mode = 0644, | 737 | .mode = 0644, |
739 | .proc_handler = proc_dointvec_jiffies, | 738 | .proc_handler = proc_dointvec_jiffies, |
740 | }, | 739 | }, |
741 | { | 740 | { |
742 | .procname = "nf_conntrack_dccp_timeout_respond", | 741 | .procname = "nf_conntrack_dccp_timeout_respond", |
743 | .maxlen = sizeof(unsigned int), | 742 | .maxlen = sizeof(unsigned int), |
744 | .mode = 0644, | 743 | .mode = 0644, |
745 | .proc_handler = proc_dointvec_jiffies, | 744 | .proc_handler = proc_dointvec_jiffies, |
746 | }, | 745 | }, |
747 | { | 746 | { |
748 | .procname = "nf_conntrack_dccp_timeout_partopen", | 747 | .procname = "nf_conntrack_dccp_timeout_partopen", |
749 | .maxlen = sizeof(unsigned int), | 748 | .maxlen = sizeof(unsigned int), |
750 | .mode = 0644, | 749 | .mode = 0644, |
751 | .proc_handler = proc_dointvec_jiffies, | 750 | .proc_handler = proc_dointvec_jiffies, |
752 | }, | 751 | }, |
753 | { | 752 | { |
754 | .procname = "nf_conntrack_dccp_timeout_open", | 753 | .procname = "nf_conntrack_dccp_timeout_open", |
755 | .maxlen = sizeof(unsigned int), | 754 | .maxlen = sizeof(unsigned int), |
756 | .mode = 0644, | 755 | .mode = 0644, |
757 | .proc_handler = proc_dointvec_jiffies, | 756 | .proc_handler = proc_dointvec_jiffies, |
758 | }, | 757 | }, |
759 | { | 758 | { |
760 | .procname = "nf_conntrack_dccp_timeout_closereq", | 759 | .procname = "nf_conntrack_dccp_timeout_closereq", |
761 | .maxlen = sizeof(unsigned int), | 760 | .maxlen = sizeof(unsigned int), |
762 | .mode = 0644, | 761 | .mode = 0644, |
763 | .proc_handler = proc_dointvec_jiffies, | 762 | .proc_handler = proc_dointvec_jiffies, |
764 | }, | 763 | }, |
765 | { | 764 | { |
766 | .procname = "nf_conntrack_dccp_timeout_closing", | 765 | .procname = "nf_conntrack_dccp_timeout_closing", |
767 | .maxlen = sizeof(unsigned int), | 766 | .maxlen = sizeof(unsigned int), |
768 | .mode = 0644, | 767 | .mode = 0644, |
769 | .proc_handler = proc_dointvec_jiffies, | 768 | .proc_handler = proc_dointvec_jiffies, |
770 | }, | 769 | }, |
771 | { | 770 | { |
772 | .procname = "nf_conntrack_dccp_timeout_timewait", | 771 | .procname = "nf_conntrack_dccp_timeout_timewait", |
773 | .maxlen = sizeof(unsigned int), | 772 | .maxlen = sizeof(unsigned int), |
774 | .mode = 0644, | 773 | .mode = 0644, |
775 | .proc_handler = proc_dointvec_jiffies, | 774 | .proc_handler = proc_dointvec_jiffies, |
776 | }, | 775 | }, |
777 | { | 776 | { |
778 | .procname = "nf_conntrack_dccp_loose", | 777 | .procname = "nf_conntrack_dccp_loose", |
779 | .maxlen = sizeof(int), | 778 | .maxlen = sizeof(int), |
780 | .mode = 0644, | 779 | .mode = 0644, |
781 | .proc_handler = proc_dointvec, | 780 | .proc_handler = proc_dointvec, |
782 | }, | 781 | }, |
783 | { } | 782 | { } |
784 | }; | 783 | }; |
785 | #endif /* CONFIG_SYSCTL */ | 784 | #endif /* CONFIG_SYSCTL */ |
786 | 785 | ||
787 | static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, | 786 | static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, |
788 | struct nf_dccp_net *dn) | 787 | struct nf_dccp_net *dn) |
789 | { | 788 | { |
790 | #ifdef CONFIG_SYSCTL | 789 | #ifdef CONFIG_SYSCTL |
791 | if (pn->ctl_table) | 790 | if (pn->ctl_table) |
792 | return 0; | 791 | return 0; |
793 | 792 | ||
794 | pn->ctl_table = kmemdup(dccp_sysctl_table, | 793 | pn->ctl_table = kmemdup(dccp_sysctl_table, |
795 | sizeof(dccp_sysctl_table), | 794 | sizeof(dccp_sysctl_table), |
796 | GFP_KERNEL); | 795 | GFP_KERNEL); |
797 | if (!pn->ctl_table) | 796 | if (!pn->ctl_table) |
798 | return -ENOMEM; | 797 | return -ENOMEM; |
799 | 798 | ||
800 | pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; | 799 | pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; |
801 | pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; | 800 | pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; |
802 | pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; | 801 | pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; |
803 | pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; | 802 | pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; |
804 | pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; | 803 | pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; |
805 | pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; | 804 | pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; |
806 | pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; | 805 | pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; |
807 | pn->ctl_table[7].data = &dn->dccp_loose; | 806 | pn->ctl_table[7].data = &dn->dccp_loose; |
808 | 807 | ||
809 | /* Don't export sysctls to unprivileged users */ | 808 | /* Don't export sysctls to unprivileged users */ |
810 | if (net->user_ns != &init_user_ns) | 809 | if (net->user_ns != &init_user_ns) |
811 | pn->ctl_table[0].procname = NULL; | 810 | pn->ctl_table[0].procname = NULL; |
812 | #endif | 811 | #endif |
813 | return 0; | 812 | return 0; |
814 | } | 813 | } |
815 | 814 | ||
816 | static int dccp_init_net(struct net *net, u_int16_t proto) | 815 | static int dccp_init_net(struct net *net, u_int16_t proto) |
817 | { | 816 | { |
818 | struct nf_dccp_net *dn = dccp_pernet(net); | 817 | struct nf_dccp_net *dn = dccp_pernet(net); |
819 | struct nf_proto_net *pn = &dn->pn; | 818 | struct nf_proto_net *pn = &dn->pn; |
820 | 819 | ||
821 | if (!pn->users) { | 820 | if (!pn->users) { |
822 | /* default values */ | 821 | /* default values */ |
823 | dn->dccp_loose = 1; | 822 | dn->dccp_loose = 1; |
824 | dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; | 823 | dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; |
825 | dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; | 824 | dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; |
826 | dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; | 825 | dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; |
827 | dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; | 826 | dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; |
828 | dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; | 827 | dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; |
829 | dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; | 828 | dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; |
830 | dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; | 829 | dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; |
831 | } | 830 | } |
832 | 831 | ||
833 | return dccp_kmemdup_sysctl_table(net, pn, dn); | 832 | return dccp_kmemdup_sysctl_table(net, pn, dn); |
834 | } | 833 | } |
835 | 834 | ||
836 | static struct nf_proto_net *dccp_get_net_proto(struct net *net) | 835 | static struct nf_proto_net *dccp_get_net_proto(struct net *net) |
837 | { | 836 | { |
838 | return &net->ct.nf_ct_proto.dccp.pn; | 837 | return &net->ct.nf_ct_proto.dccp.pn; |
839 | } | 838 | } |
840 | 839 | ||
841 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { | 840 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { |
842 | .l3proto = AF_INET, | 841 | .l3proto = AF_INET, |
843 | .l4proto = IPPROTO_DCCP, | 842 | .l4proto = IPPROTO_DCCP, |
844 | .new = dccp_new, | 843 | .new = dccp_new, |
845 | .packet = dccp_packet, | 844 | .packet = dccp_packet, |
846 | .get_timeouts = dccp_get_timeouts, | ||
847 | .error = dccp_error, | 845 | .error = dccp_error, |
848 | .can_early_drop = dccp_can_early_drop, | 846 | .can_early_drop = dccp_can_early_drop, |
849 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 847 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
850 | .print_conntrack = dccp_print_conntrack, | 848 | .print_conntrack = dccp_print_conntrack, |
851 | #endif | 849 | #endif |
852 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 850 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
853 | .nlattr_size = DCCP_NLATTR_SIZE, | 851 | .nlattr_size = DCCP_NLATTR_SIZE, |
854 | .to_nlattr = dccp_to_nlattr, | 852 | .to_nlattr = dccp_to_nlattr, |
855 | .from_nlattr = nlattr_to_dccp, | 853 | .from_nlattr = nlattr_to_dccp, |
856 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 854 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
857 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 855 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
858 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 856 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
859 | .nla_policy = nf_ct_port_nla_policy, | 857 | .nla_policy = nf_ct_port_nla_policy, |
860 | #endif | 858 | #endif |
861 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 859 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
862 | .ctnl_timeout = { | 860 | .ctnl_timeout = { |
863 | .nlattr_to_obj = dccp_timeout_nlattr_to_obj, | 861 | .nlattr_to_obj = dccp_timeout_nlattr_to_obj, |
864 | .obj_to_nlattr = dccp_timeout_obj_to_nlattr, | 862 | .obj_to_nlattr = dccp_timeout_obj_to_nlattr, |
865 | .nlattr_max = CTA_TIMEOUT_DCCP_MAX, | 863 | .nlattr_max = CTA_TIMEOUT_DCCP_MAX, |
866 | .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, | 864 | .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, |
867 | .nla_policy = dccp_timeout_nla_policy, | 865 | .nla_policy = dccp_timeout_nla_policy, |
868 | }, | 866 | }, |
869 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 867 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
870 | .init_net = dccp_init_net, | 868 | .init_net = dccp_init_net, |
871 | .get_net_proto = dccp_get_net_proto, | 869 | .get_net_proto = dccp_get_net_proto, |
872 | }; | 870 | }; |
873 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); | 871 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); |
874 | 872 | ||
875 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { | 873 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { |
876 | .l3proto = AF_INET6, | 874 | .l3proto = AF_INET6, |
877 | .l4proto = IPPROTO_DCCP, | 875 | .l4proto = IPPROTO_DCCP, |
878 | .new = dccp_new, | 876 | .new = dccp_new, |
879 | .packet = dccp_packet, | 877 | .packet = dccp_packet, |
880 | .get_timeouts = dccp_get_timeouts, | ||
881 | .error = dccp_error, | 878 | .error = dccp_error, |
882 | .can_early_drop = dccp_can_early_drop, | 879 | .can_early_drop = dccp_can_early_drop, |
883 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 880 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
884 | .print_conntrack = dccp_print_conntrack, | 881 | .print_conntrack = dccp_print_conntrack, |
885 | #endif | 882 | #endif |
886 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 883 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
887 | .nlattr_size = DCCP_NLATTR_SIZE, | 884 | .nlattr_size = DCCP_NLATTR_SIZE, |
888 | .to_nlattr = dccp_to_nlattr, | 885 | .to_nlattr = dccp_to_nlattr, |
889 | .from_nlattr = nlattr_to_dccp, | 886 | .from_nlattr = nlattr_to_dccp, |
890 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 887 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
891 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 888 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
892 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 889 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
893 | .nla_policy = nf_ct_port_nla_policy, | 890 | .nla_policy = nf_ct_port_nla_policy, |
894 | #endif | 891 | #endif |
895 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 892 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
896 | .ctnl_timeout = { | 893 | .ctnl_timeout = { |
897 | .nlattr_to_obj = dccp_timeout_nlattr_to_obj, | 894 | .nlattr_to_obj = dccp_timeout_nlattr_to_obj, |
898 | .obj_to_nlattr = dccp_timeout_obj_to_nlattr, | 895 | .obj_to_nlattr = dccp_timeout_obj_to_nlattr, |
899 | .nlattr_max = CTA_TIMEOUT_DCCP_MAX, | 896 | .nlattr_max = CTA_TIMEOUT_DCCP_MAX, |
900 | .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, | 897 | .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, |
901 | .nla_policy = dccp_timeout_nla_policy, | 898 | .nla_policy = dccp_timeout_nla_policy, |
902 | }, | 899 | }, |
903 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 900 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
net/netfilter/nf_conntrack_proto_generic.c
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License version 2 as | 5 | * it under the terms of the GNU General Public License version 2 as |
6 | * published by the Free Software Foundation. | 6 | * published by the Free Software Foundation. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/jiffies.h> | 10 | #include <linux/jiffies.h> |
11 | #include <linux/timer.h> | 11 | #include <linux/timer.h> |
12 | #include <linux/netfilter.h> | 12 | #include <linux/netfilter.h> |
13 | #include <net/netfilter/nf_conntrack_l4proto.h> | 13 | #include <net/netfilter/nf_conntrack_l4proto.h> |
14 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
14 | 15 | ||
15 | static const unsigned int nf_ct_generic_timeout = 600*HZ; | 16 | static const unsigned int nf_ct_generic_timeout = 600*HZ; |
16 | 17 | ||
17 | static bool nf_generic_should_process(u8 proto) | 18 | static bool nf_generic_should_process(u8 proto) |
18 | { | 19 | { |
19 | switch (proto) { | 20 | switch (proto) { |
20 | #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE | 21 | #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE |
21 | case IPPROTO_GRE: | 22 | case IPPROTO_GRE: |
22 | return false; | 23 | return false; |
23 | #endif | 24 | #endif |
24 | default: | 25 | default: |
25 | return true; | 26 | return true; |
26 | } | 27 | } |
27 | } | 28 | } |
28 | 29 | ||
29 | static inline struct nf_generic_net *generic_pernet(struct net *net) | 30 | static inline struct nf_generic_net *generic_pernet(struct net *net) |
30 | { | 31 | { |
31 | return &net->ct.nf_ct_proto.generic; | 32 | return &net->ct.nf_ct_proto.generic; |
32 | } | 33 | } |
33 | 34 | ||
34 | static bool generic_pkt_to_tuple(const struct sk_buff *skb, | 35 | static bool generic_pkt_to_tuple(const struct sk_buff *skb, |
35 | unsigned int dataoff, | 36 | unsigned int dataoff, |
36 | struct net *net, struct nf_conntrack_tuple *tuple) | 37 | struct net *net, struct nf_conntrack_tuple *tuple) |
37 | { | 38 | { |
38 | tuple->src.u.all = 0; | 39 | tuple->src.u.all = 0; |
39 | tuple->dst.u.all = 0; | 40 | tuple->dst.u.all = 0; |
40 | 41 | ||
41 | return true; | 42 | return true; |
42 | } | 43 | } |
43 | 44 | ||
44 | static unsigned int *generic_get_timeouts(struct net *net) | ||
45 | { | ||
46 | return &(generic_pernet(net)->timeout); | ||
47 | } | ||
48 | |||
49 | /* Returns verdict for packet, or -1 for invalid. */ | 45 | /* Returns verdict for packet, or -1 for invalid. */ |
50 | static int generic_packet(struct nf_conn *ct, | 46 | static int generic_packet(struct nf_conn *ct, |
51 | const struct sk_buff *skb, | 47 | const struct sk_buff *skb, |
52 | unsigned int dataoff, | 48 | unsigned int dataoff, |
53 | enum ip_conntrack_info ctinfo, | 49 | enum ip_conntrack_info ctinfo) |
54 | unsigned int *timeout) | ||
55 | { | 50 | { |
51 | const unsigned int *timeout = nf_ct_timeout_lookup(ct); | ||
52 | |||
53 | if (!timeout) | ||
54 | timeout = &generic_pernet(nf_ct_net(ct))->timeout; | ||
55 | |||
56 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); | 56 | nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); |
57 | return NF_ACCEPT; | 57 | return NF_ACCEPT; |
58 | } | 58 | } |
59 | 59 | ||
60 | /* Called when a new connection for this protocol found. */ | 60 | /* Called when a new connection for this protocol found. */ |
61 | static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, | 61 | static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, |
62 | unsigned int dataoff, unsigned int *timeouts) | 62 | unsigned int dataoff) |
63 | { | 63 | { |
64 | bool ret; | 64 | bool ret; |
65 | 65 | ||
66 | ret = nf_generic_should_process(nf_ct_protonum(ct)); | 66 | ret = nf_generic_should_process(nf_ct_protonum(ct)); |
67 | if (!ret) | 67 | if (!ret) |
68 | pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", | 68 | pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", |
69 | nf_ct_protonum(ct)); | 69 | nf_ct_protonum(ct)); |
70 | return ret; | 70 | return ret; |
71 | } | 71 | } |
72 | 72 | ||
73 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 73 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
74 | 74 | ||
75 | #include <linux/netfilter/nfnetlink.h> | 75 | #include <linux/netfilter/nfnetlink.h> |
76 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 76 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
77 | 77 | ||
78 | static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], | 78 | static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], |
79 | struct net *net, void *data) | 79 | struct net *net, void *data) |
80 | { | 80 | { |
81 | unsigned int *timeout = data; | ||
82 | struct nf_generic_net *gn = generic_pernet(net); | 81 | struct nf_generic_net *gn = generic_pernet(net); |
82 | unsigned int *timeout = data; | ||
83 | 83 | ||
84 | if (!timeout) | ||
85 | timeout = &gn->timeout; | ||
86 | |||
84 | if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) | 87 | if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) |
85 | *timeout = | 88 | *timeout = |
86 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; | 89 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; |
87 | else { | 90 | else { |
88 | /* Set default generic timeout. */ | 91 | /* Set default generic timeout. */ |
89 | *timeout = gn->timeout; | 92 | *timeout = gn->timeout; |
90 | } | 93 | } |
91 | 94 | ||
92 | return 0; | 95 | return 0; |
93 | } | 96 | } |
94 | 97 | ||
95 | static int | 98 | static int |
96 | generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 99 | generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
97 | { | 100 | { |
98 | const unsigned int *timeout = data; | 101 | const unsigned int *timeout = data; |
99 | 102 | ||
100 | if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ))) | 103 | if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ))) |
101 | goto nla_put_failure; | 104 | goto nla_put_failure; |
102 | 105 | ||
103 | return 0; | 106 | return 0; |
104 | 107 | ||
105 | nla_put_failure: | 108 | nla_put_failure: |
106 | return -ENOSPC; | 109 | return -ENOSPC; |
107 | } | 110 | } |
108 | 111 | ||
109 | static const struct nla_policy | 112 | static const struct nla_policy |
110 | generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { | 113 | generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { |
111 | [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, | 114 | [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, |
112 | }; | 115 | }; |
113 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 116 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
114 | 117 | ||
115 | #ifdef CONFIG_SYSCTL | 118 | #ifdef CONFIG_SYSCTL |
116 | static struct ctl_table generic_sysctl_table[] = { | 119 | static struct ctl_table generic_sysctl_table[] = { |
117 | { | 120 | { |
118 | .procname = "nf_conntrack_generic_timeout", | 121 | .procname = "nf_conntrack_generic_timeout", |
119 | .maxlen = sizeof(unsigned int), | 122 | .maxlen = sizeof(unsigned int), |
120 | .mode = 0644, | 123 | .mode = 0644, |
121 | .proc_handler = proc_dointvec_jiffies, | 124 | .proc_handler = proc_dointvec_jiffies, |
122 | }, | 125 | }, |
123 | { } | 126 | { } |
124 | }; | 127 | }; |
125 | #endif /* CONFIG_SYSCTL */ | 128 | #endif /* CONFIG_SYSCTL */ |
126 | 129 | ||
127 | static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, | 130 | static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, |
128 | struct nf_generic_net *gn) | 131 | struct nf_generic_net *gn) |
129 | { | 132 | { |
130 | #ifdef CONFIG_SYSCTL | 133 | #ifdef CONFIG_SYSCTL |
131 | pn->ctl_table = kmemdup(generic_sysctl_table, | 134 | pn->ctl_table = kmemdup(generic_sysctl_table, |
132 | sizeof(generic_sysctl_table), | 135 | sizeof(generic_sysctl_table), |
133 | GFP_KERNEL); | 136 | GFP_KERNEL); |
134 | if (!pn->ctl_table) | 137 | if (!pn->ctl_table) |
135 | return -ENOMEM; | 138 | return -ENOMEM; |
136 | 139 | ||
137 | pn->ctl_table[0].data = &gn->timeout; | 140 | pn->ctl_table[0].data = &gn->timeout; |
138 | #endif | 141 | #endif |
139 | return 0; | 142 | return 0; |
140 | } | 143 | } |
141 | 144 | ||
142 | static int generic_init_net(struct net *net, u_int16_t proto) | 145 | static int generic_init_net(struct net *net, u_int16_t proto) |
143 | { | 146 | { |
144 | struct nf_generic_net *gn = generic_pernet(net); | 147 | struct nf_generic_net *gn = generic_pernet(net); |
145 | struct nf_proto_net *pn = &gn->pn; | 148 | struct nf_proto_net *pn = &gn->pn; |
146 | 149 | ||
147 | gn->timeout = nf_ct_generic_timeout; | 150 | gn->timeout = nf_ct_generic_timeout; |
148 | 151 | ||
149 | return generic_kmemdup_sysctl_table(pn, gn); | 152 | return generic_kmemdup_sysctl_table(pn, gn); |
150 | } | 153 | } |
151 | 154 | ||
152 | static struct nf_proto_net *generic_get_net_proto(struct net *net) | 155 | static struct nf_proto_net *generic_get_net_proto(struct net *net) |
153 | { | 156 | { |
154 | return &net->ct.nf_ct_proto.generic.pn; | 157 | return &net->ct.nf_ct_proto.generic.pn; |
155 | } | 158 | } |
156 | 159 | ||
157 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = | 160 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = |
158 | { | 161 | { |
159 | .l3proto = PF_UNSPEC, | 162 | .l3proto = PF_UNSPEC, |
160 | .l4proto = 255, | 163 | .l4proto = 255, |
161 | .pkt_to_tuple = generic_pkt_to_tuple, | 164 | .pkt_to_tuple = generic_pkt_to_tuple, |
162 | .packet = generic_packet, | 165 | .packet = generic_packet, |
163 | .get_timeouts = generic_get_timeouts, | ||
164 | .new = generic_new, | 166 | .new = generic_new, |
165 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 167 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
166 | .ctnl_timeout = { | 168 | .ctnl_timeout = { |
167 | .nlattr_to_obj = generic_timeout_nlattr_to_obj, | 169 | .nlattr_to_obj = generic_timeout_nlattr_to_obj, |
168 | .obj_to_nlattr = generic_timeout_obj_to_nlattr, | 170 | .obj_to_nlattr = generic_timeout_obj_to_nlattr, |
169 | .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, | 171 | .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, |
net/netfilter/nf_conntrack_proto_gre.c
1 | /* | 1 | /* |
2 | * ip_conntrack_proto_gre.c - Version 3.0 | 2 | * ip_conntrack_proto_gre.c - Version 3.0 |
3 | * | 3 | * |
4 | * Connection tracking protocol helper module for GRE. | 4 | * Connection tracking protocol helper module for GRE. |
5 | * | 5 | * |
6 | * GRE is a generic encapsulation protocol, which is generally not very | 6 | * GRE is a generic encapsulation protocol, which is generally not very |
7 | * suited for NAT, as it has no protocol-specific part as port numbers. | 7 | * suited for NAT, as it has no protocol-specific part as port numbers. |
8 | * | 8 | * |
9 | * It has an optional key field, which may help us distinguishing two | 9 | * It has an optional key field, which may help us distinguishing two |
10 | * connections between the same two hosts. | 10 | * connections between the same two hosts. |
11 | * | 11 | * |
12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 | 12 | * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 |
13 | * | 13 | * |
14 | * PPTP is built on top of a modified version of GRE, and has a mandatory | 14 | * PPTP is built on top of a modified version of GRE, and has a mandatory |
15 | * field called "CallID", which serves us for the same purpose as the key | 15 | * field called "CallID", which serves us for the same purpose as the key |
16 | * field in plain GRE. | 16 | * field in plain GRE. |
17 | * | 17 | * |
18 | * Documentation about PPTP can be found in RFC 2637 | 18 | * Documentation about PPTP can be found in RFC 2637 |
19 | * | 19 | * |
20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> | 20 | * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> |
21 | * | 21 | * |
22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) | 22 | * Development of this code funded by Astaro AG (http://www.astaro.com/) |
23 | * | 23 | * |
24 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> | 24 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/timer.h> | 29 | #include <linux/timer.h> |
30 | #include <linux/list.h> | 30 | #include <linux/list.h> |
31 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
32 | #include <linux/in.h> | 32 | #include <linux/in.h> |
33 | #include <linux/netdevice.h> | 33 | #include <linux/netdevice.h> |
34 | #include <linux/skbuff.h> | 34 | #include <linux/skbuff.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <net/dst.h> | 36 | #include <net/dst.h> |
37 | #include <net/net_namespace.h> | 37 | #include <net/net_namespace.h> |
38 | #include <net/netns/generic.h> | 38 | #include <net/netns/generic.h> |
39 | #include <net/netfilter/nf_conntrack_l4proto.h> | 39 | #include <net/netfilter/nf_conntrack_l4proto.h> |
40 | #include <net/netfilter/nf_conntrack_helper.h> | 40 | #include <net/netfilter/nf_conntrack_helper.h> |
41 | #include <net/netfilter/nf_conntrack_core.h> | 41 | #include <net/netfilter/nf_conntrack_core.h> |
42 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
42 | #include <linux/netfilter/nf_conntrack_proto_gre.h> | 43 | #include <linux/netfilter/nf_conntrack_proto_gre.h> |
43 | #include <linux/netfilter/nf_conntrack_pptp.h> | 44 | #include <linux/netfilter/nf_conntrack_pptp.h> |
44 | 45 | ||
45 | enum grep_conntrack { | 46 | enum grep_conntrack { |
46 | GRE_CT_UNREPLIED, | 47 | GRE_CT_UNREPLIED, |
47 | GRE_CT_REPLIED, | 48 | GRE_CT_REPLIED, |
48 | GRE_CT_MAX | 49 | GRE_CT_MAX |
49 | }; | 50 | }; |
50 | 51 | ||
51 | static const unsigned int gre_timeouts[GRE_CT_MAX] = { | 52 | static const unsigned int gre_timeouts[GRE_CT_MAX] = { |
52 | [GRE_CT_UNREPLIED] = 30*HZ, | 53 | [GRE_CT_UNREPLIED] = 30*HZ, |
53 | [GRE_CT_REPLIED] = 180*HZ, | 54 | [GRE_CT_REPLIED] = 180*HZ, |
54 | }; | 55 | }; |
55 | 56 | ||
56 | static unsigned int proto_gre_net_id __read_mostly; | 57 | static unsigned int proto_gre_net_id __read_mostly; |
57 | struct netns_proto_gre { | 58 | struct netns_proto_gre { |
58 | struct nf_proto_net nf; | 59 | struct nf_proto_net nf; |
59 | rwlock_t keymap_lock; | 60 | rwlock_t keymap_lock; |
60 | struct list_head keymap_list; | 61 | struct list_head keymap_list; |
61 | unsigned int gre_timeouts[GRE_CT_MAX]; | 62 | unsigned int gre_timeouts[GRE_CT_MAX]; |
62 | }; | 63 | }; |
63 | 64 | ||
64 | static inline struct netns_proto_gre *gre_pernet(struct net *net) | 65 | static inline struct netns_proto_gre *gre_pernet(struct net *net) |
65 | { | 66 | { |
66 | return net_generic(net, proto_gre_net_id); | 67 | return net_generic(net, proto_gre_net_id); |
67 | } | 68 | } |
68 | 69 | ||
69 | static void nf_ct_gre_keymap_flush(struct net *net) | 70 | static void nf_ct_gre_keymap_flush(struct net *net) |
70 | { | 71 | { |
71 | struct netns_proto_gre *net_gre = gre_pernet(net); | 72 | struct netns_proto_gre *net_gre = gre_pernet(net); |
72 | struct nf_ct_gre_keymap *km, *tmp; | 73 | struct nf_ct_gre_keymap *km, *tmp; |
73 | 74 | ||
74 | write_lock_bh(&net_gre->keymap_lock); | 75 | write_lock_bh(&net_gre->keymap_lock); |
75 | list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { | 76 | list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { |
76 | list_del(&km->list); | 77 | list_del(&km->list); |
77 | kfree(km); | 78 | kfree(km); |
78 | } | 79 | } |
79 | write_unlock_bh(&net_gre->keymap_lock); | 80 | write_unlock_bh(&net_gre->keymap_lock); |
80 | } | 81 | } |
81 | 82 | ||
82 | static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, | 83 | static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, |
83 | const struct nf_conntrack_tuple *t) | 84 | const struct nf_conntrack_tuple *t) |
84 | { | 85 | { |
85 | return km->tuple.src.l3num == t->src.l3num && | 86 | return km->tuple.src.l3num == t->src.l3num && |
86 | !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) && | 87 | !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) && |
87 | !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) && | 88 | !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) && |
88 | km->tuple.dst.protonum == t->dst.protonum && | 89 | km->tuple.dst.protonum == t->dst.protonum && |
89 | km->tuple.dst.u.all == t->dst.u.all; | 90 | km->tuple.dst.u.all == t->dst.u.all; |
90 | } | 91 | } |
91 | 92 | ||
92 | /* look up the source key for a given tuple */ | 93 | /* look up the source key for a given tuple */ |
93 | static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) | 94 | static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) |
94 | { | 95 | { |
95 | struct netns_proto_gre *net_gre = gre_pernet(net); | 96 | struct netns_proto_gre *net_gre = gre_pernet(net); |
96 | struct nf_ct_gre_keymap *km; | 97 | struct nf_ct_gre_keymap *km; |
97 | __be16 key = 0; | 98 | __be16 key = 0; |
98 | 99 | ||
99 | read_lock_bh(&net_gre->keymap_lock); | 100 | read_lock_bh(&net_gre->keymap_lock); |
100 | list_for_each_entry(km, &net_gre->keymap_list, list) { | 101 | list_for_each_entry(km, &net_gre->keymap_list, list) { |
101 | if (gre_key_cmpfn(km, t)) { | 102 | if (gre_key_cmpfn(km, t)) { |
102 | key = km->tuple.src.u.gre.key; | 103 | key = km->tuple.src.u.gre.key; |
103 | break; | 104 | break; |
104 | } | 105 | } |
105 | } | 106 | } |
106 | read_unlock_bh(&net_gre->keymap_lock); | 107 | read_unlock_bh(&net_gre->keymap_lock); |
107 | 108 | ||
108 | pr_debug("lookup src key 0x%x for ", key); | 109 | pr_debug("lookup src key 0x%x for ", key); |
109 | nf_ct_dump_tuple(t); | 110 | nf_ct_dump_tuple(t); |
110 | 111 | ||
111 | return key; | 112 | return key; |
112 | } | 113 | } |
113 | 114 | ||
114 | /* add a single keymap entry, associate with specified master ct */ | 115 | /* add a single keymap entry, associate with specified master ct */ |
115 | int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, | 116 | int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, |
116 | struct nf_conntrack_tuple *t) | 117 | struct nf_conntrack_tuple *t) |
117 | { | 118 | { |
118 | struct net *net = nf_ct_net(ct); | 119 | struct net *net = nf_ct_net(ct); |
119 | struct netns_proto_gre *net_gre = gre_pernet(net); | 120 | struct netns_proto_gre *net_gre = gre_pernet(net); |
120 | struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); | 121 | struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); |
121 | struct nf_ct_gre_keymap **kmp, *km; | 122 | struct nf_ct_gre_keymap **kmp, *km; |
122 | 123 | ||
123 | kmp = &ct_pptp_info->keymap[dir]; | 124 | kmp = &ct_pptp_info->keymap[dir]; |
124 | if (*kmp) { | 125 | if (*kmp) { |
125 | /* check whether it's a retransmission */ | 126 | /* check whether it's a retransmission */ |
126 | read_lock_bh(&net_gre->keymap_lock); | 127 | read_lock_bh(&net_gre->keymap_lock); |
127 | list_for_each_entry(km, &net_gre->keymap_list, list) { | 128 | list_for_each_entry(km, &net_gre->keymap_list, list) { |
128 | if (gre_key_cmpfn(km, t) && km == *kmp) { | 129 | if (gre_key_cmpfn(km, t) && km == *kmp) { |
129 | read_unlock_bh(&net_gre->keymap_lock); | 130 | read_unlock_bh(&net_gre->keymap_lock); |
130 | return 0; | 131 | return 0; |
131 | } | 132 | } |
132 | } | 133 | } |
133 | read_unlock_bh(&net_gre->keymap_lock); | 134 | read_unlock_bh(&net_gre->keymap_lock); |
134 | pr_debug("trying to override keymap_%s for ct %p\n", | 135 | pr_debug("trying to override keymap_%s for ct %p\n", |
135 | dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); | 136 | dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); |
136 | return -EEXIST; | 137 | return -EEXIST; |
137 | } | 138 | } |
138 | 139 | ||
139 | km = kmalloc(sizeof(*km), GFP_ATOMIC); | 140 | km = kmalloc(sizeof(*km), GFP_ATOMIC); |
140 | if (!km) | 141 | if (!km) |
141 | return -ENOMEM; | 142 | return -ENOMEM; |
142 | memcpy(&km->tuple, t, sizeof(*t)); | 143 | memcpy(&km->tuple, t, sizeof(*t)); |
143 | *kmp = km; | 144 | *kmp = km; |
144 | 145 | ||
145 | pr_debug("adding new entry %p: ", km); | 146 | pr_debug("adding new entry %p: ", km); |
146 | nf_ct_dump_tuple(&km->tuple); | 147 | nf_ct_dump_tuple(&km->tuple); |
147 | 148 | ||
148 | write_lock_bh(&net_gre->keymap_lock); | 149 | write_lock_bh(&net_gre->keymap_lock); |
149 | list_add_tail(&km->list, &net_gre->keymap_list); | 150 | list_add_tail(&km->list, &net_gre->keymap_list); |
150 | write_unlock_bh(&net_gre->keymap_lock); | 151 | write_unlock_bh(&net_gre->keymap_lock); |
151 | 152 | ||
152 | return 0; | 153 | return 0; |
153 | } | 154 | } |
154 | EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); | 155 | EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); |
155 | 156 | ||
156 | /* destroy the keymap entries associated with specified master ct */ | 157 | /* destroy the keymap entries associated with specified master ct */ |
157 | void nf_ct_gre_keymap_destroy(struct nf_conn *ct) | 158 | void nf_ct_gre_keymap_destroy(struct nf_conn *ct) |
158 | { | 159 | { |
159 | struct net *net = nf_ct_net(ct); | 160 | struct net *net = nf_ct_net(ct); |
160 | struct netns_proto_gre *net_gre = gre_pernet(net); | 161 | struct netns_proto_gre *net_gre = gre_pernet(net); |
161 | struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); | 162 | struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); |
162 | enum ip_conntrack_dir dir; | 163 | enum ip_conntrack_dir dir; |
163 | 164 | ||
164 | pr_debug("entering for ct %p\n", ct); | 165 | pr_debug("entering for ct %p\n", ct); |
165 | 166 | ||
166 | write_lock_bh(&net_gre->keymap_lock); | 167 | write_lock_bh(&net_gre->keymap_lock); |
167 | for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { | 168 | for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { |
168 | if (ct_pptp_info->keymap[dir]) { | 169 | if (ct_pptp_info->keymap[dir]) { |
169 | pr_debug("removing %p from list\n", | 170 | pr_debug("removing %p from list\n", |
170 | ct_pptp_info->keymap[dir]); | 171 | ct_pptp_info->keymap[dir]); |
171 | list_del(&ct_pptp_info->keymap[dir]->list); | 172 | list_del(&ct_pptp_info->keymap[dir]->list); |
172 | kfree(ct_pptp_info->keymap[dir]); | 173 | kfree(ct_pptp_info->keymap[dir]); |
173 | ct_pptp_info->keymap[dir] = NULL; | 174 | ct_pptp_info->keymap[dir] = NULL; |
174 | } | 175 | } |
175 | } | 176 | } |
176 | write_unlock_bh(&net_gre->keymap_lock); | 177 | write_unlock_bh(&net_gre->keymap_lock); |
177 | } | 178 | } |
178 | EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); | 179 | EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); |
179 | 180 | ||
180 | /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ | 181 | /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ |
181 | 182 | ||
182 | /* gre hdr info to tuple */ | 183 | /* gre hdr info to tuple */ |
183 | static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, | 184 | static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, |
184 | struct net *net, struct nf_conntrack_tuple *tuple) | 185 | struct net *net, struct nf_conntrack_tuple *tuple) |
185 | { | 186 | { |
186 | const struct pptp_gre_header *pgrehdr; | 187 | const struct pptp_gre_header *pgrehdr; |
187 | struct pptp_gre_header _pgrehdr; | 188 | struct pptp_gre_header _pgrehdr; |
188 | __be16 srckey; | 189 | __be16 srckey; |
189 | const struct gre_base_hdr *grehdr; | 190 | const struct gre_base_hdr *grehdr; |
190 | struct gre_base_hdr _grehdr; | 191 | struct gre_base_hdr _grehdr; |
191 | 192 | ||
192 | /* first only delinearize old RFC1701 GRE header */ | 193 | /* first only delinearize old RFC1701 GRE header */ |
193 | grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); | 194 | grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); |
194 | if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { | 195 | if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { |
195 | /* try to behave like "nf_conntrack_proto_generic" */ | 196 | /* try to behave like "nf_conntrack_proto_generic" */ |
196 | tuple->src.u.all = 0; | 197 | tuple->src.u.all = 0; |
197 | tuple->dst.u.all = 0; | 198 | tuple->dst.u.all = 0; |
198 | return true; | 199 | return true; |
199 | } | 200 | } |
200 | 201 | ||
201 | /* PPTP header is variable length, only need up to the call_id field */ | 202 | /* PPTP header is variable length, only need up to the call_id field */ |
202 | pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); | 203 | pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); |
203 | if (!pgrehdr) | 204 | if (!pgrehdr) |
204 | return true; | 205 | return true; |
205 | 206 | ||
206 | if (grehdr->protocol != GRE_PROTO_PPP) { | 207 | if (grehdr->protocol != GRE_PROTO_PPP) { |
207 | pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); | 208 | pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); |
208 | return false; | 209 | return false; |
209 | } | 210 | } |
210 | 211 | ||
211 | tuple->dst.u.gre.key = pgrehdr->call_id; | 212 | tuple->dst.u.gre.key = pgrehdr->call_id; |
212 | srckey = gre_keymap_lookup(net, tuple); | 213 | srckey = gre_keymap_lookup(net, tuple); |
213 | tuple->src.u.gre.key = srckey; | 214 | tuple->src.u.gre.key = srckey; |
214 | 215 | ||
215 | return true; | 216 | return true; |
216 | } | 217 | } |
217 | 218 | ||
218 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 219 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
219 | /* print private data for conntrack */ | 220 | /* print private data for conntrack */ |
220 | static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) | 221 | static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) |
221 | { | 222 | { |
222 | seq_printf(s, "timeout=%u, stream_timeout=%u ", | 223 | seq_printf(s, "timeout=%u, stream_timeout=%u ", |
223 | (ct->proto.gre.timeout / HZ), | 224 | (ct->proto.gre.timeout / HZ), |
224 | (ct->proto.gre.stream_timeout / HZ)); | 225 | (ct->proto.gre.stream_timeout / HZ)); |
225 | } | 226 | } |
226 | #endif | 227 | #endif |
227 | 228 | ||
228 | static unsigned int *gre_get_timeouts(struct net *net) | 229 | static unsigned int *gre_get_timeouts(struct net *net) |
229 | { | 230 | { |
230 | return gre_pernet(net)->gre_timeouts; | 231 | return gre_pernet(net)->gre_timeouts; |
231 | } | 232 | } |
232 | 233 | ||
233 | /* Returns verdict for packet, and may modify conntrack */ | 234 | /* Returns verdict for packet, and may modify conntrack */ |
234 | static int gre_packet(struct nf_conn *ct, | 235 | static int gre_packet(struct nf_conn *ct, |
235 | const struct sk_buff *skb, | 236 | const struct sk_buff *skb, |
236 | unsigned int dataoff, | 237 | unsigned int dataoff, |
237 | enum ip_conntrack_info ctinfo, | 238 | enum ip_conntrack_info ctinfo) |
238 | unsigned int *timeouts) | ||
239 | { | 239 | { |
240 | /* If we've seen traffic both ways, this is a GRE connection. | 240 | /* If we've seen traffic both ways, this is a GRE connection. |
241 | * Extend timeout. */ | 241 | * Extend timeout. */ |
242 | if (ct->status & IPS_SEEN_REPLY) { | 242 | if (ct->status & IPS_SEEN_REPLY) { |
243 | nf_ct_refresh_acct(ct, ctinfo, skb, | 243 | nf_ct_refresh_acct(ct, ctinfo, skb, |
244 | ct->proto.gre.stream_timeout); | 244 | ct->proto.gre.stream_timeout); |
245 | /* Also, more likely to be important, and not a probe. */ | 245 | /* Also, more likely to be important, and not a probe. */ |
246 | if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) | 246 | if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) |
247 | nf_conntrack_event_cache(IPCT_ASSURED, ct); | 247 | nf_conntrack_event_cache(IPCT_ASSURED, ct); |
248 | } else | 248 | } else |
249 | nf_ct_refresh_acct(ct, ctinfo, skb, | 249 | nf_ct_refresh_acct(ct, ctinfo, skb, |
250 | ct->proto.gre.timeout); | 250 | ct->proto.gre.timeout); |
251 | 251 | ||
252 | return NF_ACCEPT; | 252 | return NF_ACCEPT; |
253 | } | 253 | } |
254 | 254 | ||
255 | /* Called when a new connection for this protocol found. */ | 255 | /* Called when a new connection for this protocol found. */ |
256 | static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, | 256 | static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, |
257 | unsigned int dataoff, unsigned int *timeouts) | 257 | unsigned int dataoff) |
258 | { | 258 | { |
259 | unsigned int *timeouts = nf_ct_timeout_lookup(ct); | ||
260 | |||
261 | if (!timeouts) | ||
262 | timeouts = gre_get_timeouts(nf_ct_net(ct)); | ||
263 | |||
259 | pr_debug(": "); | 264 | pr_debug(": "); |
260 | nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 265 | nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
261 | 266 | ||
262 | /* initialize to sane value. Ideally a conntrack helper | 267 | /* initialize to sane value. Ideally a conntrack helper |
263 | * (e.g. in case of pptp) is increasing them */ | 268 | * (e.g. in case of pptp) is increasing them */ |
264 | ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; | 269 | ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; |
265 | ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; | 270 | ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; |
266 | 271 | ||
267 | return true; | 272 | return true; |
268 | } | 273 | } |
269 | 274 | ||
270 | /* Called when a conntrack entry has already been removed from the hashes | 275 | /* Called when a conntrack entry has already been removed from the hashes |
271 | * and is about to be deleted from memory */ | 276 | * and is about to be deleted from memory */ |
272 | static void gre_destroy(struct nf_conn *ct) | 277 | static void gre_destroy(struct nf_conn *ct) |
273 | { | 278 | { |
274 | struct nf_conn *master = ct->master; | 279 | struct nf_conn *master = ct->master; |
275 | pr_debug(" entering\n"); | 280 | pr_debug(" entering\n"); |
276 | 281 | ||
277 | if (!master) | 282 | if (!master) |
278 | pr_debug("no master !?!\n"); | 283 | pr_debug("no master !?!\n"); |
279 | else | 284 | else |
280 | nf_ct_gre_keymap_destroy(master); | 285 | nf_ct_gre_keymap_destroy(master); |
281 | } | 286 | } |
282 | 287 | ||
283 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 288 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
284 | 289 | ||
285 | #include <linux/netfilter/nfnetlink.h> | 290 | #include <linux/netfilter/nfnetlink.h> |
286 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 291 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
287 | 292 | ||
288 | static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], | 293 | static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], |
289 | struct net *net, void *data) | 294 | struct net *net, void *data) |
290 | { | 295 | { |
291 | unsigned int *timeouts = data; | 296 | unsigned int *timeouts = data; |
292 | struct netns_proto_gre *net_gre = gre_pernet(net); | 297 | struct netns_proto_gre *net_gre = gre_pernet(net); |
293 | 298 | ||
299 | if (!timeouts) | ||
300 | timeouts = gre_get_timeouts(net); | ||
294 | /* set default timeouts for GRE. */ | 301 | /* set default timeouts for GRE. */ |
295 | timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; | 302 | timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; |
296 | timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; | 303 | timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; |
297 | 304 | ||
298 | if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { | 305 | if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { |
299 | timeouts[GRE_CT_UNREPLIED] = | 306 | timeouts[GRE_CT_UNREPLIED] = |
300 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; | 307 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; |
301 | } | 308 | } |
302 | if (tb[CTA_TIMEOUT_GRE_REPLIED]) { | 309 | if (tb[CTA_TIMEOUT_GRE_REPLIED]) { |
303 | timeouts[GRE_CT_REPLIED] = | 310 | timeouts[GRE_CT_REPLIED] = |
304 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; | 311 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; |
305 | } | 312 | } |
306 | return 0; | 313 | return 0; |
307 | } | 314 | } |
308 | 315 | ||
309 | static int | 316 | static int |
310 | gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 317 | gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
311 | { | 318 | { |
312 | const unsigned int *timeouts = data; | 319 | const unsigned int *timeouts = data; |
313 | 320 | ||
314 | if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED, | 321 | if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED, |
315 | htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) || | 322 | htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) || |
316 | nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED, | 323 | nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED, |
317 | htonl(timeouts[GRE_CT_REPLIED] / HZ))) | 324 | htonl(timeouts[GRE_CT_REPLIED] / HZ))) |
318 | goto nla_put_failure; | 325 | goto nla_put_failure; |
319 | return 0; | 326 | return 0; |
320 | 327 | ||
321 | nla_put_failure: | 328 | nla_put_failure: |
322 | return -ENOSPC; | 329 | return -ENOSPC; |
323 | } | 330 | } |
324 | 331 | ||
325 | static const struct nla_policy | 332 | static const struct nla_policy |
326 | gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { | 333 | gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { |
327 | [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, | 334 | [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, |
328 | [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, | 335 | [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, |
329 | }; | 336 | }; |
330 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 337 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
331 | 338 | ||
332 | static int gre_init_net(struct net *net, u_int16_t proto) | 339 | static int gre_init_net(struct net *net, u_int16_t proto) |
333 | { | 340 | { |
334 | struct netns_proto_gre *net_gre = gre_pernet(net); | 341 | struct netns_proto_gre *net_gre = gre_pernet(net); |
335 | int i; | 342 | int i; |
336 | 343 | ||
337 | rwlock_init(&net_gre->keymap_lock); | 344 | rwlock_init(&net_gre->keymap_lock); |
338 | INIT_LIST_HEAD(&net_gre->keymap_list); | 345 | INIT_LIST_HEAD(&net_gre->keymap_list); |
339 | for (i = 0; i < GRE_CT_MAX; i++) | 346 | for (i = 0; i < GRE_CT_MAX; i++) |
340 | net_gre->gre_timeouts[i] = gre_timeouts[i]; | 347 | net_gre->gre_timeouts[i] = gre_timeouts[i]; |
341 | 348 | ||
342 | return 0; | 349 | return 0; |
343 | } | 350 | } |
344 | 351 | ||
345 | /* protocol helper struct */ | 352 | /* protocol helper struct */ |
346 | static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { | 353 | static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { |
347 | .l3proto = AF_INET, | 354 | .l3proto = AF_INET, |
348 | .l4proto = IPPROTO_GRE, | 355 | .l4proto = IPPROTO_GRE, |
349 | .pkt_to_tuple = gre_pkt_to_tuple, | 356 | .pkt_to_tuple = gre_pkt_to_tuple, |
350 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 357 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
351 | .print_conntrack = gre_print_conntrack, | 358 | .print_conntrack = gre_print_conntrack, |
352 | #endif | 359 | #endif |
353 | .get_timeouts = gre_get_timeouts, | ||
354 | .packet = gre_packet, | 360 | .packet = gre_packet, |
355 | .new = gre_new, | 361 | .new = gre_new, |
356 | .destroy = gre_destroy, | 362 | .destroy = gre_destroy, |
357 | .me = THIS_MODULE, | 363 | .me = THIS_MODULE, |
358 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 364 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
359 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 365 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
360 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 366 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
361 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 367 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
362 | .nla_policy = nf_ct_port_nla_policy, | 368 | .nla_policy = nf_ct_port_nla_policy, |
363 | #endif | 369 | #endif |
364 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 370 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
365 | .ctnl_timeout = { | 371 | .ctnl_timeout = { |
366 | .nlattr_to_obj = gre_timeout_nlattr_to_obj, | 372 | .nlattr_to_obj = gre_timeout_nlattr_to_obj, |
367 | .obj_to_nlattr = gre_timeout_obj_to_nlattr, | 373 | .obj_to_nlattr = gre_timeout_obj_to_nlattr, |
368 | .nlattr_max = CTA_TIMEOUT_GRE_MAX, | 374 | .nlattr_max = CTA_TIMEOUT_GRE_MAX, |
369 | .obj_size = sizeof(unsigned int) * GRE_CT_MAX, | 375 | .obj_size = sizeof(unsigned int) * GRE_CT_MAX, |
370 | .nla_policy = gre_timeout_nla_policy, | 376 | .nla_policy = gre_timeout_nla_policy, |
371 | }, | 377 | }, |
372 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 378 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
373 | .net_id = &proto_gre_net_id, | 379 | .net_id = &proto_gre_net_id, |
374 | .init_net = gre_init_net, | 380 | .init_net = gre_init_net, |
375 | }; | 381 | }; |
376 | 382 | ||
377 | static int proto_gre_net_init(struct net *net) | 383 | static int proto_gre_net_init(struct net *net) |
378 | { | 384 | { |
379 | int ret = 0; | 385 | int ret = 0; |
380 | 386 | ||
381 | ret = nf_ct_l4proto_pernet_register_one(net, | 387 | ret = nf_ct_l4proto_pernet_register_one(net, |
382 | &nf_conntrack_l4proto_gre4); | 388 | &nf_conntrack_l4proto_gre4); |
383 | if (ret < 0) | 389 | if (ret < 0) |
384 | pr_err("nf_conntrack_gre4: pernet registration failed.\n"); | 390 | pr_err("nf_conntrack_gre4: pernet registration failed.\n"); |
385 | return ret; | 391 | return ret; |
386 | } | 392 | } |
387 | 393 | ||
388 | static void proto_gre_net_exit(struct net *net) | 394 | static void proto_gre_net_exit(struct net *net) |
389 | { | 395 | { |
390 | nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); | 396 | nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); |
391 | nf_ct_gre_keymap_flush(net); | 397 | nf_ct_gre_keymap_flush(net); |
392 | } | 398 | } |
393 | 399 | ||
394 | static struct pernet_operations proto_gre_net_ops = { | 400 | static struct pernet_operations proto_gre_net_ops = { |
395 | .init = proto_gre_net_init, | 401 | .init = proto_gre_net_init, |
396 | .exit = proto_gre_net_exit, | 402 | .exit = proto_gre_net_exit, |
397 | .id = &proto_gre_net_id, | 403 | .id = &proto_gre_net_id, |
398 | .size = sizeof(struct netns_proto_gre), | 404 | .size = sizeof(struct netns_proto_gre), |
399 | }; | 405 | }; |
400 | 406 | ||
401 | static int __init nf_ct_proto_gre_init(void) | 407 | static int __init nf_ct_proto_gre_init(void) |
402 | { | 408 | { |
403 | int ret; | 409 | int ret; |
404 | 410 | ||
405 | ret = register_pernet_subsys(&proto_gre_net_ops); | 411 | ret = register_pernet_subsys(&proto_gre_net_ops); |
406 | if (ret < 0) | 412 | if (ret < 0) |
407 | goto out_pernet; | 413 | goto out_pernet; |
408 | ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); | 414 | ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); |
409 | if (ret < 0) | 415 | if (ret < 0) |
410 | goto out_gre4; | 416 | goto out_gre4; |
411 | 417 | ||
412 | return 0; | 418 | return 0; |
413 | out_gre4: | 419 | out_gre4: |
414 | unregister_pernet_subsys(&proto_gre_net_ops); | 420 | unregister_pernet_subsys(&proto_gre_net_ops); |
415 | out_pernet: | 421 | out_pernet: |
416 | return ret; | 422 | return ret; |
417 | } | 423 | } |
418 | 424 | ||
419 | static void __exit nf_ct_proto_gre_fini(void) | 425 | static void __exit nf_ct_proto_gre_fini(void) |
420 | { | 426 | { |
421 | nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); | 427 | nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); |
422 | unregister_pernet_subsys(&proto_gre_net_ops); | 428 | unregister_pernet_subsys(&proto_gre_net_ops); |
423 | } | 429 | } |
424 | 430 | ||
425 | module_init(nf_ct_proto_gre_init); | 431 | module_init(nf_ct_proto_gre_init); |
426 | module_exit(nf_ct_proto_gre_fini); | 432 | module_exit(nf_ct_proto_gre_fini); |
427 | 433 |
net/netfilter/nf_conntrack_proto_sctp.c
1 | /* | 1 | /* |
2 | * Connection tracking protocol helper module for SCTP. | 2 | * Connection tracking protocol helper module for SCTP. |
3 | * | 3 | * |
4 | * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> | 4 | * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> |
5 | * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> | 5 | * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> |
6 | * | 6 | * |
7 | * SCTP is defined in RFC 2960. References to various sections in this code | 7 | * SCTP is defined in RFC 2960. References to various sections in this code |
8 | * are to this RFC. | 8 | * are to this RFC. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License version 2 as | 11 | * it under the terms of the GNU General Public License version 2 as |
12 | * published by the Free Software Foundation. | 12 | * published by the Free Software Foundation. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/types.h> | 15 | #include <linux/types.h> |
16 | #include <linux/timer.h> | 16 | #include <linux/timer.h> |
17 | #include <linux/netfilter.h> | 17 | #include <linux/netfilter.h> |
18 | #include <linux/in.h> | 18 | #include <linux/in.h> |
19 | #include <linux/ip.h> | 19 | #include <linux/ip.h> |
20 | #include <linux/sctp.h> | 20 | #include <linux/sctp.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/seq_file.h> | 22 | #include <linux/seq_file.h> |
23 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <net/sctp/checksum.h> | 25 | #include <net/sctp/checksum.h> |
26 | 26 | ||
27 | #include <net/netfilter/nf_log.h> | 27 | #include <net/netfilter/nf_log.h> |
28 | #include <net/netfilter/nf_conntrack.h> | 28 | #include <net/netfilter/nf_conntrack.h> |
29 | #include <net/netfilter/nf_conntrack_l4proto.h> | 29 | #include <net/netfilter/nf_conntrack_l4proto.h> |
30 | #include <net/netfilter/nf_conntrack_ecache.h> | 30 | #include <net/netfilter/nf_conntrack_ecache.h> |
31 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
31 | 32 | ||
32 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | 33 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more |
33 | closely. They're more complex. --RR | 34 | closely. They're more complex. --RR |
34 | 35 | ||
35 | And so for me for SCTP :D -Kiran */ | 36 | And so for me for SCTP :D -Kiran */ |
36 | 37 | ||
37 | static const char *const sctp_conntrack_names[] = { | 38 | static const char *const sctp_conntrack_names[] = { |
38 | "NONE", | 39 | "NONE", |
39 | "CLOSED", | 40 | "CLOSED", |
40 | "COOKIE_WAIT", | 41 | "COOKIE_WAIT", |
41 | "COOKIE_ECHOED", | 42 | "COOKIE_ECHOED", |
42 | "ESTABLISHED", | 43 | "ESTABLISHED", |
43 | "SHUTDOWN_SENT", | 44 | "SHUTDOWN_SENT", |
44 | "SHUTDOWN_RECD", | 45 | "SHUTDOWN_RECD", |
45 | "SHUTDOWN_ACK_SENT", | 46 | "SHUTDOWN_ACK_SENT", |
46 | "HEARTBEAT_SENT", | 47 | "HEARTBEAT_SENT", |
47 | "HEARTBEAT_ACKED", | 48 | "HEARTBEAT_ACKED", |
48 | }; | 49 | }; |
49 | 50 | ||
50 | #define SECS * HZ | 51 | #define SECS * HZ |
51 | #define MINS * 60 SECS | 52 | #define MINS * 60 SECS |
52 | #define HOURS * 60 MINS | 53 | #define HOURS * 60 MINS |
53 | #define DAYS * 24 HOURS | 54 | #define DAYS * 24 HOURS |
54 | 55 | ||
55 | static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { | 56 | static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { |
56 | [SCTP_CONNTRACK_CLOSED] = 10 SECS, | 57 | [SCTP_CONNTRACK_CLOSED] = 10 SECS, |
57 | [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, | 58 | [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, |
58 | [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, | 59 | [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, |
59 | [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, | 60 | [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, |
60 | [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, | 61 | [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, |
61 | [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, | 62 | [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, |
62 | [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, | 63 | [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, |
63 | [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, | 64 | [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, |
64 | [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, | 65 | [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, |
65 | }; | 66 | }; |
66 | 67 | ||
67 | #define sNO SCTP_CONNTRACK_NONE | 68 | #define sNO SCTP_CONNTRACK_NONE |
68 | #define sCL SCTP_CONNTRACK_CLOSED | 69 | #define sCL SCTP_CONNTRACK_CLOSED |
69 | #define sCW SCTP_CONNTRACK_COOKIE_WAIT | 70 | #define sCW SCTP_CONNTRACK_COOKIE_WAIT |
70 | #define sCE SCTP_CONNTRACK_COOKIE_ECHOED | 71 | #define sCE SCTP_CONNTRACK_COOKIE_ECHOED |
71 | #define sES SCTP_CONNTRACK_ESTABLISHED | 72 | #define sES SCTP_CONNTRACK_ESTABLISHED |
72 | #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT | 73 | #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT |
73 | #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD | 74 | #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD |
74 | #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT | 75 | #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT |
75 | #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT | 76 | #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT |
76 | #define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED | 77 | #define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED |
77 | #define sIV SCTP_CONNTRACK_MAX | 78 | #define sIV SCTP_CONNTRACK_MAX |
78 | 79 | ||
79 | /* | 80 | /* |
80 | These are the descriptions of the states: | 81 | These are the descriptions of the states: |
81 | 82 | ||
82 | NOTE: These state names are tantalizingly similar to the states of an | 83 | NOTE: These state names are tantalizingly similar to the states of an |
83 | SCTP endpoint. But the interpretation of the states is a little different, | 84 | SCTP endpoint. But the interpretation of the states is a little different, |
84 | considering that these are the states of the connection and not of an end | 85 | considering that these are the states of the connection and not of an end |
85 | point. Please note the subtleties. -Kiran | 86 | point. Please note the subtleties. -Kiran |
86 | 87 | ||
87 | NONE - Nothing so far. | 88 | NONE - Nothing so far. |
88 | COOKIE WAIT - We have seen an INIT chunk in the original direction, or also | 89 | COOKIE WAIT - We have seen an INIT chunk in the original direction, or also |
89 | an INIT_ACK chunk in the reply direction. | 90 | an INIT_ACK chunk in the reply direction. |
90 | COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. | 91 | COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. |
91 | ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. | 92 | ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. |
92 | SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. | 93 | SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. |
93 | SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. | 94 | SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. |
94 | SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite | 95 | SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite |
95 | to that of the SHUTDOWN chunk. | 96 | to that of the SHUTDOWN chunk. |
96 | CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of | 97 | CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of |
97 | the SHUTDOWN chunk. Connection is closed. | 98 | the SHUTDOWN chunk. Connection is closed. |
98 | HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. | 99 | HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. |
99 | HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to | 100 | HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to |
100 | that of the HEARTBEAT chunk. Secondary connection is | 101 | that of the HEARTBEAT chunk. Secondary connection is |
101 | established. | 102 | established. |
102 | */ | 103 | */ |
103 | 104 | ||
104 | /* TODO | 105 | /* TODO |
105 | - I have assumed that the first INIT is in the original direction. | 106 | - I have assumed that the first INIT is in the original direction. |
106 | This messes things when an INIT comes in the reply direction in CLOSED | 107 | This messes things when an INIT comes in the reply direction in CLOSED |
107 | state. | 108 | state. |
108 | - Check the error type in the reply dir before transitioning from | 109 | - Check the error type in the reply dir before transitioning from |
109 | cookie echoed to closed. | 110 | cookie echoed to closed. |
110 | - Sec 5.2.4 of RFC 2960 | 111 | - Sec 5.2.4 of RFC 2960 |
111 | - Full Multi Homing support. | 112 | - Full Multi Homing support. |
112 | */ | 113 | */ |
113 | 114 | ||
114 | /* SCTP conntrack state transitions */ | 115 | /* SCTP conntrack state transitions */ |
115 | static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { | 116 | static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { |
116 | { | 117 | { |
117 | /* ORIGINAL */ | 118 | /* ORIGINAL */ |
118 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ | 119 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ |
119 | /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, | 120 | /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, |
120 | /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, | 121 | /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, |
121 | /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | 122 | /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, |
122 | /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, | 123 | /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, |
123 | /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, | 124 | /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, |
124 | /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ | 125 | /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ |
125 | /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ | 126 | /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ |
126 | /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ | 127 | /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ |
127 | /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, | 128 | /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, |
128 | /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, | 129 | /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, |
129 | /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} | 130 | /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} |
130 | }, | 131 | }, |
131 | { | 132 | { |
132 | /* REPLY */ | 133 | /* REPLY */ |
133 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ | 134 | /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ |
134 | /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ | 135 | /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ |
135 | /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, | 136 | /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, |
136 | /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, | 137 | /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, |
137 | /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, | 138 | /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, |
138 | /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, | 139 | /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, |
139 | /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, | 140 | /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, |
140 | /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ | 141 | /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ |
141 | /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, | 142 | /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, |
142 | /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, | 143 | /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, |
143 | /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, | 144 | /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, |
144 | /* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} | 145 | /* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} |
145 | } | 146 | } |
146 | }; | 147 | }; |
147 | 148 | ||
148 | static inline struct nf_sctp_net *sctp_pernet(struct net *net) | 149 | static inline struct nf_sctp_net *sctp_pernet(struct net *net) |
149 | { | 150 | { |
150 | return &net->ct.nf_ct_proto.sctp; | 151 | return &net->ct.nf_ct_proto.sctp; |
151 | } | 152 | } |
152 | 153 | ||
153 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 154 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
154 | /* Print out the private part of the conntrack. */ | 155 | /* Print out the private part of the conntrack. */ |
155 | static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) | 156 | static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) |
156 | { | 157 | { |
157 | seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); | 158 | seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); |
158 | } | 159 | } |
159 | #endif | 160 | #endif |
160 | 161 | ||
/* Iterate over every chunk header in the packet, starting right after
 * the common SCTP header at @dataoff.  Chunk lengths are rounded up to
 * a 4-byte boundary for the next offset; iteration stops at the first
 * chunk header that cannot be linearized.  @count is the number of
 * chunks visited so far (0 for the first chunk).
 * NOTE: evaluates its arguments multiple times — pass plain lvalues.
 */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)	\
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;	\
	(offset) < (skb)->len &&					\
	((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));	\
	(offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
166 | 167 | ||
167 | /* Some validity checks to make sure the chunks are fine */ | 168 | /* Some validity checks to make sure the chunks are fine */ |
168 | static int do_basic_checks(struct nf_conn *ct, | 169 | static int do_basic_checks(struct nf_conn *ct, |
169 | const struct sk_buff *skb, | 170 | const struct sk_buff *skb, |
170 | unsigned int dataoff, | 171 | unsigned int dataoff, |
171 | unsigned long *map) | 172 | unsigned long *map) |
172 | { | 173 | { |
173 | u_int32_t offset, count; | 174 | u_int32_t offset, count; |
174 | struct sctp_chunkhdr _sch, *sch; | 175 | struct sctp_chunkhdr _sch, *sch; |
175 | int flag; | 176 | int flag; |
176 | 177 | ||
177 | flag = 0; | 178 | flag = 0; |
178 | 179 | ||
179 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { | 180 | for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { |
180 | pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); | 181 | pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); |
181 | 182 | ||
182 | if (sch->type == SCTP_CID_INIT || | 183 | if (sch->type == SCTP_CID_INIT || |
183 | sch->type == SCTP_CID_INIT_ACK || | 184 | sch->type == SCTP_CID_INIT_ACK || |
184 | sch->type == SCTP_CID_SHUTDOWN_COMPLETE) | 185 | sch->type == SCTP_CID_SHUTDOWN_COMPLETE) |
185 | flag = 1; | 186 | flag = 1; |
186 | 187 | ||
187 | /* | 188 | /* |
188 | * Cookie Ack/Echo chunks not the first OR | 189 | * Cookie Ack/Echo chunks not the first OR |
189 | * Init / Init Ack / Shutdown compl chunks not the only chunks | 190 | * Init / Init Ack / Shutdown compl chunks not the only chunks |
190 | * OR zero-length. | 191 | * OR zero-length. |
191 | */ | 192 | */ |
192 | if (((sch->type == SCTP_CID_COOKIE_ACK || | 193 | if (((sch->type == SCTP_CID_COOKIE_ACK || |
193 | sch->type == SCTP_CID_COOKIE_ECHO || | 194 | sch->type == SCTP_CID_COOKIE_ECHO || |
194 | flag) && | 195 | flag) && |
195 | count != 0) || !sch->length) { | 196 | count != 0) || !sch->length) { |
196 | pr_debug("Basic checks failed\n"); | 197 | pr_debug("Basic checks failed\n"); |
197 | return 1; | 198 | return 1; |
198 | } | 199 | } |
199 | 200 | ||
200 | if (map) | 201 | if (map) |
201 | set_bit(sch->type, map); | 202 | set_bit(sch->type, map); |
202 | } | 203 | } |
203 | 204 | ||
204 | pr_debug("Basic checks passed\n"); | 205 | pr_debug("Basic checks passed\n"); |
205 | return count == 0; | 206 | return count == 0; |
206 | } | 207 | } |
207 | 208 | ||
208 | static int sctp_new_state(enum ip_conntrack_dir dir, | 209 | static int sctp_new_state(enum ip_conntrack_dir dir, |
209 | enum sctp_conntrack cur_state, | 210 | enum sctp_conntrack cur_state, |
210 | int chunk_type) | 211 | int chunk_type) |
211 | { | 212 | { |
212 | int i; | 213 | int i; |
213 | 214 | ||
214 | pr_debug("Chunk type: %d\n", chunk_type); | 215 | pr_debug("Chunk type: %d\n", chunk_type); |
215 | 216 | ||
216 | switch (chunk_type) { | 217 | switch (chunk_type) { |
217 | case SCTP_CID_INIT: | 218 | case SCTP_CID_INIT: |
218 | pr_debug("SCTP_CID_INIT\n"); | 219 | pr_debug("SCTP_CID_INIT\n"); |
219 | i = 0; | 220 | i = 0; |
220 | break; | 221 | break; |
221 | case SCTP_CID_INIT_ACK: | 222 | case SCTP_CID_INIT_ACK: |
222 | pr_debug("SCTP_CID_INIT_ACK\n"); | 223 | pr_debug("SCTP_CID_INIT_ACK\n"); |
223 | i = 1; | 224 | i = 1; |
224 | break; | 225 | break; |
225 | case SCTP_CID_ABORT: | 226 | case SCTP_CID_ABORT: |
226 | pr_debug("SCTP_CID_ABORT\n"); | 227 | pr_debug("SCTP_CID_ABORT\n"); |
227 | i = 2; | 228 | i = 2; |
228 | break; | 229 | break; |
229 | case SCTP_CID_SHUTDOWN: | 230 | case SCTP_CID_SHUTDOWN: |
230 | pr_debug("SCTP_CID_SHUTDOWN\n"); | 231 | pr_debug("SCTP_CID_SHUTDOWN\n"); |
231 | i = 3; | 232 | i = 3; |
232 | break; | 233 | break; |
233 | case SCTP_CID_SHUTDOWN_ACK: | 234 | case SCTP_CID_SHUTDOWN_ACK: |
234 | pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); | 235 | pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); |
235 | i = 4; | 236 | i = 4; |
236 | break; | 237 | break; |
237 | case SCTP_CID_ERROR: | 238 | case SCTP_CID_ERROR: |
238 | pr_debug("SCTP_CID_ERROR\n"); | 239 | pr_debug("SCTP_CID_ERROR\n"); |
239 | i = 5; | 240 | i = 5; |
240 | break; | 241 | break; |
241 | case SCTP_CID_COOKIE_ECHO: | 242 | case SCTP_CID_COOKIE_ECHO: |
242 | pr_debug("SCTP_CID_COOKIE_ECHO\n"); | 243 | pr_debug("SCTP_CID_COOKIE_ECHO\n"); |
243 | i = 6; | 244 | i = 6; |
244 | break; | 245 | break; |
245 | case SCTP_CID_COOKIE_ACK: | 246 | case SCTP_CID_COOKIE_ACK: |
246 | pr_debug("SCTP_CID_COOKIE_ACK\n"); | 247 | pr_debug("SCTP_CID_COOKIE_ACK\n"); |
247 | i = 7; | 248 | i = 7; |
248 | break; | 249 | break; |
249 | case SCTP_CID_SHUTDOWN_COMPLETE: | 250 | case SCTP_CID_SHUTDOWN_COMPLETE: |
250 | pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); | 251 | pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); |
251 | i = 8; | 252 | i = 8; |
252 | break; | 253 | break; |
253 | case SCTP_CID_HEARTBEAT: | 254 | case SCTP_CID_HEARTBEAT: |
254 | pr_debug("SCTP_CID_HEARTBEAT"); | 255 | pr_debug("SCTP_CID_HEARTBEAT"); |
255 | i = 9; | 256 | i = 9; |
256 | break; | 257 | break; |
257 | case SCTP_CID_HEARTBEAT_ACK: | 258 | case SCTP_CID_HEARTBEAT_ACK: |
258 | pr_debug("SCTP_CID_HEARTBEAT_ACK"); | 259 | pr_debug("SCTP_CID_HEARTBEAT_ACK"); |
259 | i = 10; | 260 | i = 10; |
260 | break; | 261 | break; |
261 | default: | 262 | default: |
262 | /* Other chunks like DATA or SACK do not change the state */ | 263 | /* Other chunks like DATA or SACK do not change the state */ |
263 | pr_debug("Unknown chunk type, Will stay in %s\n", | 264 | pr_debug("Unknown chunk type, Will stay in %s\n", |
264 | sctp_conntrack_names[cur_state]); | 265 | sctp_conntrack_names[cur_state]); |
265 | return cur_state; | 266 | return cur_state; |
266 | } | 267 | } |
267 | 268 | ||
268 | pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", | 269 | pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", |
269 | dir, sctp_conntrack_names[cur_state], chunk_type, | 270 | dir, sctp_conntrack_names[cur_state], chunk_type, |
270 | sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); | 271 | sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); |
271 | 272 | ||
272 | return sctp_conntracks[dir][i][cur_state]; | 273 | return sctp_conntracks[dir][i][cur_state]; |
273 | } | 274 | } |
274 | 275 | ||
275 | static unsigned int *sctp_get_timeouts(struct net *net) | ||
276 | { | ||
277 | return sctp_pernet(net)->timeouts; | ||
278 | } | ||
279 | |||
/* Returns verdict for packet, or -NF_ACCEPT for invalid.
 *
 * Walks each chunk, validates the verification tag (RFC 4960 Sec 8.5
 * and the special cases of Sec 8.5.1) and advances the per-connection
 * state machine under ct->lock.  On success the conntrack timeout is
 * refreshed from the per-conntrack cttimeout policy if one is attached,
 * otherwise from the per-netns defaults.
 */
static int sctp_packet(struct nf_conn *ct,
		       const struct sk_buff *skb,
		       unsigned int dataoff,
		       enum ip_conntrack_info ctinfo)
{
	enum sctp_conntrack new_state, old_state;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	const struct sctphdr *sh;
	struct sctphdr _sctph;
	const struct sctp_chunkhdr *sch;
	struct sctp_chunkhdr _sch;
	u_int32_t offset, count;
	unsigned int *timeouts;
	/* bitmap of chunk types seen; filled by do_basic_checks() */
	unsigned long map[256 / sizeof(unsigned long)] = { 0 };

	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
	if (sh == NULL)
		goto out;

	if (do_basic_checks(ct, skb, dataoff, map) != 0)
		goto out;

	/* Check the verification tag (Sec 8.5) */
	if (!test_bit(SCTP_CID_INIT, map) &&
	    !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
	    !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
	    !test_bit(SCTP_CID_ABORT, map) &&
	    !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
	    !test_bit(SCTP_CID_HEARTBEAT, map) &&
	    !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
	    sh->vtag != ct->proto.sctp.vtag[dir]) {
		pr_debug("Verification tag check failed\n");
		goto out;
	}

	old_state = new_state = SCTP_CONNTRACK_NONE;
	spin_lock_bh(&ct->lock);
	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
		/* Special cases of Verification tag check (Sec 8.5.1) */
		if (sch->type == SCTP_CID_INIT) {
			/* Sec 8.5.1 (A) */
			if (sh->vtag != 0)
				goto out_unlock;
		} else if (sch->type == SCTP_CID_ABORT) {
			/* Sec 8.5.1 (B) */
			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
			    sh->vtag != ct->proto.sctp.vtag[!dir])
				goto out_unlock;
		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
			/* Sec 8.5.1 (C) */
			if (sh->vtag != ct->proto.sctp.vtag[dir] &&
			    sh->vtag != ct->proto.sctp.vtag[!dir] &&
			    sch->flags & SCTP_CHUNK_FLAG_T)
				goto out_unlock;
		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
			/* Sec 8.5.1 (D) */
			if (sh->vtag != ct->proto.sctp.vtag[dir])
				goto out_unlock;
		} else if (sch->type == SCTP_CID_HEARTBEAT ||
			   sch->type == SCTP_CID_HEARTBEAT_ACK) {
			/* First heartbeat in this direction fixes the vtag;
			 * later ones must match it.
			 */
			if (ct->proto.sctp.vtag[dir] == 0) {
				pr_debug("Setting vtag %x for dir %d\n",
					 sh->vtag, dir);
				ct->proto.sctp.vtag[dir] = sh->vtag;
			} else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
				pr_debug("Verification tag check failed\n");
				goto out_unlock;
			}
		}

		old_state = ct->proto.sctp.state;
		new_state = sctp_new_state(dir, old_state, sch->type);

		/* Invalid */
		if (new_state == SCTP_CONNTRACK_MAX) {
			pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
				 "conntrack=%u\n",
				 dir, sch->type, old_state);
			goto out_unlock;
		}

		/* If it is an INIT or an INIT ACK note down the vtag */
		if (sch->type == SCTP_CID_INIT ||
		    sch->type == SCTP_CID_INIT_ACK) {
			struct sctp_inithdr _inithdr, *ih;

			ih = skb_header_pointer(skb, offset + sizeof(_sch),
						sizeof(_inithdr), &_inithdr);
			if (ih == NULL)
				goto out_unlock;
			pr_debug("Setting vtag %x for dir %d\n",
				 ih->init_tag, !dir);
			ct->proto.sctp.vtag[!dir] = ih->init_tag;
		}

		ct->proto.sctp.state = new_state;
		if (old_state != new_state)
			nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
	}
	spin_unlock_bh(&ct->lock);

	/* Per-conntrack timeout policy (cttimeout) takes precedence over
	 * the per-netns defaults.
	 */
	timeouts = nf_ct_timeout_lookup(ct);
	if (!timeouts)
		timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;

	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);

	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
	    dir == IP_CT_DIR_REPLY &&
	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
		pr_debug("Setting assured bit\n");
		set_bit(IPS_ASSURED_BIT, &ct->status);
		nf_conntrack_event_cache(IPCT_ASSURED, ct);
	}

	return NF_ACCEPT;

out_unlock:
	spin_unlock_bh(&ct->lock);
out:
	return -NF_ACCEPT;
}
399 | 399 | ||
/* Called when a new connection for this protocol found.
 *
 * Returns false (conntrack is not created) for malformed packets and
 * for OOTB packets that must be discarded per RFC 4960 Sec 8.4;
 * otherwise records the initial state and verification tags.  No
 * locking needed: the conntrack is not visible to other CPUs yet.
 */
static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
		     unsigned int dataoff)
{
	enum sctp_conntrack new_state;
	const struct sctphdr *sh;
	struct sctphdr _sctph;
	const struct sctp_chunkhdr *sch;
	struct sctp_chunkhdr _sch;
	u_int32_t offset, count;
	/* bitmap of chunk types seen; filled by do_basic_checks() */
	unsigned long map[256 / sizeof(unsigned long)] = { 0 };

	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
	if (sh == NULL)
		return false;

	if (do_basic_checks(ct, skb, dataoff, map) != 0)
		return false;

	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
	if (test_bit(SCTP_CID_ABORT, map) ||
	    test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
	    test_bit(SCTP_CID_COOKIE_ACK, map))
		return false;

	memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
	new_state = SCTP_CONNTRACK_MAX;
	for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
		/* Don't need lock here: this conntrack not in circulation yet */
		new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
					   SCTP_CONNTRACK_NONE, sch->type);

		/* Invalid: delete conntrack */
		if (new_state == SCTP_CONNTRACK_NONE ||
		    new_state == SCTP_CONNTRACK_MAX) {
			pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
			return false;
		}

		/* Copy the vtag into the state info */
		if (sch->type == SCTP_CID_INIT) {
			struct sctp_inithdr _inithdr, *ih;
			/* Sec 8.5.1 (A) */
			if (sh->vtag)
				return false;

			ih = skb_header_pointer(skb, offset + sizeof(_sch),
						sizeof(_inithdr), &_inithdr);
			if (!ih)
				return false;

			pr_debug("Setting vtag %x for new conn\n",
				 ih->init_tag);

			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
		} else if (sch->type == SCTP_CID_HEARTBEAT) {
			pr_debug("Setting vtag %x for secondary conntrack\n",
				 sh->vtag);
			ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
		}
		/* If it is a shutdown ack OOTB packet, we expect a return
		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
		else {
			pr_debug("Setting vtag %x for new conn OOTB\n",
				 sh->vtag);
			ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
		}

		ct->proto.sctp.state = new_state;
	}

	return true;
}
473 | 473 | ||
474 | static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, | 474 | static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, |
475 | unsigned int dataoff, | 475 | unsigned int dataoff, |
476 | u8 pf, unsigned int hooknum) | 476 | u8 pf, unsigned int hooknum) |
477 | { | 477 | { |
478 | const struct sctphdr *sh; | 478 | const struct sctphdr *sh; |
479 | const char *logmsg; | 479 | const char *logmsg; |
480 | 480 | ||
481 | if (skb->len < dataoff + sizeof(struct sctphdr)) { | 481 | if (skb->len < dataoff + sizeof(struct sctphdr)) { |
482 | logmsg = "nf_ct_sctp: short packet "; | 482 | logmsg = "nf_ct_sctp: short packet "; |
483 | goto out_invalid; | 483 | goto out_invalid; |
484 | } | 484 | } |
485 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 485 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
486 | skb->ip_summed == CHECKSUM_NONE) { | 486 | skb->ip_summed == CHECKSUM_NONE) { |
487 | if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) { | 487 | if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) { |
488 | logmsg = "nf_ct_sctp: failed to read header "; | 488 | logmsg = "nf_ct_sctp: failed to read header "; |
489 | goto out_invalid; | 489 | goto out_invalid; |
490 | } | 490 | } |
491 | sh = (const struct sctphdr *)(skb->data + dataoff); | 491 | sh = (const struct sctphdr *)(skb->data + dataoff); |
492 | if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { | 492 | if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { |
493 | logmsg = "nf_ct_sctp: bad CRC "; | 493 | logmsg = "nf_ct_sctp: bad CRC "; |
494 | goto out_invalid; | 494 | goto out_invalid; |
495 | } | 495 | } |
496 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 496 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
497 | } | 497 | } |
498 | return NF_ACCEPT; | 498 | return NF_ACCEPT; |
499 | out_invalid: | 499 | out_invalid: |
500 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); | 500 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); |
501 | return -NF_ACCEPT; | 501 | return -NF_ACCEPT; |
502 | } | 502 | } |
503 | 503 | ||
504 | static bool sctp_can_early_drop(const struct nf_conn *ct) | 504 | static bool sctp_can_early_drop(const struct nf_conn *ct) |
505 | { | 505 | { |
506 | switch (ct->proto.sctp.state) { | 506 | switch (ct->proto.sctp.state) { |
507 | case SCTP_CONNTRACK_SHUTDOWN_SENT: | 507 | case SCTP_CONNTRACK_SHUTDOWN_SENT: |
508 | case SCTP_CONNTRACK_SHUTDOWN_RECD: | 508 | case SCTP_CONNTRACK_SHUTDOWN_RECD: |
509 | case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: | 509 | case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: |
510 | return true; | 510 | return true; |
511 | default: | 511 | default: |
512 | break; | 512 | break; |
513 | } | 513 | } |
514 | 514 | ||
515 | return false; | 515 | return false; |
516 | } | 516 | } |
517 | 517 | ||
518 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 518 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
519 | 519 | ||
520 | #include <linux/netfilter/nfnetlink.h> | 520 | #include <linux/netfilter/nfnetlink.h> |
521 | #include <linux/netfilter/nfnetlink_conntrack.h> | 521 | #include <linux/netfilter/nfnetlink_conntrack.h> |
522 | 522 | ||
523 | static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, | 523 | static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, |
524 | struct nf_conn *ct) | 524 | struct nf_conn *ct) |
525 | { | 525 | { |
526 | struct nlattr *nest_parms; | 526 | struct nlattr *nest_parms; |
527 | 527 | ||
528 | spin_lock_bh(&ct->lock); | 528 | spin_lock_bh(&ct->lock); |
529 | nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); | 529 | nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); |
530 | if (!nest_parms) | 530 | if (!nest_parms) |
531 | goto nla_put_failure; | 531 | goto nla_put_failure; |
532 | 532 | ||
533 | if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state) || | 533 | if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state) || |
534 | nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, | 534 | nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, |
535 | ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) || | 535 | ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) || |
536 | nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, | 536 | nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, |
537 | ct->proto.sctp.vtag[IP_CT_DIR_REPLY])) | 537 | ct->proto.sctp.vtag[IP_CT_DIR_REPLY])) |
538 | goto nla_put_failure; | 538 | goto nla_put_failure; |
539 | 539 | ||
540 | spin_unlock_bh(&ct->lock); | 540 | spin_unlock_bh(&ct->lock); |
541 | 541 | ||
542 | nla_nest_end(skb, nest_parms); | 542 | nla_nest_end(skb, nest_parms); |
543 | 543 | ||
544 | return 0; | 544 | return 0; |
545 | 545 | ||
546 | nla_put_failure: | 546 | nla_put_failure: |
547 | spin_unlock_bh(&ct->lock); | 547 | spin_unlock_bh(&ct->lock); |
548 | return -1; | 548 | return -1; |
549 | } | 549 | } |
550 | 550 | ||
/* Netlink policy for the nested CTA_PROTOINFO_SCTP attributes parsed
 * by nlattr_to_sctp().
 */
static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
	[CTA_PROTOINFO_SCTP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]  = { .type = NLA_U32 },
	[CTA_PROTOINFO_SCTP_VTAG_REPLY]     = { .type = NLA_U32 },
};
556 | 556 | ||
/* Worst-case payload emitted by sctp_to_nlattr(): one u8 state
 * attribute plus two u32 vtag attributes, each NLA-aligned.
 */
#define SCTP_NLATTR_SIZE ( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 4) + \
	NLA_ALIGN(NLA_HDRLEN + 4))
561 | 561 | ||
562 | static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) | 562 | static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) |
563 | { | 563 | { |
564 | struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; | 564 | struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; |
565 | struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; | 565 | struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; |
566 | int err; | 566 | int err; |
567 | 567 | ||
568 | /* updates may not contain the internal protocol info, skip parsing */ | 568 | /* updates may not contain the internal protocol info, skip parsing */ |
569 | if (!attr) | 569 | if (!attr) |
570 | return 0; | 570 | return 0; |
571 | 571 | ||
572 | err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, | 572 | err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, |
573 | sctp_nla_policy, NULL); | 573 | sctp_nla_policy, NULL); |
574 | if (err < 0) | 574 | if (err < 0) |
575 | return err; | 575 | return err; |
576 | 576 | ||
577 | if (!tb[CTA_PROTOINFO_SCTP_STATE] || | 577 | if (!tb[CTA_PROTOINFO_SCTP_STATE] || |
578 | !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || | 578 | !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || |
579 | !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) | 579 | !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) |
580 | return -EINVAL; | 580 | return -EINVAL; |
581 | 581 | ||
582 | spin_lock_bh(&ct->lock); | 582 | spin_lock_bh(&ct->lock); |
583 | ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); | 583 | ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); |
584 | ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = | 584 | ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = |
585 | nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); | 585 | nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); |
586 | ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = | 586 | ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = |
587 | nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); | 587 | nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); |
588 | spin_unlock_bh(&ct->lock); | 588 | spin_unlock_bh(&ct->lock); |
589 | 589 | ||
590 | return 0; | 590 | return 0; |
591 | } | 591 | } |
592 | #endif | 592 | #endif |
593 | 593 | ||
594 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 594 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
595 | 595 | ||
596 | #include <linux/netfilter/nfnetlink.h> | 596 | #include <linux/netfilter/nfnetlink.h> |
597 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 597 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
598 | 598 | ||
599 | static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], | 599 | static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], |
600 | struct net *net, void *data) | 600 | struct net *net, void *data) |
601 | { | 601 | { |
602 | unsigned int *timeouts = data; | 602 | unsigned int *timeouts = data; |
603 | struct nf_sctp_net *sn = sctp_pernet(net); | 603 | struct nf_sctp_net *sn = sctp_pernet(net); |
604 | int i; | 604 | int i; |
605 | 605 | ||
606 | /* set default SCTP timeouts. */ | 606 | /* set default SCTP timeouts. */ |
607 | for (i=0; i<SCTP_CONNTRACK_MAX; i++) | 607 | for (i=0; i<SCTP_CONNTRACK_MAX; i++) |
608 | timeouts[i] = sn->timeouts[i]; | 608 | timeouts[i] = sn->timeouts[i]; |
609 | 609 | ||
610 | /* there's a 1:1 mapping between attributes and protocol states. */ | 610 | /* there's a 1:1 mapping between attributes and protocol states. */ |
611 | for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { | 611 | for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { |
612 | if (tb[i]) { | 612 | if (tb[i]) { |
613 | timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; | 613 | timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; |
614 | } | 614 | } |
615 | } | 615 | } |
616 | return 0; | 616 | return 0; |
617 | } | 617 | } |
618 | 618 | ||
619 | static int | 619 | static int |
620 | sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 620 | sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
621 | { | 621 | { |
622 | const unsigned int *timeouts = data; | 622 | const unsigned int *timeouts = data; |
623 | int i; | 623 | int i; |
624 | 624 | ||
625 | for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { | 625 | for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { |
626 | if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) | 626 | if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) |
627 | goto nla_put_failure; | 627 | goto nla_put_failure; |
628 | } | 628 | } |
629 | return 0; | 629 | return 0; |
630 | 630 | ||
631 | nla_put_failure: | 631 | nla_put_failure: |
632 | return -ENOSPC; | 632 | return -ENOSPC; |
633 | } | 633 | } |
634 | 634 | ||
/* Netlink policy for CTA_TIMEOUT_SCTP_* attributes; indices map 1:1
 * onto the SCTP_CONNTRACK_* states (see sctp_timeout_nlattr_to_obj).
 */
static const struct nla_policy
sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
	[CTA_TIMEOUT_SCTP_CLOSED]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_COOKIE_WAIT]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_COOKIE_ECHOED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_ESTABLISHED]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_SHUTDOWN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_SHUTDOWN_RECD]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED]	= { .type = NLA_U32 },
};
647 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 647 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
648 | 648 | ||
649 | 649 | ||
650 | #ifdef CONFIG_SYSCTL | 650 | #ifdef CONFIG_SYSCTL |
651 | static struct ctl_table sctp_sysctl_table[] = { | 651 | static struct ctl_table sctp_sysctl_table[] = { |
652 | { | 652 | { |
653 | .procname = "nf_conntrack_sctp_timeout_closed", | 653 | .procname = "nf_conntrack_sctp_timeout_closed", |
654 | .maxlen = sizeof(unsigned int), | 654 | .maxlen = sizeof(unsigned int), |
655 | .mode = 0644, | 655 | .mode = 0644, |
656 | .proc_handler = proc_dointvec_jiffies, | 656 | .proc_handler = proc_dointvec_jiffies, |
657 | }, | 657 | }, |
658 | { | 658 | { |
659 | .procname = "nf_conntrack_sctp_timeout_cookie_wait", | 659 | .procname = "nf_conntrack_sctp_timeout_cookie_wait", |
660 | .maxlen = sizeof(unsigned int), | 660 | .maxlen = sizeof(unsigned int), |
661 | .mode = 0644, | 661 | .mode = 0644, |
662 | .proc_handler = proc_dointvec_jiffies, | 662 | .proc_handler = proc_dointvec_jiffies, |
663 | }, | 663 | }, |
664 | { | 664 | { |
665 | .procname = "nf_conntrack_sctp_timeout_cookie_echoed", | 665 | .procname = "nf_conntrack_sctp_timeout_cookie_echoed", |
666 | .maxlen = sizeof(unsigned int), | 666 | .maxlen = sizeof(unsigned int), |
667 | .mode = 0644, | 667 | .mode = 0644, |
668 | .proc_handler = proc_dointvec_jiffies, | 668 | .proc_handler = proc_dointvec_jiffies, |
669 | }, | 669 | }, |
670 | { | 670 | { |
671 | .procname = "nf_conntrack_sctp_timeout_established", | 671 | .procname = "nf_conntrack_sctp_timeout_established", |
672 | .maxlen = sizeof(unsigned int), | 672 | .maxlen = sizeof(unsigned int), |
673 | .mode = 0644, | 673 | .mode = 0644, |
674 | .proc_handler = proc_dointvec_jiffies, | 674 | .proc_handler = proc_dointvec_jiffies, |
675 | }, | 675 | }, |
676 | { | 676 | { |
677 | .procname = "nf_conntrack_sctp_timeout_shutdown_sent", | 677 | .procname = "nf_conntrack_sctp_timeout_shutdown_sent", |
678 | .maxlen = sizeof(unsigned int), | 678 | .maxlen = sizeof(unsigned int), |
679 | .mode = 0644, | 679 | .mode = 0644, |
680 | .proc_handler = proc_dointvec_jiffies, | 680 | .proc_handler = proc_dointvec_jiffies, |
681 | }, | 681 | }, |
682 | { | 682 | { |
683 | .procname = "nf_conntrack_sctp_timeout_shutdown_recd", | 683 | .procname = "nf_conntrack_sctp_timeout_shutdown_recd", |
684 | .maxlen = sizeof(unsigned int), | 684 | .maxlen = sizeof(unsigned int), |
685 | .mode = 0644, | 685 | .mode = 0644, |
686 | .proc_handler = proc_dointvec_jiffies, | 686 | .proc_handler = proc_dointvec_jiffies, |
687 | }, | 687 | }, |
688 | { | 688 | { |
689 | .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", | 689 | .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", |
690 | .maxlen = sizeof(unsigned int), | 690 | .maxlen = sizeof(unsigned int), |
691 | .mode = 0644, | 691 | .mode = 0644, |
692 | .proc_handler = proc_dointvec_jiffies, | 692 | .proc_handler = proc_dointvec_jiffies, |
693 | }, | 693 | }, |
694 | { | 694 | { |
695 | .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", | 695 | .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", |
696 | .maxlen = sizeof(unsigned int), | 696 | .maxlen = sizeof(unsigned int), |
697 | .mode = 0644, | 697 | .mode = 0644, |
698 | .proc_handler = proc_dointvec_jiffies, | 698 | .proc_handler = proc_dointvec_jiffies, |
699 | }, | 699 | }, |
700 | { | 700 | { |
701 | .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", | 701 | .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", |
702 | .maxlen = sizeof(unsigned int), | 702 | .maxlen = sizeof(unsigned int), |
703 | .mode = 0644, | 703 | .mode = 0644, |
704 | .proc_handler = proc_dointvec_jiffies, | 704 | .proc_handler = proc_dointvec_jiffies, |
705 | }, | 705 | }, |
706 | { } | 706 | { } |
707 | }; | 707 | }; |
708 | #endif | 708 | #endif |
709 | 709 | ||
710 | static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, | 710 | static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, |
711 | struct nf_sctp_net *sn) | 711 | struct nf_sctp_net *sn) |
712 | { | 712 | { |
713 | #ifdef CONFIG_SYSCTL | 713 | #ifdef CONFIG_SYSCTL |
714 | if (pn->ctl_table) | 714 | if (pn->ctl_table) |
715 | return 0; | 715 | return 0; |
716 | 716 | ||
717 | pn->ctl_table = kmemdup(sctp_sysctl_table, | 717 | pn->ctl_table = kmemdup(sctp_sysctl_table, |
718 | sizeof(sctp_sysctl_table), | 718 | sizeof(sctp_sysctl_table), |
719 | GFP_KERNEL); | 719 | GFP_KERNEL); |
720 | if (!pn->ctl_table) | 720 | if (!pn->ctl_table) |
721 | return -ENOMEM; | 721 | return -ENOMEM; |
722 | 722 | ||
723 | pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; | 723 | pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; |
724 | pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; | 724 | pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; |
725 | pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; | 725 | pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; |
726 | pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; | 726 | pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; |
727 | pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; | 727 | pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; |
728 | pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; | 728 | pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; |
729 | pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; | 729 | pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; |
730 | pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; | 730 | pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; |
731 | pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; | 731 | pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; |
732 | #endif | 732 | #endif |
733 | return 0; | 733 | return 0; |
734 | } | 734 | } |
735 | 735 | ||
736 | static int sctp_init_net(struct net *net, u_int16_t proto) | 736 | static int sctp_init_net(struct net *net, u_int16_t proto) |
737 | { | 737 | { |
738 | struct nf_sctp_net *sn = sctp_pernet(net); | 738 | struct nf_sctp_net *sn = sctp_pernet(net); |
739 | struct nf_proto_net *pn = &sn->pn; | 739 | struct nf_proto_net *pn = &sn->pn; |
740 | 740 | ||
741 | if (!pn->users) { | 741 | if (!pn->users) { |
742 | int i; | 742 | int i; |
743 | 743 | ||
744 | for (i = 0; i < SCTP_CONNTRACK_MAX; i++) | 744 | for (i = 0; i < SCTP_CONNTRACK_MAX; i++) |
745 | sn->timeouts[i] = sctp_timeouts[i]; | 745 | sn->timeouts[i] = sctp_timeouts[i]; |
746 | } | 746 | } |
747 | 747 | ||
748 | return sctp_kmemdup_sysctl_table(pn, sn); | 748 | return sctp_kmemdup_sysctl_table(pn, sn); |
749 | } | 749 | } |
750 | 750 | ||
751 | static struct nf_proto_net *sctp_get_net_proto(struct net *net) | 751 | static struct nf_proto_net *sctp_get_net_proto(struct net *net) |
752 | { | 752 | { |
753 | return &net->ct.nf_ct_proto.sctp.pn; | 753 | return &net->ct.nf_ct_proto.sctp.pn; |
754 | } | 754 | } |
755 | 755 | ||
756 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { | 756 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { |
757 | .l3proto = PF_INET, | 757 | .l3proto = PF_INET, |
758 | .l4proto = IPPROTO_SCTP, | 758 | .l4proto = IPPROTO_SCTP, |
759 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 759 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
760 | .print_conntrack = sctp_print_conntrack, | 760 | .print_conntrack = sctp_print_conntrack, |
761 | #endif | 761 | #endif |
762 | .packet = sctp_packet, | 762 | .packet = sctp_packet, |
763 | .get_timeouts = sctp_get_timeouts, | ||
764 | .new = sctp_new, | 763 | .new = sctp_new, |
765 | .error = sctp_error, | 764 | .error = sctp_error, |
766 | .can_early_drop = sctp_can_early_drop, | 765 | .can_early_drop = sctp_can_early_drop, |
767 | .me = THIS_MODULE, | 766 | .me = THIS_MODULE, |
768 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 767 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
769 | .nlattr_size = SCTP_NLATTR_SIZE, | 768 | .nlattr_size = SCTP_NLATTR_SIZE, |
770 | .to_nlattr = sctp_to_nlattr, | 769 | .to_nlattr = sctp_to_nlattr, |
771 | .from_nlattr = nlattr_to_sctp, | 770 | .from_nlattr = nlattr_to_sctp, |
772 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 771 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
773 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 772 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
774 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 773 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
775 | .nla_policy = nf_ct_port_nla_policy, | 774 | .nla_policy = nf_ct_port_nla_policy, |
776 | #endif | 775 | #endif |
777 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 776 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
778 | .ctnl_timeout = { | 777 | .ctnl_timeout = { |
779 | .nlattr_to_obj = sctp_timeout_nlattr_to_obj, | 778 | .nlattr_to_obj = sctp_timeout_nlattr_to_obj, |
780 | .obj_to_nlattr = sctp_timeout_obj_to_nlattr, | 779 | .obj_to_nlattr = sctp_timeout_obj_to_nlattr, |
781 | .nlattr_max = CTA_TIMEOUT_SCTP_MAX, | 780 | .nlattr_max = CTA_TIMEOUT_SCTP_MAX, |
782 | .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, | 781 | .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, |
783 | .nla_policy = sctp_timeout_nla_policy, | 782 | .nla_policy = sctp_timeout_nla_policy, |
784 | }, | 783 | }, |
785 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 784 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
786 | .init_net = sctp_init_net, | 785 | .init_net = sctp_init_net, |
787 | .get_net_proto = sctp_get_net_proto, | 786 | .get_net_proto = sctp_get_net_proto, |
788 | }; | 787 | }; |
789 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); | 788 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); |
790 | 789 | ||
791 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { | 790 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { |
792 | .l3proto = PF_INET6, | 791 | .l3proto = PF_INET6, |
793 | .l4proto = IPPROTO_SCTP, | 792 | .l4proto = IPPROTO_SCTP, |
794 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 793 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
795 | .print_conntrack = sctp_print_conntrack, | 794 | .print_conntrack = sctp_print_conntrack, |
796 | #endif | 795 | #endif |
797 | .packet = sctp_packet, | 796 | .packet = sctp_packet, |
798 | .get_timeouts = sctp_get_timeouts, | ||
799 | .new = sctp_new, | 797 | .new = sctp_new, |
800 | .error = sctp_error, | 798 | .error = sctp_error, |
801 | .can_early_drop = sctp_can_early_drop, | 799 | .can_early_drop = sctp_can_early_drop, |
802 | .me = THIS_MODULE, | 800 | .me = THIS_MODULE, |
803 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 801 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
804 | .nlattr_size = SCTP_NLATTR_SIZE, | 802 | .nlattr_size = SCTP_NLATTR_SIZE, |
805 | .to_nlattr = sctp_to_nlattr, | 803 | .to_nlattr = sctp_to_nlattr, |
806 | .from_nlattr = nlattr_to_sctp, | 804 | .from_nlattr = nlattr_to_sctp, |
807 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 805 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
808 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 806 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
809 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 807 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
810 | .nla_policy = nf_ct_port_nla_policy, | 808 | .nla_policy = nf_ct_port_nla_policy, |
811 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 809 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
812 | .ctnl_timeout = { | 810 | .ctnl_timeout = { |
813 | .nlattr_to_obj = sctp_timeout_nlattr_to_obj, | 811 | .nlattr_to_obj = sctp_timeout_nlattr_to_obj, |
814 | .obj_to_nlattr = sctp_timeout_obj_to_nlattr, | 812 | .obj_to_nlattr = sctp_timeout_obj_to_nlattr, |
815 | .nlattr_max = CTA_TIMEOUT_SCTP_MAX, | 813 | .nlattr_max = CTA_TIMEOUT_SCTP_MAX, |
816 | .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, | 814 | .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, |
817 | .nla_policy = sctp_timeout_nla_policy, | 815 | .nla_policy = sctp_timeout_nla_policy, |
818 | }, | 816 | }, |
819 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 817 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
net/netfilter/nf_conntrack_proto_tcp.c
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
3 | * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> | 3 | * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> |
4 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> | 4 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/timer.h> | 12 | #include <linux/timer.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/in.h> | 14 | #include <linux/in.h> |
15 | #include <linux/tcp.h> | 15 | #include <linux/tcp.h> |
16 | #include <linux/spinlock.h> | 16 | #include <linux/spinlock.h> |
17 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
18 | #include <linux/ipv6.h> | 18 | #include <linux/ipv6.h> |
19 | #include <net/ip6_checksum.h> | 19 | #include <net/ip6_checksum.h> |
20 | #include <asm/unaligned.h> | 20 | #include <asm/unaligned.h> |
21 | 21 | ||
22 | #include <net/tcp.h> | 22 | #include <net/tcp.h> |
23 | 23 | ||
24 | #include <linux/netfilter.h> | 24 | #include <linux/netfilter.h> |
25 | #include <linux/netfilter_ipv4.h> | 25 | #include <linux/netfilter_ipv4.h> |
26 | #include <linux/netfilter_ipv6.h> | 26 | #include <linux/netfilter_ipv6.h> |
27 | #include <net/netfilter/nf_conntrack.h> | 27 | #include <net/netfilter/nf_conntrack.h> |
28 | #include <net/netfilter/nf_conntrack_l4proto.h> | 28 | #include <net/netfilter/nf_conntrack_l4proto.h> |
29 | #include <net/netfilter/nf_conntrack_ecache.h> | 29 | #include <net/netfilter/nf_conntrack_ecache.h> |
30 | #include <net/netfilter/nf_conntrack_seqadj.h> | 30 | #include <net/netfilter/nf_conntrack_seqadj.h> |
31 | #include <net/netfilter/nf_conntrack_synproxy.h> | 31 | #include <net/netfilter/nf_conntrack_synproxy.h> |
32 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
32 | #include <net/netfilter/nf_log.h> | 33 | #include <net/netfilter/nf_log.h> |
33 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 34 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
34 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> | 35 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> |
35 | 36 | ||
36 | /* "Be conservative in what you do, | 37 | /* "Be conservative in what you do, |
37 | be liberal in what you accept from others." | 38 | be liberal in what you accept from others." |
38 | If it's non-zero, we mark only out of window RST segments as INVALID. */ | 39 | If it's non-zero, we mark only out of window RST segments as INVALID. */ |
39 | static int nf_ct_tcp_be_liberal __read_mostly = 0; | 40 | static int nf_ct_tcp_be_liberal __read_mostly = 0; |
40 | 41 | ||
41 | /* If it is set to zero, we disable picking up already established | 42 | /* If it is set to zero, we disable picking up already established |
42 | connections. */ | 43 | connections. */ |
43 | static int nf_ct_tcp_loose __read_mostly = 1; | 44 | static int nf_ct_tcp_loose __read_mostly = 1; |
44 | 45 | ||
45 | /* Max number of the retransmitted packets without receiving an (acceptable) | 46 | /* Max number of the retransmitted packets without receiving an (acceptable) |
46 | ACK from the destination. If this number is reached, a shorter timer | 47 | ACK from the destination. If this number is reached, a shorter timer |
47 | will be started. */ | 48 | will be started. */ |
48 | static int nf_ct_tcp_max_retrans __read_mostly = 3; | 49 | static int nf_ct_tcp_max_retrans __read_mostly = 3; |
49 | 50 | ||
50 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more | 51 | /* FIXME: Examine ipfilter's timeouts and conntrack transitions more |
51 | closely. They're more complex. --RR */ | 52 | closely. They're more complex. --RR */ |
52 | 53 | ||
53 | static const char *const tcp_conntrack_names[] = { | 54 | static const char *const tcp_conntrack_names[] = { |
54 | "NONE", | 55 | "NONE", |
55 | "SYN_SENT", | 56 | "SYN_SENT", |
56 | "SYN_RECV", | 57 | "SYN_RECV", |
57 | "ESTABLISHED", | 58 | "ESTABLISHED", |
58 | "FIN_WAIT", | 59 | "FIN_WAIT", |
59 | "CLOSE_WAIT", | 60 | "CLOSE_WAIT", |
60 | "LAST_ACK", | 61 | "LAST_ACK", |
61 | "TIME_WAIT", | 62 | "TIME_WAIT", |
62 | "CLOSE", | 63 | "CLOSE", |
63 | "SYN_SENT2", | 64 | "SYN_SENT2", |
64 | }; | 65 | }; |
65 | 66 | ||
66 | #define SECS * HZ | 67 | #define SECS * HZ |
67 | #define MINS * 60 SECS | 68 | #define MINS * 60 SECS |
68 | #define HOURS * 60 MINS | 69 | #define HOURS * 60 MINS |
69 | #define DAYS * 24 HOURS | 70 | #define DAYS * 24 HOURS |
70 | 71 | ||
71 | static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = { | 72 | static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = { |
72 | [TCP_CONNTRACK_SYN_SENT] = 2 MINS, | 73 | [TCP_CONNTRACK_SYN_SENT] = 2 MINS, |
73 | [TCP_CONNTRACK_SYN_RECV] = 60 SECS, | 74 | [TCP_CONNTRACK_SYN_RECV] = 60 SECS, |
74 | [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS, | 75 | [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS, |
75 | [TCP_CONNTRACK_FIN_WAIT] = 2 MINS, | 76 | [TCP_CONNTRACK_FIN_WAIT] = 2 MINS, |
76 | [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS, | 77 | [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS, |
77 | [TCP_CONNTRACK_LAST_ACK] = 30 SECS, | 78 | [TCP_CONNTRACK_LAST_ACK] = 30 SECS, |
78 | [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, | 79 | [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, |
79 | [TCP_CONNTRACK_CLOSE] = 10 SECS, | 80 | [TCP_CONNTRACK_CLOSE] = 10 SECS, |
80 | [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, | 81 | [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, |
81 | /* RFC1122 says the R2 limit should be at least 100 seconds. | 82 | /* RFC1122 says the R2 limit should be at least 100 seconds. |
82 | Linux uses 15 packets as limit, which corresponds | 83 | Linux uses 15 packets as limit, which corresponds |
83 | to ~13-30min depending on RTO. */ | 84 | to ~13-30min depending on RTO. */ |
84 | [TCP_CONNTRACK_RETRANS] = 5 MINS, | 85 | [TCP_CONNTRACK_RETRANS] = 5 MINS, |
85 | [TCP_CONNTRACK_UNACK] = 5 MINS, | 86 | [TCP_CONNTRACK_UNACK] = 5 MINS, |
86 | }; | 87 | }; |
87 | 88 | ||
88 | #define sNO TCP_CONNTRACK_NONE | 89 | #define sNO TCP_CONNTRACK_NONE |
89 | #define sSS TCP_CONNTRACK_SYN_SENT | 90 | #define sSS TCP_CONNTRACK_SYN_SENT |
90 | #define sSR TCP_CONNTRACK_SYN_RECV | 91 | #define sSR TCP_CONNTRACK_SYN_RECV |
91 | #define sES TCP_CONNTRACK_ESTABLISHED | 92 | #define sES TCP_CONNTRACK_ESTABLISHED |
92 | #define sFW TCP_CONNTRACK_FIN_WAIT | 93 | #define sFW TCP_CONNTRACK_FIN_WAIT |
93 | #define sCW TCP_CONNTRACK_CLOSE_WAIT | 94 | #define sCW TCP_CONNTRACK_CLOSE_WAIT |
94 | #define sLA TCP_CONNTRACK_LAST_ACK | 95 | #define sLA TCP_CONNTRACK_LAST_ACK |
95 | #define sTW TCP_CONNTRACK_TIME_WAIT | 96 | #define sTW TCP_CONNTRACK_TIME_WAIT |
96 | #define sCL TCP_CONNTRACK_CLOSE | 97 | #define sCL TCP_CONNTRACK_CLOSE |
97 | #define sS2 TCP_CONNTRACK_SYN_SENT2 | 98 | #define sS2 TCP_CONNTRACK_SYN_SENT2 |
98 | #define sIV TCP_CONNTRACK_MAX | 99 | #define sIV TCP_CONNTRACK_MAX |
99 | #define sIG TCP_CONNTRACK_IGNORE | 100 | #define sIG TCP_CONNTRACK_IGNORE |
100 | 101 | ||
101 | /* What TCP flags are set from RST/SYN/FIN/ACK. */ | 102 | /* What TCP flags are set from RST/SYN/FIN/ACK. */ |
102 | enum tcp_bit_set { | 103 | enum tcp_bit_set { |
103 | TCP_SYN_SET, | 104 | TCP_SYN_SET, |
104 | TCP_SYNACK_SET, | 105 | TCP_SYNACK_SET, |
105 | TCP_FIN_SET, | 106 | TCP_FIN_SET, |
106 | TCP_ACK_SET, | 107 | TCP_ACK_SET, |
107 | TCP_RST_SET, | 108 | TCP_RST_SET, |
108 | TCP_NONE_SET, | 109 | TCP_NONE_SET, |
109 | }; | 110 | }; |
110 | 111 | ||
111 | /* | 112 | /* |
112 | * The TCP state transition table needs a few words... | 113 | * The TCP state transition table needs a few words... |
113 | * | 114 | * |
114 | * We are the man in the middle. All the packets go through us | 115 | * We are the man in the middle. All the packets go through us |
115 | * but might get lost in transit to the destination. | 116 | * but might get lost in transit to the destination. |
116 | * It is assumed that the destinations can't receive segments | 117 | * It is assumed that the destinations can't receive segments |
117 | * we haven't seen. | 118 | * we haven't seen. |
118 | * | 119 | * |
119 | * The checked segment is in window, but our windows are *not* | 120 | * The checked segment is in window, but our windows are *not* |
120 | * equivalent with the ones of the sender/receiver. We always | 121 | * equivalent with the ones of the sender/receiver. We always |
121 | * try to guess the state of the current sender. | 122 | * try to guess the state of the current sender. |
122 | * | 123 | * |
123 | * The meaning of the states are: | 124 | * The meaning of the states are: |
124 | * | 125 | * |
125 | * NONE: initial state | 126 | * NONE: initial state |
126 | * SYN_SENT: SYN-only packet seen | 127 | * SYN_SENT: SYN-only packet seen |
127 | * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open | 128 | * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open |
128 | * SYN_RECV: SYN-ACK packet seen | 129 | * SYN_RECV: SYN-ACK packet seen |
129 | * ESTABLISHED: ACK packet seen | 130 | * ESTABLISHED: ACK packet seen |
130 | * FIN_WAIT: FIN packet seen | 131 | * FIN_WAIT: FIN packet seen |
131 | * CLOSE_WAIT: ACK seen (after FIN) | 132 | * CLOSE_WAIT: ACK seen (after FIN) |
132 | * LAST_ACK: FIN seen (after FIN) | 133 | * LAST_ACK: FIN seen (after FIN) |
133 | * TIME_WAIT: last ACK seen | 134 | * TIME_WAIT: last ACK seen |
134 | * CLOSE: closed connection (RST) | 135 | * CLOSE: closed connection (RST) |
135 | * | 136 | * |
136 | * Packets marked as IGNORED (sIG): | 137 | * Packets marked as IGNORED (sIG): |
137 | * if they may be either invalid or valid | 138 | * if they may be either invalid or valid |
138 | * and the receiver may send back a connection | 139 | * and the receiver may send back a connection |
139 | * closing RST or a SYN/ACK. | 140 | * closing RST or a SYN/ACK. |
140 | * | 141 | * |
141 | * Packets marked as INVALID (sIV): | 142 | * Packets marked as INVALID (sIV): |
142 | * if we regard them as truly invalid packets | 143 | * if we regard them as truly invalid packets |
143 | */ | 144 | */ |
144 | static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { | 145 | static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { |
145 | { | 146 | { |
146 | /* ORIGINAL */ | 147 | /* ORIGINAL */ |
147 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 148 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
148 | /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, | 149 | /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, |
149 | /* | 150 | /* |
150 | * sNO -> sSS Initialize a new connection | 151 | * sNO -> sSS Initialize a new connection |
151 | * sSS -> sSS Retransmitted SYN | 152 | * sSS -> sSS Retransmitted SYN |
152 | * sS2 -> sS2 Late retransmitted SYN | 153 | * sS2 -> sS2 Late retransmitted SYN |
153 | * sSR -> sIG | 154 | * sSR -> sIG |
154 | * sES -> sIG Error: SYNs in window outside the SYN_SENT state | 155 | * sES -> sIG Error: SYNs in window outside the SYN_SENT state |
155 | * are errors. Receiver will reply with RST | 156 | * are errors. Receiver will reply with RST |
156 | * and close the connection. | 157 | * and close the connection. |
157 | * Or we are not in sync and hold a dead connection. | 158 | * Or we are not in sync and hold a dead connection. |
158 | * sFW -> sIG | 159 | * sFW -> sIG |
159 | * sCW -> sIG | 160 | * sCW -> sIG |
160 | * sLA -> sIG | 161 | * sLA -> sIG |
161 | * sTW -> sSS Reopened connection (RFC 1122). | 162 | * sTW -> sSS Reopened connection (RFC 1122). |
162 | * sCL -> sSS | 163 | * sCL -> sSS |
163 | */ | 164 | */ |
164 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 165 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
165 | /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, | 166 | /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, |
166 | /* | 167 | /* |
167 | * sNO -> sIV Too late and no reason to do anything | 168 | * sNO -> sIV Too late and no reason to do anything |
168 | * sSS -> sIV Client can't send SYN and then SYN/ACK | 169 | * sSS -> sIV Client can't send SYN and then SYN/ACK |
169 | * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open | 170 | * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open |
170 | * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open | 171 | * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open |
171 | * sES -> sIV Invalid SYN/ACK packets sent by the client | 172 | * sES -> sIV Invalid SYN/ACK packets sent by the client |
172 | * sFW -> sIV | 173 | * sFW -> sIV |
173 | * sCW -> sIV | 174 | * sCW -> sIV |
174 | * sLA -> sIV | 175 | * sLA -> sIV |
175 | * sTW -> sIV | 176 | * sTW -> sIV |
176 | * sCL -> sIV | 177 | * sCL -> sIV |
177 | */ | 178 | */ |
178 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 179 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
179 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, | 180 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, |
180 | /* | 181 | /* |
181 | * sNO -> sIV Too late and no reason to do anything... | 182 | * sNO -> sIV Too late and no reason to do anything... |
182 | * sSS -> sIV Client migth not send FIN in this state: | 183 | * sSS -> sIV Client migth not send FIN in this state: |
183 | * we enforce waiting for a SYN/ACK reply first. | 184 | * we enforce waiting for a SYN/ACK reply first. |
184 | * sS2 -> sIV | 185 | * sS2 -> sIV |
185 | * sSR -> sFW Close started. | 186 | * sSR -> sFW Close started. |
186 | * sES -> sFW | 187 | * sES -> sFW |
187 | * sFW -> sLA FIN seen in both directions, waiting for | 188 | * sFW -> sLA FIN seen in both directions, waiting for |
188 | * the last ACK. | 189 | * the last ACK. |
189 | * Migth be a retransmitted FIN as well... | 190 | * Migth be a retransmitted FIN as well... |
190 | * sCW -> sLA | 191 | * sCW -> sLA |
191 | * sLA -> sLA Retransmitted FIN. Remain in the same state. | 192 | * sLA -> sLA Retransmitted FIN. Remain in the same state. |
192 | * sTW -> sTW | 193 | * sTW -> sTW |
193 | * sCL -> sCL | 194 | * sCL -> sCL |
194 | */ | 195 | */ |
195 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 196 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
196 | /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, | 197 | /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, |
197 | /* | 198 | /* |
198 | * sNO -> sES Assumed. | 199 | * sNO -> sES Assumed. |
199 | * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. | 200 | * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. |
200 | * sS2 -> sIV | 201 | * sS2 -> sIV |
201 | * sSR -> sES Established state is reached. | 202 | * sSR -> sES Established state is reached. |
202 | * sES -> sES :-) | 203 | * sES -> sES :-) |
203 | * sFW -> sCW Normal close request answered by ACK. | 204 | * sFW -> sCW Normal close request answered by ACK. |
204 | * sCW -> sCW | 205 | * sCW -> sCW |
205 | * sLA -> sTW Last ACK detected (RFC5961 challenged) | 206 | * sLA -> sTW Last ACK detected (RFC5961 challenged) |
206 | * sTW -> sTW Retransmitted last ACK. Remain in the same state. | 207 | * sTW -> sTW Retransmitted last ACK. Remain in the same state. |
207 | * sCL -> sCL | 208 | * sCL -> sCL |
208 | */ | 209 | */ |
209 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 210 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
210 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, | 211 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, |
211 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } | 212 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } |
212 | }, | 213 | }, |
213 | { | 214 | { |
214 | /* REPLY */ | 215 | /* REPLY */ |
215 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 216 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
216 | /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, | 217 | /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, |
217 | /* | 218 | /* |
218 | * sNO -> sIV Never reached. | 219 | * sNO -> sIV Never reached. |
219 | * sSS -> sS2 Simultaneous open | 220 | * sSS -> sS2 Simultaneous open |
220 | * sS2 -> sS2 Retransmitted simultaneous SYN | 221 | * sS2 -> sS2 Retransmitted simultaneous SYN |
221 | * sSR -> sIV Invalid SYN packets sent by the server | 222 | * sSR -> sIV Invalid SYN packets sent by the server |
222 | * sES -> sIV | 223 | * sES -> sIV |
223 | * sFW -> sIV | 224 | * sFW -> sIV |
224 | * sCW -> sIV | 225 | * sCW -> sIV |
225 | * sLA -> sIV | 226 | * sLA -> sIV |
226 | * sTW -> sSS Reopened connection, but server may have switched role | 227 | * sTW -> sSS Reopened connection, but server may have switched role |
227 | * sCL -> sIV | 228 | * sCL -> sIV |
228 | */ | 229 | */ |
229 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 230 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
230 | /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, | 231 | /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, |
231 | /* | 232 | /* |
232 | * sSS -> sSR Standard open. | 233 | * sSS -> sSR Standard open. |
233 | * sS2 -> sSR Simultaneous open | 234 | * sS2 -> sSR Simultaneous open |
234 | * sSR -> sIG Retransmitted SYN/ACK, ignore it. | 235 | * sSR -> sIG Retransmitted SYN/ACK, ignore it. |
235 | * sES -> sIG Late retransmitted SYN/ACK? | 236 | * sES -> sIG Late retransmitted SYN/ACK? |
236 | * sFW -> sIG Might be SYN/ACK answering ignored SYN | 237 | * sFW -> sIG Might be SYN/ACK answering ignored SYN |
237 | * sCW -> sIG | 238 | * sCW -> sIG |
238 | * sLA -> sIG | 239 | * sLA -> sIG |
239 | * sTW -> sIG | 240 | * sTW -> sIG |
240 | * sCL -> sIG | 241 | * sCL -> sIG |
241 | */ | 242 | */ |
242 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 243 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
243 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, | 244 | /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, |
244 | /* | 245 | /* |
245 | * sSS -> sIV Server might not send FIN in this state. | 246 | * sSS -> sIV Server might not send FIN in this state. |
246 | * sS2 -> sIV | 247 | * sS2 -> sIV |
247 | * sSR -> sFW Close started. | 248 | * sSR -> sFW Close started. |
248 | * sES -> sFW | 249 | * sES -> sFW |
249 | * sFW -> sLA FIN seen in both directions. | 250 | * sFW -> sLA FIN seen in both directions. |
250 | * sCW -> sLA | 251 | * sCW -> sLA |
251 | * sLA -> sLA Retransmitted FIN. | 252 | * sLA -> sLA Retransmitted FIN. |
252 | * sTW -> sTW | 253 | * sTW -> sTW |
253 | * sCL -> sCL | 254 | * sCL -> sCL |
254 | */ | 255 | */ |
255 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 256 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
256 | /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, | 257 | /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, |
257 | /* | 258 | /* |
258 | * sSS -> sIG Might be a half-open connection. | 259 | * sSS -> sIG Might be a half-open connection. |
259 | * sS2 -> sIG | 260 | * sS2 -> sIG |
260 | * sSR -> sSR Might answer late resent SYN. | 261 | * sSR -> sSR Might answer late resent SYN. |
261 | * sES -> sES :-) | 262 | * sES -> sES :-) |
262 | * sFW -> sCW Normal close request answered by ACK. | 263 | * sFW -> sCW Normal close request answered by ACK. |
263 | * sCW -> sCW | 264 | * sCW -> sCW |
264 | * sLA -> sTW Last ACK detected (RFC5961 challenged) | 265 | * sLA -> sTW Last ACK detected (RFC5961 challenged) |
265 | * sTW -> sTW Retransmitted last ACK. | 266 | * sTW -> sTW Retransmitted last ACK. |
266 | * sCL -> sCL | 267 | * sCL -> sCL |
267 | */ | 268 | */ |
268 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ | 269 | /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ |
269 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, | 270 | /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, |
270 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } | 271 | /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } |
271 | } | 272 | } |
272 | }; | 273 | }; |
273 | 274 | ||
274 | static inline struct nf_tcp_net *tcp_pernet(struct net *net) | 275 | static inline struct nf_tcp_net *tcp_pernet(struct net *net) |
275 | { | 276 | { |
276 | return &net->ct.nf_ct_proto.tcp; | 277 | return &net->ct.nf_ct_proto.tcp; |
277 | } | 278 | } |
278 | 279 | ||
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Emit the TCP-private part of a conntrack entry (its state name) to the
 * /proc seq file.  Offloaded flows are skipped: their state is not tracked
 * by this module while the flowtable owns them.
 */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
		return;

	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
#endif
289 | 290 | ||
290 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) | 291 | static unsigned int get_conntrack_index(const struct tcphdr *tcph) |
291 | { | 292 | { |
292 | if (tcph->rst) return TCP_RST_SET; | 293 | if (tcph->rst) return TCP_RST_SET; |
293 | else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); | 294 | else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); |
294 | else if (tcph->fin) return TCP_FIN_SET; | 295 | else if (tcph->fin) return TCP_FIN_SET; |
295 | else if (tcph->ack) return TCP_ACK_SET; | 296 | else if (tcph->ack) return TCP_ACK_SET; |
296 | else return TCP_NONE_SET; | 297 | else return TCP_NONE_SET; |
297 | } | 298 | } |
298 | 299 | ||
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.

   http://www.sane.nl/events/sane2000/papers.html
   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/

   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

	td_maxend = max(sack + max(win,1)) seen in reply packets
	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
	td_maxwin += seq + len - sender.td_maxend
			if seq + len > sender.td_maxend
	td_end    = max(seq + len) seen in sent packets

   I.   Upper bound for valid data:	seq <= sender.td_maxend
   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
   IV.  Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW

   where sack is the highest right edge of a sack block found in the packet
   or ack in the case of a packet without the SACK option.

   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
*/
327 | 328 | ||
328 | static inline __u32 segment_seq_plus_len(__u32 seq, | 329 | static inline __u32 segment_seq_plus_len(__u32 seq, |
329 | size_t len, | 330 | size_t len, |
330 | unsigned int dataoff, | 331 | unsigned int dataoff, |
331 | const struct tcphdr *tcph) | 332 | const struct tcphdr *tcph) |
332 | { | 333 | { |
333 | /* XXX Should I use payload length field in IP/IPv6 header ? | 334 | /* XXX Should I use payload length field in IP/IPv6 header ? |
334 | * - YK */ | 335 | * - YK */ |
335 | return (seq + len - dataoff - tcph->doff*4 | 336 | return (seq + len - dataoff - tcph->doff*4 |
336 | + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); | 337 | + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); |
337 | } | 338 | } |
338 | 339 | ||
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/* Widest tolerated (s)ack window (rule IV above): at least MAXACKWINCONST,
 * or the sender's largest observed window if that exceeds it.
 */
#define MAXACKWINDOW(sender) \
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
	: MAXACKWINCONST)

/*
 * Simplified tcp_parse_options routine from tcp_input.c
 */
348 | static void tcp_options(const struct sk_buff *skb, | 349 | static void tcp_options(const struct sk_buff *skb, |
349 | unsigned int dataoff, | 350 | unsigned int dataoff, |
350 | const struct tcphdr *tcph, | 351 | const struct tcphdr *tcph, |
351 | struct ip_ct_tcp_state *state) | 352 | struct ip_ct_tcp_state *state) |
352 | { | 353 | { |
353 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | 354 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; |
354 | const unsigned char *ptr; | 355 | const unsigned char *ptr; |
355 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | 356 | int length = (tcph->doff*4) - sizeof(struct tcphdr); |
356 | 357 | ||
357 | if (!length) | 358 | if (!length) |
358 | return; | 359 | return; |
359 | 360 | ||
360 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | 361 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), |
361 | length, buff); | 362 | length, buff); |
362 | BUG_ON(ptr == NULL); | 363 | BUG_ON(ptr == NULL); |
363 | 364 | ||
364 | state->td_scale = | 365 | state->td_scale = |
365 | state->flags = 0; | 366 | state->flags = 0; |
366 | 367 | ||
367 | while (length > 0) { | 368 | while (length > 0) { |
368 | int opcode=*ptr++; | 369 | int opcode=*ptr++; |
369 | int opsize; | 370 | int opsize; |
370 | 371 | ||
371 | switch (opcode) { | 372 | switch (opcode) { |
372 | case TCPOPT_EOL: | 373 | case TCPOPT_EOL: |
373 | return; | 374 | return; |
374 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | 375 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ |
375 | length--; | 376 | length--; |
376 | continue; | 377 | continue; |
377 | default: | 378 | default: |
378 | if (length < 2) | 379 | if (length < 2) |
379 | return; | 380 | return; |
380 | opsize=*ptr++; | 381 | opsize=*ptr++; |
381 | if (opsize < 2) /* "silly options" */ | 382 | if (opsize < 2) /* "silly options" */ |
382 | return; | 383 | return; |
383 | if (opsize > length) | 384 | if (opsize > length) |
384 | return; /* don't parse partial options */ | 385 | return; /* don't parse partial options */ |
385 | 386 | ||
386 | if (opcode == TCPOPT_SACK_PERM | 387 | if (opcode == TCPOPT_SACK_PERM |
387 | && opsize == TCPOLEN_SACK_PERM) | 388 | && opsize == TCPOLEN_SACK_PERM) |
388 | state->flags |= IP_CT_TCP_FLAG_SACK_PERM; | 389 | state->flags |= IP_CT_TCP_FLAG_SACK_PERM; |
389 | else if (opcode == TCPOPT_WINDOW | 390 | else if (opcode == TCPOPT_WINDOW |
390 | && opsize == TCPOLEN_WINDOW) { | 391 | && opsize == TCPOLEN_WINDOW) { |
391 | state->td_scale = *(u_int8_t *)ptr; | 392 | state->td_scale = *(u_int8_t *)ptr; |
392 | 393 | ||
393 | if (state->td_scale > TCP_MAX_WSCALE) | 394 | if (state->td_scale > TCP_MAX_WSCALE) |
394 | state->td_scale = TCP_MAX_WSCALE; | 395 | state->td_scale = TCP_MAX_WSCALE; |
395 | 396 | ||
396 | state->flags |= | 397 | state->flags |= |
397 | IP_CT_TCP_FLAG_WINDOW_SCALE; | 398 | IP_CT_TCP_FLAG_WINDOW_SCALE; |
398 | } | 399 | } |
399 | ptr += opsize - 2; | 400 | ptr += opsize - 2; |
400 | length -= opsize; | 401 | length -= opsize; |
401 | } | 402 | } |
402 | } | 403 | } |
403 | } | 404 | } |
404 | 405 | ||
405 | static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, | 406 | static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, |
406 | const struct tcphdr *tcph, __u32 *sack) | 407 | const struct tcphdr *tcph, __u32 *sack) |
407 | { | 408 | { |
408 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; | 409 | unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; |
409 | const unsigned char *ptr; | 410 | const unsigned char *ptr; |
410 | int length = (tcph->doff*4) - sizeof(struct tcphdr); | 411 | int length = (tcph->doff*4) - sizeof(struct tcphdr); |
411 | __u32 tmp; | 412 | __u32 tmp; |
412 | 413 | ||
413 | if (!length) | 414 | if (!length) |
414 | return; | 415 | return; |
415 | 416 | ||
416 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), | 417 | ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), |
417 | length, buff); | 418 | length, buff); |
418 | BUG_ON(ptr == NULL); | 419 | BUG_ON(ptr == NULL); |
419 | 420 | ||
420 | /* Fast path for timestamp-only option */ | 421 | /* Fast path for timestamp-only option */ |
421 | if (length == TCPOLEN_TSTAMP_ALIGNED | 422 | if (length == TCPOLEN_TSTAMP_ALIGNED |
422 | && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | 423 | && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) |
423 | | (TCPOPT_NOP << 16) | 424 | | (TCPOPT_NOP << 16) |
424 | | (TCPOPT_TIMESTAMP << 8) | 425 | | (TCPOPT_TIMESTAMP << 8) |
425 | | TCPOLEN_TIMESTAMP)) | 426 | | TCPOLEN_TIMESTAMP)) |
426 | return; | 427 | return; |
427 | 428 | ||
428 | while (length > 0) { | 429 | while (length > 0) { |
429 | int opcode = *ptr++; | 430 | int opcode = *ptr++; |
430 | int opsize, i; | 431 | int opsize, i; |
431 | 432 | ||
432 | switch (opcode) { | 433 | switch (opcode) { |
433 | case TCPOPT_EOL: | 434 | case TCPOPT_EOL: |
434 | return; | 435 | return; |
435 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | 436 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ |
436 | length--; | 437 | length--; |
437 | continue; | 438 | continue; |
438 | default: | 439 | default: |
439 | if (length < 2) | 440 | if (length < 2) |
440 | return; | 441 | return; |
441 | opsize = *ptr++; | 442 | opsize = *ptr++; |
442 | if (opsize < 2) /* "silly options" */ | 443 | if (opsize < 2) /* "silly options" */ |
443 | return; | 444 | return; |
444 | if (opsize > length) | 445 | if (opsize > length) |
445 | return; /* don't parse partial options */ | 446 | return; /* don't parse partial options */ |
446 | 447 | ||
447 | if (opcode == TCPOPT_SACK | 448 | if (opcode == TCPOPT_SACK |
448 | && opsize >= (TCPOLEN_SACK_BASE | 449 | && opsize >= (TCPOLEN_SACK_BASE |
449 | + TCPOLEN_SACK_PERBLOCK) | 450 | + TCPOLEN_SACK_PERBLOCK) |
450 | && !((opsize - TCPOLEN_SACK_BASE) | 451 | && !((opsize - TCPOLEN_SACK_BASE) |
451 | % TCPOLEN_SACK_PERBLOCK)) { | 452 | % TCPOLEN_SACK_PERBLOCK)) { |
452 | for (i = 0; | 453 | for (i = 0; |
453 | i < (opsize - TCPOLEN_SACK_BASE); | 454 | i < (opsize - TCPOLEN_SACK_BASE); |
454 | i += TCPOLEN_SACK_PERBLOCK) { | 455 | i += TCPOLEN_SACK_PERBLOCK) { |
455 | tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); | 456 | tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); |
456 | 457 | ||
457 | if (after(tmp, *sack)) | 458 | if (after(tmp, *sack)) |
458 | *sack = tmp; | 459 | *sack = tmp; |
459 | } | 460 | } |
460 | return; | 461 | return; |
461 | } | 462 | } |
462 | ptr += opsize - 2; | 463 | ptr += opsize - 2; |
463 | length -= opsize; | 464 | length -= opsize; |
464 | } | 465 | } |
465 | } | 466 | } |
466 | } | 467 | } |
467 | 468 | ||
/* Core window-tracking check (rules I-IV above): decide whether the
 * segment fits the tracked windows for this connection and, if so,
 * update the per-direction tracking state.  Returns false only for an
 * out-of-window segment when neither the sender state nor the netns is
 * marked "be liberal".
 */
static bool tcp_in_window(const struct nf_conn *ct,
			  struct ip_ct_tcp *state,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph)
{
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = tcp_pernet(net);
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	__u32 seq, ack, sack, end, win, swin;
	s32 receiver_offset;
	bool res, in_recv_win;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

	/* Take into account NAT sequence number mangling */
	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
	ack -= receiver_offset;
	sack -= receiver_offset;

	pr_debug("tcp_in_window: START\n");
	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	if (sender->td_maxwin == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn) {
			/*
			 * SYN-ACK in reply to a SYN
			 * or SYN from reply direction in simultaneous open.
			 */
			sender->td_end =
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, dataoff, tcph, sender);
			/*
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
				sender->td_scale =
				receiver->td_scale = 0;
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			swin = win << sender->td_scale;
			sender->td_maxwin = (swin == 0 ? 1 : swin);
			sender->td_maxend = end + sender->td_maxwin;
			/*
			 * We haven't seen traffic in the other direction yet
			 * but we have to tweak window tracking to pass III
			 * and IV until that happens.
			 */
			if (receiver->td_maxwin == 0)
				receiver->td_end = receiver->td_maxend = sack;
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		    || (state->state == TCP_CONNTRACK_SYN_RECV
			&& dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, dataoff, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
		/*
		 * RST sent answering SYN.
		 */
		seq = end = sender->td_end;

	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* Is the ending sequence in the receive window (if available)? */
	in_recv_win = !receiver->td_maxwin ||
		      after(end, sender->td_end - receiver->td_maxwin - 1);

	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		 before(seq, sender->td_maxend + 1),
		 (in_recv_win ? 1 : 0),
		 before(sack, receiver->td_end + 1),
		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));

	if (before(seq, sender->td_maxend + 1) &&
	    in_recv_win &&
	    before(sack, receiver->td_end + 1) &&
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end)) {
			sender->td_end = end;
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
		if (tcph->ack) {
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

		/*
		 * Update receiver data.
		 */
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win;
				state->retrans = 0;
			}
		}
		res = true;
	} else {
		res = false;
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    tn->tcp_be_liberal)
			res = true;
		if (!res) {
			nf_ct_l4proto_log_invalid(skb, ct,
			"%s",
			before(seq, sender->td_maxend + 1) ?
			in_recv_win ?
			before(sack, receiver->td_end + 1) ?
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
		}
	}

	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
		 "receiver end=%u maxend=%u maxwin=%u\n",
		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

	return res;
}
703 | 704 | ||
/* table of valid flag combinations - PUSH, ECE and CWR are always valid,
 * so the table is indexed only by the FIN/SYN/RST/ACK/URG bits.
 */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
};
718 | 719 | ||
719 | static void tcp_error_log(const struct sk_buff *skb, struct net *net, | 720 | static void tcp_error_log(const struct sk_buff *skb, struct net *net, |
720 | u8 pf, const char *msg) | 721 | u8 pf, const char *msg) |
721 | { | 722 | { |
722 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); | 723 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); |
723 | } | 724 | } |
724 | 725 | ||
725 | /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ | 726 | /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ |
726 | static int tcp_error(struct net *net, struct nf_conn *tmpl, | 727 | static int tcp_error(struct net *net, struct nf_conn *tmpl, |
727 | struct sk_buff *skb, | 728 | struct sk_buff *skb, |
728 | unsigned int dataoff, | 729 | unsigned int dataoff, |
729 | u_int8_t pf, | 730 | u_int8_t pf, |
730 | unsigned int hooknum) | 731 | unsigned int hooknum) |
731 | { | 732 | { |
732 | const struct tcphdr *th; | 733 | const struct tcphdr *th; |
733 | struct tcphdr _tcph; | 734 | struct tcphdr _tcph; |
734 | unsigned int tcplen = skb->len - dataoff; | 735 | unsigned int tcplen = skb->len - dataoff; |
735 | u_int8_t tcpflags; | 736 | u_int8_t tcpflags; |
736 | 737 | ||
737 | /* Smaller that minimal TCP header? */ | 738 | /* Smaller that minimal TCP header? */ |
738 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | 739 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); |
739 | if (th == NULL) { | 740 | if (th == NULL) { |
740 | tcp_error_log(skb, net, pf, "short packet"); | 741 | tcp_error_log(skb, net, pf, "short packet"); |
741 | return -NF_ACCEPT; | 742 | return -NF_ACCEPT; |
742 | } | 743 | } |
743 | 744 | ||
744 | /* Not whole TCP header or malformed packet */ | 745 | /* Not whole TCP header or malformed packet */ |
745 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { | 746 | if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { |
746 | tcp_error_log(skb, net, pf, "truncated packet"); | 747 | tcp_error_log(skb, net, pf, "truncated packet"); |
747 | return -NF_ACCEPT; | 748 | return -NF_ACCEPT; |
748 | } | 749 | } |
749 | 750 | ||
750 | /* Checksum invalid? Ignore. | 751 | /* Checksum invalid? Ignore. |
751 | * We skip checking packets on the outgoing path | 752 | * We skip checking packets on the outgoing path |
752 | * because the checksum is assumed to be correct. | 753 | * because the checksum is assumed to be correct. |
753 | */ | 754 | */ |
754 | /* FIXME: Source route IP option packets --RR */ | 755 | /* FIXME: Source route IP option packets --RR */ |
755 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 756 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
756 | nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { | 757 | nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { |
757 | tcp_error_log(skb, net, pf, "bad checksum"); | 758 | tcp_error_log(skb, net, pf, "bad checksum"); |
758 | return -NF_ACCEPT; | 759 | return -NF_ACCEPT; |
759 | } | 760 | } |
760 | 761 | ||
761 | /* Check TCP flags. */ | 762 | /* Check TCP flags. */ |
762 | tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); | 763 | tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); |
763 | if (!tcp_valid_flags[tcpflags]) { | 764 | if (!tcp_valid_flags[tcpflags]) { |
764 | tcp_error_log(skb, net, pf, "invalid tcp flag combination"); | 765 | tcp_error_log(skb, net, pf, "invalid tcp flag combination"); |
765 | return -NF_ACCEPT; | 766 | return -NF_ACCEPT; |
766 | } | 767 | } |
767 | 768 | ||
768 | return NF_ACCEPT; | 769 | return NF_ACCEPT; |
769 | } | 770 | } |
770 | 771 | ||
771 | static unsigned int *tcp_get_timeouts(struct net *net) | ||
772 | { | ||
773 | return tcp_pernet(net)->timeouts; | ||
774 | } | ||
775 | |||
776 | /* Returns verdict for packet, or -1 for invalid. */ | 772 | /* Returns verdict for packet, or -1 for invalid. */ |
777 | static int tcp_packet(struct nf_conn *ct, | 773 | static int tcp_packet(struct nf_conn *ct, |
778 | const struct sk_buff *skb, | 774 | const struct sk_buff *skb, |
779 | unsigned int dataoff, | 775 | unsigned int dataoff, |
780 | enum ip_conntrack_info ctinfo, | 776 | enum ip_conntrack_info ctinfo) |
781 | unsigned int *timeouts) | ||
782 | { | 777 | { |
783 | struct net *net = nf_ct_net(ct); | 778 | struct net *net = nf_ct_net(ct); |
784 | struct nf_tcp_net *tn = tcp_pernet(net); | 779 | struct nf_tcp_net *tn = tcp_pernet(net); |
785 | struct nf_conntrack_tuple *tuple; | 780 | struct nf_conntrack_tuple *tuple; |
786 | enum tcp_conntrack new_state, old_state; | 781 | enum tcp_conntrack new_state, old_state; |
782 | unsigned int index, *timeouts; | ||
787 | enum ip_conntrack_dir dir; | 783 | enum ip_conntrack_dir dir; |
788 | const struct tcphdr *th; | 784 | const struct tcphdr *th; |
789 | struct tcphdr _tcph; | 785 | struct tcphdr _tcph; |
790 | unsigned long timeout; | 786 | unsigned long timeout; |
791 | unsigned int index; | ||
792 | 787 | ||
793 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | 788 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); |
794 | BUG_ON(th == NULL); | 789 | BUG_ON(th == NULL); |
795 | 790 | ||
796 | spin_lock_bh(&ct->lock); | 791 | spin_lock_bh(&ct->lock); |
797 | old_state = ct->proto.tcp.state; | 792 | old_state = ct->proto.tcp.state; |
798 | dir = CTINFO2DIR(ctinfo); | 793 | dir = CTINFO2DIR(ctinfo); |
799 | index = get_conntrack_index(th); | 794 | index = get_conntrack_index(th); |
800 | new_state = tcp_conntracks[dir][index][old_state]; | 795 | new_state = tcp_conntracks[dir][index][old_state]; |
801 | tuple = &ct->tuplehash[dir].tuple; | 796 | tuple = &ct->tuplehash[dir].tuple; |
802 | 797 | ||
803 | switch (new_state) { | 798 | switch (new_state) { |
804 | case TCP_CONNTRACK_SYN_SENT: | 799 | case TCP_CONNTRACK_SYN_SENT: |
805 | if (old_state < TCP_CONNTRACK_TIME_WAIT) | 800 | if (old_state < TCP_CONNTRACK_TIME_WAIT) |
806 | break; | 801 | break; |
807 | /* RFC 1122: "When a connection is closed actively, | 802 | /* RFC 1122: "When a connection is closed actively, |
808 | * it MUST linger in TIME-WAIT state for a time 2xMSL | 803 | * it MUST linger in TIME-WAIT state for a time 2xMSL |
809 | * (Maximum Segment Lifetime). However, it MAY accept | 804 | * (Maximum Segment Lifetime). However, it MAY accept |
810 | * a new SYN from the remote TCP to reopen the connection | 805 | * a new SYN from the remote TCP to reopen the connection |
811 | * directly from TIME-WAIT state, if..." | 806 | * directly from TIME-WAIT state, if..." |
812 | * We ignore the conditions because we are in the | 807 | * We ignore the conditions because we are in the |
813 | * TIME-WAIT state anyway. | 808 | * TIME-WAIT state anyway. |
814 | * | 809 | * |
815 | * Handle aborted connections: we and the server | 810 | * Handle aborted connections: we and the server |
816 | * think there is an existing connection but the client | 811 | * think there is an existing connection but the client |
817 | * aborts it and starts a new one. | 812 | * aborts it and starts a new one. |
818 | */ | 813 | */ |
819 | if (((ct->proto.tcp.seen[dir].flags | 814 | if (((ct->proto.tcp.seen[dir].flags |
820 | | ct->proto.tcp.seen[!dir].flags) | 815 | | ct->proto.tcp.seen[!dir].flags) |
821 | & IP_CT_TCP_FLAG_CLOSE_INIT) | 816 | & IP_CT_TCP_FLAG_CLOSE_INIT) |
822 | || (ct->proto.tcp.last_dir == dir | 817 | || (ct->proto.tcp.last_dir == dir |
823 | && ct->proto.tcp.last_index == TCP_RST_SET)) { | 818 | && ct->proto.tcp.last_index == TCP_RST_SET)) { |
824 | /* Attempt to reopen a closed/aborted connection. | 819 | /* Attempt to reopen a closed/aborted connection. |
825 | * Delete this connection and look up again. */ | 820 | * Delete this connection and look up again. */ |
826 | spin_unlock_bh(&ct->lock); | 821 | spin_unlock_bh(&ct->lock); |
827 | 822 | ||
828 | /* Only repeat if we can actually remove the timer. | 823 | /* Only repeat if we can actually remove the timer. |
829 | * Destruction may already be in progress in process | 824 | * Destruction may already be in progress in process |
830 | * context and we must give it a chance to terminate. | 825 | * context and we must give it a chance to terminate. |
831 | */ | 826 | */ |
832 | if (nf_ct_kill(ct)) | 827 | if (nf_ct_kill(ct)) |
833 | return -NF_REPEAT; | 828 | return -NF_REPEAT; |
834 | return NF_DROP; | 829 | return NF_DROP; |
835 | } | 830 | } |
836 | /* Fall through */ | 831 | /* Fall through */ |
837 | case TCP_CONNTRACK_IGNORE: | 832 | case TCP_CONNTRACK_IGNORE: |
838 | /* Ignored packets: | 833 | /* Ignored packets: |
839 | * | 834 | * |
840 | * Our connection entry may be out of sync, so ignore | 835 | * Our connection entry may be out of sync, so ignore |
841 | * packets which may signal the real connection between | 836 | * packets which may signal the real connection between |
842 | * the client and the server. | 837 | * the client and the server. |
843 | * | 838 | * |
844 | * a) SYN in ORIGINAL | 839 | * a) SYN in ORIGINAL |
845 | * b) SYN/ACK in REPLY | 840 | * b) SYN/ACK in REPLY |
846 | * c) ACK in reply direction after initial SYN in original. | 841 | * c) ACK in reply direction after initial SYN in original. |
847 | * | 842 | * |
848 | * If the ignored packet is invalid, the receiver will send | 843 | * If the ignored packet is invalid, the receiver will send |
849 | * a RST we'll catch below. | 844 | * a RST we'll catch below. |
850 | */ | 845 | */ |
851 | if (index == TCP_SYNACK_SET | 846 | if (index == TCP_SYNACK_SET |
852 | && ct->proto.tcp.last_index == TCP_SYN_SET | 847 | && ct->proto.tcp.last_index == TCP_SYN_SET |
853 | && ct->proto.tcp.last_dir != dir | 848 | && ct->proto.tcp.last_dir != dir |
854 | && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { | 849 | && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { |
855 | /* b) This SYN/ACK acknowledges a SYN that we earlier | 850 | /* b) This SYN/ACK acknowledges a SYN that we earlier |
856 | * ignored as invalid. This means that the client and | 851 | * ignored as invalid. This means that the client and |
857 | * the server are both in sync, while the firewall is | 852 | * the server are both in sync, while the firewall is |
858 | * not. We get in sync from the previously annotated | 853 | * not. We get in sync from the previously annotated |
859 | * values. | 854 | * values. |
860 | */ | 855 | */ |
861 | old_state = TCP_CONNTRACK_SYN_SENT; | 856 | old_state = TCP_CONNTRACK_SYN_SENT; |
862 | new_state = TCP_CONNTRACK_SYN_RECV; | 857 | new_state = TCP_CONNTRACK_SYN_RECV; |
863 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = | 858 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = |
864 | ct->proto.tcp.last_end; | 859 | ct->proto.tcp.last_end; |
865 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = | 860 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = |
866 | ct->proto.tcp.last_end; | 861 | ct->proto.tcp.last_end; |
867 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = | 862 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = |
868 | ct->proto.tcp.last_win == 0 ? | 863 | ct->proto.tcp.last_win == 0 ? |
869 | 1 : ct->proto.tcp.last_win; | 864 | 1 : ct->proto.tcp.last_win; |
870 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = | 865 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = |
871 | ct->proto.tcp.last_wscale; | 866 | ct->proto.tcp.last_wscale; |
872 | ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; | 867 | ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; |
873 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = | 868 | ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = |
874 | ct->proto.tcp.last_flags; | 869 | ct->proto.tcp.last_flags; |
875 | memset(&ct->proto.tcp.seen[dir], 0, | 870 | memset(&ct->proto.tcp.seen[dir], 0, |
876 | sizeof(struct ip_ct_tcp_state)); | 871 | sizeof(struct ip_ct_tcp_state)); |
877 | break; | 872 | break; |
878 | } | 873 | } |
879 | ct->proto.tcp.last_index = index; | 874 | ct->proto.tcp.last_index = index; |
880 | ct->proto.tcp.last_dir = dir; | 875 | ct->proto.tcp.last_dir = dir; |
881 | ct->proto.tcp.last_seq = ntohl(th->seq); | 876 | ct->proto.tcp.last_seq = ntohl(th->seq); |
882 | ct->proto.tcp.last_end = | 877 | ct->proto.tcp.last_end = |
883 | segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); | 878 | segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); |
884 | ct->proto.tcp.last_win = ntohs(th->window); | 879 | ct->proto.tcp.last_win = ntohs(th->window); |
885 | 880 | ||
886 | /* a) This is a SYN in ORIGINAL. The client and the server | 881 | /* a) This is a SYN in ORIGINAL. The client and the server |
887 | * may be in sync but we are not. In that case, we annotate | 882 | * may be in sync but we are not. In that case, we annotate |
888 | * the TCP options and let the packet go through. If it is a | 883 | * the TCP options and let the packet go through. If it is a |
889 | * valid SYN packet, the server will reply with a SYN/ACK, and | 884 | * valid SYN packet, the server will reply with a SYN/ACK, and |
890 | * then we'll get in sync. Otherwise, the server potentially | 885 | * then we'll get in sync. Otherwise, the server potentially |
891 | * responds with a challenge ACK if implementing RFC5961. | 886 | * responds with a challenge ACK if implementing RFC5961. |
892 | */ | 887 | */ |
893 | if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { | 888 | if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { |
894 | struct ip_ct_tcp_state seen = {}; | 889 | struct ip_ct_tcp_state seen = {}; |
895 | 890 | ||
896 | ct->proto.tcp.last_flags = | 891 | ct->proto.tcp.last_flags = |
897 | ct->proto.tcp.last_wscale = 0; | 892 | ct->proto.tcp.last_wscale = 0; |
898 | tcp_options(skb, dataoff, th, &seen); | 893 | tcp_options(skb, dataoff, th, &seen); |
899 | if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { | 894 | if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { |
900 | ct->proto.tcp.last_flags |= | 895 | ct->proto.tcp.last_flags |= |
901 | IP_CT_TCP_FLAG_WINDOW_SCALE; | 896 | IP_CT_TCP_FLAG_WINDOW_SCALE; |
902 | ct->proto.tcp.last_wscale = seen.td_scale; | 897 | ct->proto.tcp.last_wscale = seen.td_scale; |
903 | } | 898 | } |
904 | if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { | 899 | if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { |
905 | ct->proto.tcp.last_flags |= | 900 | ct->proto.tcp.last_flags |= |
906 | IP_CT_TCP_FLAG_SACK_PERM; | 901 | IP_CT_TCP_FLAG_SACK_PERM; |
907 | } | 902 | } |
908 | /* Mark the potential for RFC5961 challenge ACK, | 903 | /* Mark the potential for RFC5961 challenge ACK, |
909 | * this pose a special problem for LAST_ACK state | 904 | * this pose a special problem for LAST_ACK state |
910 | * as ACK is intrepretated as ACKing last FIN. | 905 | * as ACK is intrepretated as ACKing last FIN. |
911 | */ | 906 | */ |
912 | if (old_state == TCP_CONNTRACK_LAST_ACK) | 907 | if (old_state == TCP_CONNTRACK_LAST_ACK) |
913 | ct->proto.tcp.last_flags |= | 908 | ct->proto.tcp.last_flags |= |
914 | IP_CT_EXP_CHALLENGE_ACK; | 909 | IP_CT_EXP_CHALLENGE_ACK; |
915 | } | 910 | } |
916 | spin_unlock_bh(&ct->lock); | 911 | spin_unlock_bh(&ct->lock); |
917 | nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " | 912 | nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " |
918 | "state %s ", tcp_conntrack_names[old_state]); | 913 | "state %s ", tcp_conntrack_names[old_state]); |
919 | return NF_ACCEPT; | 914 | return NF_ACCEPT; |
920 | case TCP_CONNTRACK_MAX: | 915 | case TCP_CONNTRACK_MAX: |
921 | /* Special case for SYN proxy: when the SYN to the server or | 916 | /* Special case for SYN proxy: when the SYN to the server or |
922 | * the SYN/ACK from the server is lost, the client may transmit | 917 | * the SYN/ACK from the server is lost, the client may transmit |
923 | * a keep-alive packet while in SYN_SENT state. This needs to | 918 | * a keep-alive packet while in SYN_SENT state. This needs to |
924 | * be associated with the original conntrack entry in order to | 919 | * be associated with the original conntrack entry in order to |
925 | * generate a new SYN with the correct sequence number. | 920 | * generate a new SYN with the correct sequence number. |
926 | */ | 921 | */ |
927 | if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && | 922 | if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && |
928 | index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && | 923 | index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && |
929 | ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && | 924 | ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && |
930 | ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { | 925 | ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { |
931 | pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); | 926 | pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); |
932 | spin_unlock_bh(&ct->lock); | 927 | spin_unlock_bh(&ct->lock); |
933 | return NF_ACCEPT; | 928 | return NF_ACCEPT; |
934 | } | 929 | } |
935 | 930 | ||
936 | /* Invalid packet */ | 931 | /* Invalid packet */ |
937 | pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", | 932 | pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", |
938 | dir, get_conntrack_index(th), old_state); | 933 | dir, get_conntrack_index(th), old_state); |
939 | spin_unlock_bh(&ct->lock); | 934 | spin_unlock_bh(&ct->lock); |
940 | nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); | 935 | nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); |
941 | return -NF_ACCEPT; | 936 | return -NF_ACCEPT; |
942 | case TCP_CONNTRACK_TIME_WAIT: | 937 | case TCP_CONNTRACK_TIME_WAIT: |
943 | /* RFC5961 compliance cause stack to send "challenge-ACK" | 938 | /* RFC5961 compliance cause stack to send "challenge-ACK" |
944 | * e.g. in response to spurious SYNs. Conntrack MUST | 939 | * e.g. in response to spurious SYNs. Conntrack MUST |
945 | * not believe this ACK is acking last FIN. | 940 | * not believe this ACK is acking last FIN. |
946 | */ | 941 | */ |
947 | if (old_state == TCP_CONNTRACK_LAST_ACK && | 942 | if (old_state == TCP_CONNTRACK_LAST_ACK && |
948 | index == TCP_ACK_SET && | 943 | index == TCP_ACK_SET && |
949 | ct->proto.tcp.last_dir != dir && | 944 | ct->proto.tcp.last_dir != dir && |
950 | ct->proto.tcp.last_index == TCP_SYN_SET && | 945 | ct->proto.tcp.last_index == TCP_SYN_SET && |
951 | (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { | 946 | (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { |
952 | /* Detected RFC5961 challenge ACK */ | 947 | /* Detected RFC5961 challenge ACK */ |
953 | ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; | 948 | ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; |
954 | spin_unlock_bh(&ct->lock); | 949 | spin_unlock_bh(&ct->lock); |
955 | nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); | 950 | nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); |
956 | return NF_ACCEPT; /* Don't change state */ | 951 | return NF_ACCEPT; /* Don't change state */ |
957 | } | 952 | } |
958 | break; | 953 | break; |
959 | case TCP_CONNTRACK_SYN_SENT2: | 954 | case TCP_CONNTRACK_SYN_SENT2: |
960 | /* tcp_conntracks table is not smart enough to handle | 955 | /* tcp_conntracks table is not smart enough to handle |
961 | * simultaneous open. | 956 | * simultaneous open. |
962 | */ | 957 | */ |
963 | ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; | 958 | ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; |
964 | break; | 959 | break; |
965 | case TCP_CONNTRACK_SYN_RECV: | 960 | case TCP_CONNTRACK_SYN_RECV: |
966 | if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && | 961 | if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && |
967 | ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) | 962 | ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) |
968 | new_state = TCP_CONNTRACK_ESTABLISHED; | 963 | new_state = TCP_CONNTRACK_ESTABLISHED; |
969 | break; | 964 | break; |
970 | case TCP_CONNTRACK_CLOSE: | 965 | case TCP_CONNTRACK_CLOSE: |
971 | if (index == TCP_RST_SET | 966 | if (index == TCP_RST_SET |
972 | && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) | 967 | && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) |
973 | && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { | 968 | && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { |
974 | /* Invalid RST */ | 969 | /* Invalid RST */ |
975 | spin_unlock_bh(&ct->lock); | 970 | spin_unlock_bh(&ct->lock); |
976 | nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); | 971 | nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); |
977 | return -NF_ACCEPT; | 972 | return -NF_ACCEPT; |
978 | } | 973 | } |
979 | if (index == TCP_RST_SET | 974 | if (index == TCP_RST_SET |
980 | && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) | 975 | && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) |
981 | && ct->proto.tcp.last_index == TCP_SYN_SET) | 976 | && ct->proto.tcp.last_index == TCP_SYN_SET) |
982 | || (!test_bit(IPS_ASSURED_BIT, &ct->status) | 977 | || (!test_bit(IPS_ASSURED_BIT, &ct->status) |
983 | && ct->proto.tcp.last_index == TCP_ACK_SET)) | 978 | && ct->proto.tcp.last_index == TCP_ACK_SET)) |
984 | && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { | 979 | && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { |
985 | /* RST sent to invalid SYN or ACK we had let through | 980 | /* RST sent to invalid SYN or ACK we had let through |
986 | * at a) and c) above: | 981 | * at a) and c) above: |
987 | * | 982 | * |
988 | * a) SYN was in window then | 983 | * a) SYN was in window then |
989 | * c) we hold a half-open connection. | 984 | * c) we hold a half-open connection. |
990 | * | 985 | * |
991 | * Delete our connection entry. | 986 | * Delete our connection entry. |
992 | * We skip window checking, because packet might ACK | 987 | * We skip window checking, because packet might ACK |
993 | * segments we ignored. */ | 988 | * segments we ignored. */ |
994 | goto in_window; | 989 | goto in_window; |
995 | } | 990 | } |
996 | /* Just fall through */ | 991 | /* Just fall through */ |
997 | default: | 992 | default: |
998 | /* Keep compilers happy. */ | 993 | /* Keep compilers happy. */ |
999 | break; | 994 | break; |
1000 | } | 995 | } |
1001 | 996 | ||
1002 | if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, | 997 | if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, |
1003 | skb, dataoff, th)) { | 998 | skb, dataoff, th)) { |
1004 | spin_unlock_bh(&ct->lock); | 999 | spin_unlock_bh(&ct->lock); |
1005 | return -NF_ACCEPT; | 1000 | return -NF_ACCEPT; |
1006 | } | 1001 | } |
1007 | in_window: | 1002 | in_window: |
1008 | /* From now on we have got in-window packets */ | 1003 | /* From now on we have got in-window packets */ |
1009 | ct->proto.tcp.last_index = index; | 1004 | ct->proto.tcp.last_index = index; |
1010 | ct->proto.tcp.last_dir = dir; | 1005 | ct->proto.tcp.last_dir = dir; |
1011 | 1006 | ||
1012 | pr_debug("tcp_conntracks: "); | 1007 | pr_debug("tcp_conntracks: "); |
1013 | nf_ct_dump_tuple(tuple); | 1008 | nf_ct_dump_tuple(tuple); |
1014 | pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", | 1009 | pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", |
1015 | (th->syn ? 1 : 0), (th->ack ? 1 : 0), | 1010 | (th->syn ? 1 : 0), (th->ack ? 1 : 0), |
1016 | (th->fin ? 1 : 0), (th->rst ? 1 : 0), | 1011 | (th->fin ? 1 : 0), (th->rst ? 1 : 0), |
1017 | old_state, new_state); | 1012 | old_state, new_state); |
1018 | 1013 | ||
1019 | ct->proto.tcp.state = new_state; | 1014 | ct->proto.tcp.state = new_state; |
1020 | if (old_state != new_state | 1015 | if (old_state != new_state |
1021 | && new_state == TCP_CONNTRACK_FIN_WAIT) | 1016 | && new_state == TCP_CONNTRACK_FIN_WAIT) |
1022 | ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; | 1017 | ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; |
1023 | 1018 | ||
1019 | timeouts = nf_ct_timeout_lookup(ct); | ||
1020 | if (!timeouts) | ||
1021 | timeouts = tn->timeouts; | ||
1022 | |||
1024 | if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && | 1023 | if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && |
1025 | timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) | 1024 | timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) |
1026 | timeout = timeouts[TCP_CONNTRACK_RETRANS]; | 1025 | timeout = timeouts[TCP_CONNTRACK_RETRANS]; |
1027 | else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & | 1026 | else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & |
1028 | IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && | 1027 | IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && |
1029 | timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) | 1028 | timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) |
1030 | timeout = timeouts[TCP_CONNTRACK_UNACK]; | 1029 | timeout = timeouts[TCP_CONNTRACK_UNACK]; |
1031 | else if (ct->proto.tcp.last_win == 0 && | 1030 | else if (ct->proto.tcp.last_win == 0 && |
1032 | timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) | 1031 | timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) |
1033 | timeout = timeouts[TCP_CONNTRACK_RETRANS]; | 1032 | timeout = timeouts[TCP_CONNTRACK_RETRANS]; |
1034 | else | 1033 | else |
1035 | timeout = timeouts[new_state]; | 1034 | timeout = timeouts[new_state]; |
1036 | spin_unlock_bh(&ct->lock); | 1035 | spin_unlock_bh(&ct->lock); |
1037 | 1036 | ||
1038 | if (new_state != old_state) | 1037 | if (new_state != old_state) |
1039 | nf_conntrack_event_cache(IPCT_PROTOINFO, ct); | 1038 | nf_conntrack_event_cache(IPCT_PROTOINFO, ct); |
1040 | 1039 | ||
1041 | if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | 1040 | if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { |
1042 | /* If only reply is a RST, we can consider ourselves not to | 1041 | /* If only reply is a RST, we can consider ourselves not to |
1043 | have an established connection: this is a fairly common | 1042 | have an established connection: this is a fairly common |
1044 | problem case, so we can delete the conntrack | 1043 | problem case, so we can delete the conntrack |
1045 | immediately. --RR */ | 1044 | immediately. --RR */ |
1046 | if (th->rst) { | 1045 | if (th->rst) { |
1047 | nf_ct_kill_acct(ct, ctinfo, skb); | 1046 | nf_ct_kill_acct(ct, ctinfo, skb); |
1048 | return NF_ACCEPT; | 1047 | return NF_ACCEPT; |
1049 | } | 1048 | } |
1050 | /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection | 1049 | /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection |
1051 | * pickup with loose=1. Avoid large ESTABLISHED timeout. | 1050 | * pickup with loose=1. Avoid large ESTABLISHED timeout. |
1052 | */ | 1051 | */ |
1053 | if (new_state == TCP_CONNTRACK_ESTABLISHED && | 1052 | if (new_state == TCP_CONNTRACK_ESTABLISHED && |
1054 | timeout > timeouts[TCP_CONNTRACK_UNACK]) | 1053 | timeout > timeouts[TCP_CONNTRACK_UNACK]) |
1055 | timeout = timeouts[TCP_CONNTRACK_UNACK]; | 1054 | timeout = timeouts[TCP_CONNTRACK_UNACK]; |
1056 | } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) | 1055 | } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) |
1057 | && (old_state == TCP_CONNTRACK_SYN_RECV | 1056 | && (old_state == TCP_CONNTRACK_SYN_RECV |
1058 | || old_state == TCP_CONNTRACK_ESTABLISHED) | 1057 | || old_state == TCP_CONNTRACK_ESTABLISHED) |
1059 | && new_state == TCP_CONNTRACK_ESTABLISHED) { | 1058 | && new_state == TCP_CONNTRACK_ESTABLISHED) { |
1060 | /* Set ASSURED if we see see valid ack in ESTABLISHED | 1059 | /* Set ASSURED if we see see valid ack in ESTABLISHED |
1061 | after SYN_RECV or a valid answer for a picked up | 1060 | after SYN_RECV or a valid answer for a picked up |
1062 | connection. */ | 1061 | connection. */ |
1063 | set_bit(IPS_ASSURED_BIT, &ct->status); | 1062 | set_bit(IPS_ASSURED_BIT, &ct->status); |
1064 | nf_conntrack_event_cache(IPCT_ASSURED, ct); | 1063 | nf_conntrack_event_cache(IPCT_ASSURED, ct); |
1065 | } | 1064 | } |
1066 | nf_ct_refresh_acct(ct, ctinfo, skb, timeout); | 1065 | nf_ct_refresh_acct(ct, ctinfo, skb, timeout); |
1067 | 1066 | ||
1068 | return NF_ACCEPT; | 1067 | return NF_ACCEPT; |
1069 | } | 1068 | } |
1070 | 1069 | ||
1071 | /* Called when a new connection for this protocol found. */ | 1070 | /* Called when a new connection for this protocol found. */ |
1072 | static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, | 1071 | static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, |
1073 | unsigned int dataoff, unsigned int *timeouts) | 1072 | unsigned int dataoff) |
1074 | { | 1073 | { |
1075 | enum tcp_conntrack new_state; | 1074 | enum tcp_conntrack new_state; |
1076 | const struct tcphdr *th; | 1075 | const struct tcphdr *th; |
1077 | struct tcphdr _tcph; | 1076 | struct tcphdr _tcph; |
1078 | struct net *net = nf_ct_net(ct); | 1077 | struct net *net = nf_ct_net(ct); |
1079 | struct nf_tcp_net *tn = tcp_pernet(net); | 1078 | struct nf_tcp_net *tn = tcp_pernet(net); |
1080 | const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; | 1079 | const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; |
1081 | const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; | 1080 | const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; |
1082 | 1081 | ||
1083 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); | 1082 | th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); |
1084 | BUG_ON(th == NULL); | 1083 | BUG_ON(th == NULL); |
1085 | 1084 | ||
1086 | /* Don't need lock here: this conntrack not in circulation yet */ | 1085 | /* Don't need lock here: this conntrack not in circulation yet */ |
1087 | new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; | 1086 | new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; |
1088 | 1087 | ||
1089 | /* Invalid: delete conntrack */ | 1088 | /* Invalid: delete conntrack */ |
1090 | if (new_state >= TCP_CONNTRACK_MAX) { | 1089 | if (new_state >= TCP_CONNTRACK_MAX) { |
1091 | pr_debug("nf_ct_tcp: invalid new deleting.\n"); | 1090 | pr_debug("nf_ct_tcp: invalid new deleting.\n"); |
1092 | return false; | 1091 | return false; |
1093 | } | 1092 | } |
1094 | 1093 | ||
1095 | if (new_state == TCP_CONNTRACK_SYN_SENT) { | 1094 | if (new_state == TCP_CONNTRACK_SYN_SENT) { |
1096 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | 1095 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); |
1097 | /* SYN packet */ | 1096 | /* SYN packet */ |
1098 | ct->proto.tcp.seen[0].td_end = | 1097 | ct->proto.tcp.seen[0].td_end = |
1099 | segment_seq_plus_len(ntohl(th->seq), skb->len, | 1098 | segment_seq_plus_len(ntohl(th->seq), skb->len, |
1100 | dataoff, th); | 1099 | dataoff, th); |
1101 | ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | 1100 | ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); |
1102 | if (ct->proto.tcp.seen[0].td_maxwin == 0) | 1101 | if (ct->proto.tcp.seen[0].td_maxwin == 0) |
1103 | ct->proto.tcp.seen[0].td_maxwin = 1; | 1102 | ct->proto.tcp.seen[0].td_maxwin = 1; |
1104 | ct->proto.tcp.seen[0].td_maxend = | 1103 | ct->proto.tcp.seen[0].td_maxend = |
1105 | ct->proto.tcp.seen[0].td_end; | 1104 | ct->proto.tcp.seen[0].td_end; |
1106 | 1105 | ||
1107 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); | 1106 | tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); |
1108 | } else if (tn->tcp_loose == 0) { | 1107 | } else if (tn->tcp_loose == 0) { |
1109 | /* Don't try to pick up connections. */ | 1108 | /* Don't try to pick up connections. */ |
1110 | return false; | 1109 | return false; |
1111 | } else { | 1110 | } else { |
1112 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); | 1111 | memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); |
1113 | /* | 1112 | /* |
1114 | * We are in the middle of a connection, | 1113 | * We are in the middle of a connection, |
1115 | * its history is lost for us. | 1114 | * its history is lost for us. |
1116 | * Let's try to use the data from the packet. | 1115 | * Let's try to use the data from the packet. |
1117 | */ | 1116 | */ |
1118 | ct->proto.tcp.seen[0].td_end = | 1117 | ct->proto.tcp.seen[0].td_end = |
1119 | segment_seq_plus_len(ntohl(th->seq), skb->len, | 1118 | segment_seq_plus_len(ntohl(th->seq), skb->len, |
1120 | dataoff, th); | 1119 | dataoff, th); |
1121 | ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); | 1120 | ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); |
1122 | if (ct->proto.tcp.seen[0].td_maxwin == 0) | 1121 | if (ct->proto.tcp.seen[0].td_maxwin == 0) |
1123 | ct->proto.tcp.seen[0].td_maxwin = 1; | 1122 | ct->proto.tcp.seen[0].td_maxwin = 1; |
1124 | ct->proto.tcp.seen[0].td_maxend = | 1123 | ct->proto.tcp.seen[0].td_maxend = |
1125 | ct->proto.tcp.seen[0].td_end + | 1124 | ct->proto.tcp.seen[0].td_end + |
1126 | ct->proto.tcp.seen[0].td_maxwin; | 1125 | ct->proto.tcp.seen[0].td_maxwin; |
1127 | 1126 | ||
1128 | /* We assume SACK and liberal window checking to handle | 1127 | /* We assume SACK and liberal window checking to handle |
1129 | * window scaling */ | 1128 | * window scaling */ |
1130 | ct->proto.tcp.seen[0].flags = | 1129 | ct->proto.tcp.seen[0].flags = |
1131 | ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | | 1130 | ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | |
1132 | IP_CT_TCP_FLAG_BE_LIBERAL; | 1131 | IP_CT_TCP_FLAG_BE_LIBERAL; |
1133 | } | 1132 | } |
1134 | 1133 | ||
1135 | /* tcp_packet will set them */ | 1134 | /* tcp_packet will set them */ |
1136 | ct->proto.tcp.last_index = TCP_NONE_SET; | 1135 | ct->proto.tcp.last_index = TCP_NONE_SET; |
1137 | 1136 | ||
1138 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " | 1137 | pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " |
1139 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", | 1138 | "receiver end=%u maxend=%u maxwin=%u scale=%i\n", |
1140 | sender->td_end, sender->td_maxend, sender->td_maxwin, | 1139 | sender->td_end, sender->td_maxend, sender->td_maxwin, |
1141 | sender->td_scale, | 1140 | sender->td_scale, |
1142 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, | 1141 | receiver->td_end, receiver->td_maxend, receiver->td_maxwin, |
1143 | receiver->td_scale); | 1142 | receiver->td_scale); |
1144 | return true; | 1143 | return true; |
1145 | } | 1144 | } |
1146 | 1145 | ||
1147 | static bool tcp_can_early_drop(const struct nf_conn *ct) | 1146 | static bool tcp_can_early_drop(const struct nf_conn *ct) |
1148 | { | 1147 | { |
1149 | switch (ct->proto.tcp.state) { | 1148 | switch (ct->proto.tcp.state) { |
1150 | case TCP_CONNTRACK_FIN_WAIT: | 1149 | case TCP_CONNTRACK_FIN_WAIT: |
1151 | case TCP_CONNTRACK_LAST_ACK: | 1150 | case TCP_CONNTRACK_LAST_ACK: |
1152 | case TCP_CONNTRACK_TIME_WAIT: | 1151 | case TCP_CONNTRACK_TIME_WAIT: |
1153 | case TCP_CONNTRACK_CLOSE: | 1152 | case TCP_CONNTRACK_CLOSE: |
1154 | case TCP_CONNTRACK_CLOSE_WAIT: | 1153 | case TCP_CONNTRACK_CLOSE_WAIT: |
1155 | return true; | 1154 | return true; |
1156 | default: | 1155 | default: |
1157 | break; | 1156 | break; |
1158 | } | 1157 | } |
1159 | 1158 | ||
1160 | return false; | 1159 | return false; |
1161 | } | 1160 | } |
1162 | 1161 | ||
1163 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 1162 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
1164 | 1163 | ||
1165 | #include <linux/netfilter/nfnetlink.h> | 1164 | #include <linux/netfilter/nfnetlink.h> |
1166 | #include <linux/netfilter/nfnetlink_conntrack.h> | 1165 | #include <linux/netfilter/nfnetlink_conntrack.h> |
1167 | 1166 | ||
/* Dump the private TCP state of @ct (conntrack state, both directions'
 * window-scale factors and per-direction flag sets) into a nested
 * CTA_PROTOINFO_TCP attribute on @skb.
 *
 * Returns 0 on success, -1 when the skb has no room (nla_put_failure).
 */
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
			 struct nf_conn *ct)
{
	struct nlattr *nest_parms;
	struct nf_ct_tcp_flags tmp = {};

	/* ct->lock guards ct->proto.tcp; hold it for the whole dump so a
	 * consistent snapshot is emitted.
	 */
	spin_lock_bh(&ct->lock);
	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
	if (!nest_parms)
		goto nla_put_failure;

	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
		       ct->proto.tcp.seen[0].td_scale) ||
	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
		       ct->proto.tcp.seen[1].td_scale))
		goto nla_put_failure;

	/* Only the flags member is dumped; tmp.mask stays zeroed. */
	tmp.flags = ct->proto.tcp.seen[0].flags;
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;

	tmp.flags = ct->proto.tcp.seen[1].flags;
	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
		    sizeof(struct nf_ct_tcp_flags), &tmp))
		goto nla_put_failure;
	spin_unlock_bh(&ct->lock);

	nla_nest_end(skb, nest_parms);

	return 0;

nla_put_failure:
	spin_unlock_bh(&ct->lock);
	return -1;
}
1205 | 1204 | ||
/* Validation policy for the attributes nlattr_to_tcp() accepts inside
 * CTA_PROTOINFO_TCP: u8 scalars for state/window scales, and fixed-size
 * binary blobs for the per-direction flag/mask pairs.
 */
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len = sizeof(struct nf_ct_tcp_flags) },
};
1213 | 1212 | ||
/* Worst-case payload tcp_to_nlattr() adds inside CTA_PROTOINFO_TCP:
 * two u8 attributes plus the two struct nf_ct_tcp_flags attributes.
 *
 * Fix: the flags entries used sizeof(sizeof(struct nf_ct_tcp_flags)),
 * i.e. sizeof(size_t), which under-reserves skb space for the two
 * nf_ct_tcp_flags attributes dumped by tcp_to_nlattr().
 */
#define TCP_NLATTR_SIZE	( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1219 | 1218 | ||
/* Apply user-supplied CTA_PROTOINFO_TCP attributes to @ct.
 *
 * Absent CTA_PROTOINFO_TCP means the update carries no private protocol
 * info and succeeds as a no-op.  Returns 0 on success, a negative errno
 * on parse failure or an out-of-range state value.
 */
static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
{
	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
	int err;

	/* updates could not contain anything about the private
	 * protocol info, in that case skip the parsing */
	if (!pattr)
		return 0;

	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
			       tcp_nla_policy, NULL);
	if (err < 0)
		return err;

	/* Reject invalid state values before taking the lock. */
	if (tb[CTA_PROTOINFO_TCP_STATE] &&
	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
		return -EINVAL;

	/* ct->lock serializes against the packet path touching proto.tcp. */
	spin_lock_bh(&ct->lock);
	if (tb[CTA_PROTOINFO_TCP_STATE])
		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);

	/* Flags are updated read-modify-write: only bits set in the
	 * user-provided mask are changed.
	 */
	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
		struct nf_ct_tcp_flags *attr =
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
		ct->proto.tcp.seen[0].flags &= ~attr->mask;
		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
	}

	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
		struct nf_ct_tcp_flags *attr =
			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
		ct->proto.tcp.seen[1].flags &= ~attr->mask;
		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
	}

	/* Window scales are only meaningful as a pair, and only when both
	 * directions negotiated window scaling.
	 */
	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
		ct->proto.tcp.seen[0].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
		ct->proto.tcp.seen[1].td_scale =
			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
	}
	spin_unlock_bh(&ct->lock);

	return 0;
}
1271 | 1270 | ||
1272 | static unsigned int tcp_nlattr_tuple_size(void) | 1271 | static unsigned int tcp_nlattr_tuple_size(void) |
1273 | { | 1272 | { |
1274 | static unsigned int size __read_mostly; | 1273 | static unsigned int size __read_mostly; |
1275 | 1274 | ||
1276 | if (!size) | 1275 | if (!size) |
1277 | size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); | 1276 | size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); |
1278 | 1277 | ||
1279 | return size; | 1278 | return size; |
1280 | } | 1279 | } |
1281 | #endif | 1280 | #endif |
1282 | 1281 | ||
1283 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 1282 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
1284 | 1283 | ||
1285 | #include <linux/netfilter/nfnetlink.h> | 1284 | #include <linux/netfilter/nfnetlink.h> |
1286 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 1285 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
1287 | 1286 | ||
1288 | static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], | 1287 | static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], |
1289 | struct net *net, void *data) | 1288 | struct net *net, void *data) |
1290 | { | 1289 | { |
1291 | unsigned int *timeouts = data; | ||
1292 | struct nf_tcp_net *tn = tcp_pernet(net); | 1290 | struct nf_tcp_net *tn = tcp_pernet(net); |
1291 | unsigned int *timeouts = data; | ||
1293 | int i; | 1292 | int i; |
1294 | 1293 | ||
1294 | if (!timeouts) | ||
1295 | timeouts = tn->timeouts; | ||
1295 | /* set default TCP timeouts. */ | 1296 | /* set default TCP timeouts. */ |
1296 | for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) | 1297 | for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) |
1297 | timeouts[i] = tn->timeouts[i]; | 1298 | timeouts[i] = tn->timeouts[i]; |
1298 | 1299 | ||
1299 | if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { | 1300 | if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { |
1300 | timeouts[TCP_CONNTRACK_SYN_SENT] = | 1301 | timeouts[TCP_CONNTRACK_SYN_SENT] = |
1301 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; | 1302 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; |
1302 | } | 1303 | } |
1303 | if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { | 1304 | if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { |
1304 | timeouts[TCP_CONNTRACK_SYN_RECV] = | 1305 | timeouts[TCP_CONNTRACK_SYN_RECV] = |
1305 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; | 1306 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; |
1306 | } | 1307 | } |
1307 | if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) { | 1308 | if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) { |
1308 | timeouts[TCP_CONNTRACK_ESTABLISHED] = | 1309 | timeouts[TCP_CONNTRACK_ESTABLISHED] = |
1309 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ; | 1310 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ; |
1310 | } | 1311 | } |
1311 | if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) { | 1312 | if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) { |
1312 | timeouts[TCP_CONNTRACK_FIN_WAIT] = | 1313 | timeouts[TCP_CONNTRACK_FIN_WAIT] = |
1313 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ; | 1314 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ; |
1314 | } | 1315 | } |
1315 | if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) { | 1316 | if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) { |
1316 | timeouts[TCP_CONNTRACK_CLOSE_WAIT] = | 1317 | timeouts[TCP_CONNTRACK_CLOSE_WAIT] = |
1317 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ; | 1318 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ; |
1318 | } | 1319 | } |
1319 | if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) { | 1320 | if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) { |
1320 | timeouts[TCP_CONNTRACK_LAST_ACK] = | 1321 | timeouts[TCP_CONNTRACK_LAST_ACK] = |
1321 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ; | 1322 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ; |
1322 | } | 1323 | } |
1323 | if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) { | 1324 | if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) { |
1324 | timeouts[TCP_CONNTRACK_TIME_WAIT] = | 1325 | timeouts[TCP_CONNTRACK_TIME_WAIT] = |
1325 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ; | 1326 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ; |
1326 | } | 1327 | } |
1327 | if (tb[CTA_TIMEOUT_TCP_CLOSE]) { | 1328 | if (tb[CTA_TIMEOUT_TCP_CLOSE]) { |
1328 | timeouts[TCP_CONNTRACK_CLOSE] = | 1329 | timeouts[TCP_CONNTRACK_CLOSE] = |
1329 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ; | 1330 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ; |
1330 | } | 1331 | } |
1331 | if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) { | 1332 | if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) { |
1332 | timeouts[TCP_CONNTRACK_SYN_SENT2] = | 1333 | timeouts[TCP_CONNTRACK_SYN_SENT2] = |
1333 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ; | 1334 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ; |
1334 | } | 1335 | } |
1335 | if (tb[CTA_TIMEOUT_TCP_RETRANS]) { | 1336 | if (tb[CTA_TIMEOUT_TCP_RETRANS]) { |
1336 | timeouts[TCP_CONNTRACK_RETRANS] = | 1337 | timeouts[TCP_CONNTRACK_RETRANS] = |
1337 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ; | 1338 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ; |
1338 | } | 1339 | } |
1339 | if (tb[CTA_TIMEOUT_TCP_UNACK]) { | 1340 | if (tb[CTA_TIMEOUT_TCP_UNACK]) { |
1340 | timeouts[TCP_CONNTRACK_UNACK] = | 1341 | timeouts[TCP_CONNTRACK_UNACK] = |
1341 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ; | 1342 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ; |
1342 | } | 1343 | } |
1343 | return 0; | 1344 | return 0; |
1344 | } | 1345 | } |
1345 | 1346 | ||
1346 | static int | 1347 | static int |
1347 | tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 1348 | tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
1348 | { | 1349 | { |
1349 | const unsigned int *timeouts = data; | 1350 | const unsigned int *timeouts = data; |
1350 | 1351 | ||
1351 | if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT, | 1352 | if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT, |
1352 | htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) || | 1353 | htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) || |
1353 | nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV, | 1354 | nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV, |
1354 | htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) || | 1355 | htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) || |
1355 | nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, | 1356 | nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, |
1356 | htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) || | 1357 | htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) || |
1357 | nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, | 1358 | nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, |
1358 | htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) || | 1359 | htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) || |
1359 | nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, | 1360 | nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, |
1360 | htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) || | 1361 | htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) || |
1361 | nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK, | 1362 | nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK, |
1362 | htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) || | 1363 | htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) || |
1363 | nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, | 1364 | nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, |
1364 | htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) || | 1365 | htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) || |
1365 | nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE, | 1366 | nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE, |
1366 | htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) || | 1367 | htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) || |
1367 | nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, | 1368 | nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, |
1368 | htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) || | 1369 | htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) || |
1369 | nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS, | 1370 | nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS, |
1370 | htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) || | 1371 | htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) || |
1371 | nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK, | 1372 | nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK, |
1372 | htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ))) | 1373 | htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ))) |
1373 | goto nla_put_failure; | 1374 | goto nla_put_failure; |
1374 | return 0; | 1375 | return 0; |
1375 | 1376 | ||
1376 | nla_put_failure: | 1377 | nla_put_failure: |
1377 | return -ENOSPC; | 1378 | return -ENOSPC; |
1378 | } | 1379 | } |
1379 | 1380 | ||
/* Validation policy for CTA_TIMEOUT_TCP_* attributes: one u32 timeout
 * (in seconds) per TCP conntrack state.
 */
static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
};
1393 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 1394 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
1394 | 1395 | ||
1395 | #ifdef CONFIG_SYSCTL | 1396 | #ifdef CONFIG_SYSCTL |
/* Template for the per-netns TCP conntrack sysctls.  The .data pointers
 * are filled in per netns by tcp_kmemdup_sysctl_table(), which addresses
 * entries BY INDEX — do not reorder or insert rows without updating the
 * index assignments there.  Timeout entries use proc_dointvec_jiffies
 * (values in seconds, stored as jiffies); the tunables at the end use
 * plain proc_dointvec.
 */
static struct ctl_table tcp_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_established",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_last_ack",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_time_wait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_close",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_tcp_loose",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "nf_conntrack_tcp_be_liberal",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "nf_conntrack_tcp_max_retrans",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
1477 | #endif /* CONFIG_SYSCTL */ | 1478 | #endif /* CONFIG_SYSCTL */ |
1478 | 1479 | ||
/* Duplicate the sysctl template for this netns and point each entry's
 * .data at the matching per-netns tunable.  Entry indices correspond to
 * the row order of tcp_sysctl_table[].  No-op (returns 0) when the table
 * already exists or when CONFIG_SYSCTL is off; -ENOMEM on allocation
 * failure.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;

	pn->ctl_table = table;
#endif
	return 0;
}
1508 | 1509 | ||
1509 | static int tcp_init_net(struct net *net, u_int16_t proto) | 1510 | static int tcp_init_net(struct net *net, u_int16_t proto) |
1510 | { | 1511 | { |
1511 | struct nf_tcp_net *tn = tcp_pernet(net); | 1512 | struct nf_tcp_net *tn = tcp_pernet(net); |
1512 | struct nf_proto_net *pn = &tn->pn; | 1513 | struct nf_proto_net *pn = &tn->pn; |
1513 | 1514 | ||
1514 | if (!pn->users) { | 1515 | if (!pn->users) { |
1515 | int i; | 1516 | int i; |
1516 | 1517 | ||
1517 | for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) | 1518 | for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) |
1518 | tn->timeouts[i] = tcp_timeouts[i]; | 1519 | tn->timeouts[i] = tcp_timeouts[i]; |
1519 | 1520 | ||
1520 | tn->tcp_loose = nf_ct_tcp_loose; | 1521 | tn->tcp_loose = nf_ct_tcp_loose; |
1521 | tn->tcp_be_liberal = nf_ct_tcp_be_liberal; | 1522 | tn->tcp_be_liberal = nf_ct_tcp_be_liberal; |
1522 | tn->tcp_max_retrans = nf_ct_tcp_max_retrans; | 1523 | tn->tcp_max_retrans = nf_ct_tcp_max_retrans; |
1523 | } | 1524 | } |
1524 | 1525 | ||
1525 | return tcp_kmemdup_sysctl_table(pn, tn); | 1526 | return tcp_kmemdup_sysctl_table(pn, tn); |
1526 | } | 1527 | } |
1527 | 1528 | ||
1528 | static struct nf_proto_net *tcp_get_net_proto(struct net *net) | 1529 | static struct nf_proto_net *tcp_get_net_proto(struct net *net) |
1529 | { | 1530 | { |
1530 | return &net->ct.nf_ct_proto.tcp.pn; | 1531 | return &net->ct.nf_ct_proto.tcp.pn; |
1531 | } | 1532 | } |
1532 | 1533 | ||
/* L4 tracker definition for TCP over IPv4.  Since the removal of the
 * ->get_timeouts() indirection, the packet/new handlers fetch the
 * timeout array themselves.
 */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	.print_conntrack 	= tcp_print_conntrack,
#endif
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
	.can_early_drop		= tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.to_nlattr		= tcp_to_nlattr,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nlattr_size		= TCP_NLATTR_SIZE,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
	.init_net		= tcp_init_net,
	.get_net_proto		= tcp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1568 | 1568 | ||
1569 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = | 1569 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = |
1570 | { | 1570 | { |
1571 | .l3proto = PF_INET6, | 1571 | .l3proto = PF_INET6, |
1572 | .l4proto = IPPROTO_TCP, | 1572 | .l4proto = IPPROTO_TCP, |
1573 | #ifdef CONFIG_NF_CONNTRACK_PROCFS | 1573 | #ifdef CONFIG_NF_CONNTRACK_PROCFS |
1574 | .print_conntrack = tcp_print_conntrack, | 1574 | .print_conntrack = tcp_print_conntrack, |
1575 | #endif | 1575 | #endif |
1576 | .packet = tcp_packet, | 1576 | .packet = tcp_packet, |
1577 | .get_timeouts = tcp_get_timeouts, | ||
1578 | .new = tcp_new, | 1577 | .new = tcp_new, |
1579 | .error = tcp_error, | 1578 | .error = tcp_error, |
1580 | .can_early_drop = tcp_can_early_drop, | 1579 | .can_early_drop = tcp_can_early_drop, |
1581 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 1580 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
1582 | .nlattr_size = TCP_NLATTR_SIZE, | 1581 | .nlattr_size = TCP_NLATTR_SIZE, |
1583 | .to_nlattr = tcp_to_nlattr, | 1582 | .to_nlattr = tcp_to_nlattr, |
1584 | .from_nlattr = nlattr_to_tcp, | 1583 | .from_nlattr = nlattr_to_tcp, |
1585 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 1584 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
1586 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 1585 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
1587 | .nlattr_tuple_size = tcp_nlattr_tuple_size, | 1586 | .nlattr_tuple_size = tcp_nlattr_tuple_size, |
1588 | .nla_policy = nf_ct_port_nla_policy, | 1587 | .nla_policy = nf_ct_port_nla_policy, |
1589 | #endif | 1588 | #endif |
1590 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 1589 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
1591 | .ctnl_timeout = { | 1590 | .ctnl_timeout = { |
1592 | .nlattr_to_obj = tcp_timeout_nlattr_to_obj, | 1591 | .nlattr_to_obj = tcp_timeout_nlattr_to_obj, |
1593 | .obj_to_nlattr = tcp_timeout_obj_to_nlattr, | 1592 | .obj_to_nlattr = tcp_timeout_obj_to_nlattr, |
1594 | .nlattr_max = CTA_TIMEOUT_TCP_MAX, | 1593 | .nlattr_max = CTA_TIMEOUT_TCP_MAX, |
1595 | .obj_size = sizeof(unsigned int) * | 1594 | .obj_size = sizeof(unsigned int) * |
net/netfilter/nf_conntrack_proto_udp.c
1 | /* (C) 1999-2001 Paul `Rusty' Russell | 1 | /* (C) 1999-2001 Paul `Rusty' Russell |
2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> | 2 | * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> |
3 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> | 3 | * (C) 2006-2012 Patrick McHardy <kaber@trash.net> |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
7 | * published by the Free Software Foundation. | 7 | * published by the Free Software Foundation. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/timer.h> | 11 | #include <linux/timer.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/udp.h> | 13 | #include <linux/udp.h> |
14 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
15 | #include <linux/skbuff.h> | 15 | #include <linux/skbuff.h> |
16 | #include <linux/ipv6.h> | 16 | #include <linux/ipv6.h> |
17 | #include <net/ip6_checksum.h> | 17 | #include <net/ip6_checksum.h> |
18 | #include <net/checksum.h> | 18 | #include <net/checksum.h> |
19 | 19 | ||
20 | #include <linux/netfilter.h> | 20 | #include <linux/netfilter.h> |
21 | #include <linux/netfilter_ipv4.h> | 21 | #include <linux/netfilter_ipv4.h> |
22 | #include <linux/netfilter_ipv6.h> | 22 | #include <linux/netfilter_ipv6.h> |
23 | #include <net/netfilter/nf_conntrack_l4proto.h> | 23 | #include <net/netfilter/nf_conntrack_l4proto.h> |
24 | #include <net/netfilter/nf_conntrack_ecache.h> | 24 | #include <net/netfilter/nf_conntrack_ecache.h> |
25 | #include <net/netfilter/nf_conntrack_timeout.h> | ||
25 | #include <net/netfilter/nf_log.h> | 26 | #include <net/netfilter/nf_log.h> |
26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 27 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
27 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> | 28 | #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> |
28 | 29 | ||
29 | static const unsigned int udp_timeouts[UDP_CT_MAX] = { | 30 | static const unsigned int udp_timeouts[UDP_CT_MAX] = { |
30 | [UDP_CT_UNREPLIED] = 30*HZ, | 31 | [UDP_CT_UNREPLIED] = 30*HZ, |
31 | [UDP_CT_REPLIED] = 180*HZ, | 32 | [UDP_CT_REPLIED] = 180*HZ, |
32 | }; | 33 | }; |
33 | 34 | ||
34 | static inline struct nf_udp_net *udp_pernet(struct net *net) | 35 | static inline struct nf_udp_net *udp_pernet(struct net *net) |
35 | { | 36 | { |
36 | return &net->ct.nf_ct_proto.udp; | 37 | return &net->ct.nf_ct_proto.udp; |
37 | } | 38 | } |
38 | 39 | ||
39 | static unsigned int *udp_get_timeouts(struct net *net) | 40 | static unsigned int *udp_get_timeouts(struct net *net) |
40 | { | 41 | { |
41 | return udp_pernet(net)->timeouts; | 42 | return udp_pernet(net)->timeouts; |
42 | } | 43 | } |
43 | 44 | ||
44 | /* Returns verdict for packet, and may modify conntracktype */ | 45 | /* Returns verdict for packet, and may modify conntracktype */ |
45 | static int udp_packet(struct nf_conn *ct, | 46 | static int udp_packet(struct nf_conn *ct, |
46 | const struct sk_buff *skb, | 47 | const struct sk_buff *skb, |
47 | unsigned int dataoff, | 48 | unsigned int dataoff, |
48 | enum ip_conntrack_info ctinfo, | 49 | enum ip_conntrack_info ctinfo) |
49 | unsigned int *timeouts) | ||
50 | { | 50 | { |
51 | unsigned int *timeouts; | ||
52 | |||
53 | timeouts = nf_ct_timeout_lookup(ct); | ||
54 | if (!timeouts) | ||
55 | timeouts = udp_get_timeouts(nf_ct_net(ct)); | ||
56 | |||
51 | /* If we've seen traffic both ways, this is some kind of UDP | 57 | /* If we've seen traffic both ways, this is some kind of UDP |
52 | stream. Extend timeout. */ | 58 | stream. Extend timeout. */ |
53 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | 59 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { |
54 | nf_ct_refresh_acct(ct, ctinfo, skb, | 60 | nf_ct_refresh_acct(ct, ctinfo, skb, |
55 | timeouts[UDP_CT_REPLIED]); | 61 | timeouts[UDP_CT_REPLIED]); |
56 | /* Also, more likely to be important, and not a probe */ | 62 | /* Also, more likely to be important, and not a probe */ |
57 | if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) | 63 | if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) |
58 | nf_conntrack_event_cache(IPCT_ASSURED, ct); | 64 | nf_conntrack_event_cache(IPCT_ASSURED, ct); |
59 | } else { | 65 | } else { |
60 | nf_ct_refresh_acct(ct, ctinfo, skb, | 66 | nf_ct_refresh_acct(ct, ctinfo, skb, |
61 | timeouts[UDP_CT_UNREPLIED]); | 67 | timeouts[UDP_CT_UNREPLIED]); |
62 | } | 68 | } |
63 | return NF_ACCEPT; | 69 | return NF_ACCEPT; |
64 | } | 70 | } |
65 | 71 | ||
66 | /* Called when a new connection for this protocol found. */ | 72 | /* Called when a new connection for this protocol found. */ |
67 | static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, | 73 | static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, |
68 | unsigned int dataoff, unsigned int *timeouts) | 74 | unsigned int dataoff) |
69 | { | 75 | { |
70 | return true; | 76 | return true; |
71 | } | 77 | } |
72 | 78 | ||
73 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE | 79 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE |
74 | static void udplite_error_log(const struct sk_buff *skb, struct net *net, | 80 | static void udplite_error_log(const struct sk_buff *skb, struct net *net, |
75 | u8 pf, const char *msg) | 81 | u8 pf, const char *msg) |
76 | { | 82 | { |
77 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); | 83 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); |
78 | } | 84 | } |
79 | 85 | ||
80 | static int udplite_error(struct net *net, struct nf_conn *tmpl, | 86 | static int udplite_error(struct net *net, struct nf_conn *tmpl, |
81 | struct sk_buff *skb, | 87 | struct sk_buff *skb, |
82 | unsigned int dataoff, | 88 | unsigned int dataoff, |
83 | u8 pf, unsigned int hooknum) | 89 | u8 pf, unsigned int hooknum) |
84 | { | 90 | { |
85 | unsigned int udplen = skb->len - dataoff; | 91 | unsigned int udplen = skb->len - dataoff; |
86 | const struct udphdr *hdr; | 92 | const struct udphdr *hdr; |
87 | struct udphdr _hdr; | 93 | struct udphdr _hdr; |
88 | unsigned int cscov; | 94 | unsigned int cscov; |
89 | 95 | ||
90 | /* Header is too small? */ | 96 | /* Header is too small? */ |
91 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 97 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
92 | if (!hdr) { | 98 | if (!hdr) { |
93 | udplite_error_log(skb, net, pf, "short packet"); | 99 | udplite_error_log(skb, net, pf, "short packet"); |
94 | return -NF_ACCEPT; | 100 | return -NF_ACCEPT; |
95 | } | 101 | } |
96 | 102 | ||
97 | cscov = ntohs(hdr->len); | 103 | cscov = ntohs(hdr->len); |
98 | if (cscov == 0) { | 104 | if (cscov == 0) { |
99 | cscov = udplen; | 105 | cscov = udplen; |
100 | } else if (cscov < sizeof(*hdr) || cscov > udplen) { | 106 | } else if (cscov < sizeof(*hdr) || cscov > udplen) { |
101 | udplite_error_log(skb, net, pf, "invalid checksum coverage"); | 107 | udplite_error_log(skb, net, pf, "invalid checksum coverage"); |
102 | return -NF_ACCEPT; | 108 | return -NF_ACCEPT; |
103 | } | 109 | } |
104 | 110 | ||
105 | /* UDPLITE mandates checksums */ | 111 | /* UDPLITE mandates checksums */ |
106 | if (!hdr->check) { | 112 | if (!hdr->check) { |
107 | udplite_error_log(skb, net, pf, "checksum missing"); | 113 | udplite_error_log(skb, net, pf, "checksum missing"); |
108 | return -NF_ACCEPT; | 114 | return -NF_ACCEPT; |
109 | } | 115 | } |
110 | 116 | ||
111 | /* Checksum invalid? Ignore. */ | 117 | /* Checksum invalid? Ignore. */ |
112 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 118 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
113 | nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, | 119 | nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, |
114 | pf)) { | 120 | pf)) { |
115 | udplite_error_log(skb, net, pf, "bad checksum"); | 121 | udplite_error_log(skb, net, pf, "bad checksum"); |
116 | return -NF_ACCEPT; | 122 | return -NF_ACCEPT; |
117 | } | 123 | } |
118 | 124 | ||
119 | return NF_ACCEPT; | 125 | return NF_ACCEPT; |
120 | } | 126 | } |
121 | #endif | 127 | #endif |
122 | 128 | ||
123 | static void udp_error_log(const struct sk_buff *skb, struct net *net, | 129 | static void udp_error_log(const struct sk_buff *skb, struct net *net, |
124 | u8 pf, const char *msg) | 130 | u8 pf, const char *msg) |
125 | { | 131 | { |
126 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); | 132 | nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); |
127 | } | 133 | } |
128 | 134 | ||
129 | static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, | 135 | static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, |
130 | unsigned int dataoff, | 136 | unsigned int dataoff, |
131 | u_int8_t pf, | 137 | u_int8_t pf, |
132 | unsigned int hooknum) | 138 | unsigned int hooknum) |
133 | { | 139 | { |
134 | unsigned int udplen = skb->len - dataoff; | 140 | unsigned int udplen = skb->len - dataoff; |
135 | const struct udphdr *hdr; | 141 | const struct udphdr *hdr; |
136 | struct udphdr _hdr; | 142 | struct udphdr _hdr; |
137 | 143 | ||
138 | /* Header is too small? */ | 144 | /* Header is too small? */ |
139 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 145 | hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
140 | if (hdr == NULL) { | 146 | if (hdr == NULL) { |
141 | udp_error_log(skb, net, pf, "short packet"); | 147 | udp_error_log(skb, net, pf, "short packet"); |
142 | return -NF_ACCEPT; | 148 | return -NF_ACCEPT; |
143 | } | 149 | } |
144 | 150 | ||
145 | /* Truncated/malformed packets */ | 151 | /* Truncated/malformed packets */ |
146 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { | 152 | if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { |
147 | udp_error_log(skb, net, pf, "truncated/malformed packet"); | 153 | udp_error_log(skb, net, pf, "truncated/malformed packet"); |
148 | return -NF_ACCEPT; | 154 | return -NF_ACCEPT; |
149 | } | 155 | } |
150 | 156 | ||
151 | /* Packet with no checksum */ | 157 | /* Packet with no checksum */ |
152 | if (!hdr->check) | 158 | if (!hdr->check) |
153 | return NF_ACCEPT; | 159 | return NF_ACCEPT; |
154 | 160 | ||
155 | /* Checksum invalid? Ignore. | 161 | /* Checksum invalid? Ignore. |
156 | * We skip checking packets on the outgoing path | 162 | * We skip checking packets on the outgoing path |
157 | * because the checksum is assumed to be correct. | 163 | * because the checksum is assumed to be correct. |
158 | * FIXME: Source route IP option packets --RR */ | 164 | * FIXME: Source route IP option packets --RR */ |
159 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 165 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
160 | nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { | 166 | nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { |
161 | udp_error_log(skb, net, pf, "bad checksum"); | 167 | udp_error_log(skb, net, pf, "bad checksum"); |
162 | return -NF_ACCEPT; | 168 | return -NF_ACCEPT; |
163 | } | 169 | } |
164 | 170 | ||
165 | return NF_ACCEPT; | 171 | return NF_ACCEPT; |
166 | } | 172 | } |
167 | 173 | ||
168 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 174 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
169 | 175 | ||
170 | #include <linux/netfilter/nfnetlink.h> | 176 | #include <linux/netfilter/nfnetlink.h> |
171 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 177 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
172 | 178 | ||
173 | static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], | 179 | static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], |
174 | struct net *net, void *data) | 180 | struct net *net, void *data) |
175 | { | 181 | { |
176 | unsigned int *timeouts = data; | 182 | unsigned int *timeouts = data; |
177 | struct nf_udp_net *un = udp_pernet(net); | 183 | struct nf_udp_net *un = udp_pernet(net); |
178 | 184 | ||
185 | if (!timeouts) | ||
186 | timeouts = un->timeouts; | ||
187 | |||
179 | /* set default timeouts for UDP. */ | 188 | /* set default timeouts for UDP. */ |
180 | timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; | 189 | timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; |
181 | timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; | 190 | timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; |
182 | 191 | ||
183 | if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { | 192 | if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { |
184 | timeouts[UDP_CT_UNREPLIED] = | 193 | timeouts[UDP_CT_UNREPLIED] = |
185 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; | 194 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; |
186 | } | 195 | } |
187 | if (tb[CTA_TIMEOUT_UDP_REPLIED]) { | 196 | if (tb[CTA_TIMEOUT_UDP_REPLIED]) { |
188 | timeouts[UDP_CT_REPLIED] = | 197 | timeouts[UDP_CT_REPLIED] = |
189 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; | 198 | ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; |
190 | } | 199 | } |
191 | return 0; | 200 | return 0; |
192 | } | 201 | } |
193 | 202 | ||
194 | static int | 203 | static int |
195 | udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) | 204 | udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) |
196 | { | 205 | { |
197 | const unsigned int *timeouts = data; | 206 | const unsigned int *timeouts = data; |
198 | 207 | ||
199 | if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED, | 208 | if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED, |
200 | htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) || | 209 | htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) || |
201 | nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED, | 210 | nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED, |
202 | htonl(timeouts[UDP_CT_REPLIED] / HZ))) | 211 | htonl(timeouts[UDP_CT_REPLIED] / HZ))) |
203 | goto nla_put_failure; | 212 | goto nla_put_failure; |
204 | return 0; | 213 | return 0; |
205 | 214 | ||
206 | nla_put_failure: | 215 | nla_put_failure: |
207 | return -ENOSPC; | 216 | return -ENOSPC; |
208 | } | 217 | } |
209 | 218 | ||
210 | static const struct nla_policy | 219 | static const struct nla_policy |
211 | udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { | 220 | udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { |
212 | [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, | 221 | [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, |
213 | [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, | 222 | [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, |
214 | }; | 223 | }; |
215 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 224 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
216 | 225 | ||
217 | #ifdef CONFIG_SYSCTL | 226 | #ifdef CONFIG_SYSCTL |
218 | static struct ctl_table udp_sysctl_table[] = { | 227 | static struct ctl_table udp_sysctl_table[] = { |
219 | { | 228 | { |
220 | .procname = "nf_conntrack_udp_timeout", | 229 | .procname = "nf_conntrack_udp_timeout", |
221 | .maxlen = sizeof(unsigned int), | 230 | .maxlen = sizeof(unsigned int), |
222 | .mode = 0644, | 231 | .mode = 0644, |
223 | .proc_handler = proc_dointvec_jiffies, | 232 | .proc_handler = proc_dointvec_jiffies, |
224 | }, | 233 | }, |
225 | { | 234 | { |
226 | .procname = "nf_conntrack_udp_timeout_stream", | 235 | .procname = "nf_conntrack_udp_timeout_stream", |
227 | .maxlen = sizeof(unsigned int), | 236 | .maxlen = sizeof(unsigned int), |
228 | .mode = 0644, | 237 | .mode = 0644, |
229 | .proc_handler = proc_dointvec_jiffies, | 238 | .proc_handler = proc_dointvec_jiffies, |
230 | }, | 239 | }, |
231 | { } | 240 | { } |
232 | }; | 241 | }; |
233 | #endif /* CONFIG_SYSCTL */ | 242 | #endif /* CONFIG_SYSCTL */ |
234 | 243 | ||
235 | static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, | 244 | static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, |
236 | struct nf_udp_net *un) | 245 | struct nf_udp_net *un) |
237 | { | 246 | { |
238 | #ifdef CONFIG_SYSCTL | 247 | #ifdef CONFIG_SYSCTL |
239 | if (pn->ctl_table) | 248 | if (pn->ctl_table) |
240 | return 0; | 249 | return 0; |
241 | pn->ctl_table = kmemdup(udp_sysctl_table, | 250 | pn->ctl_table = kmemdup(udp_sysctl_table, |
242 | sizeof(udp_sysctl_table), | 251 | sizeof(udp_sysctl_table), |
243 | GFP_KERNEL); | 252 | GFP_KERNEL); |
244 | if (!pn->ctl_table) | 253 | if (!pn->ctl_table) |
245 | return -ENOMEM; | 254 | return -ENOMEM; |
246 | pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; | 255 | pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; |
247 | pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; | 256 | pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; |
248 | #endif | 257 | #endif |
249 | return 0; | 258 | return 0; |
250 | } | 259 | } |
251 | 260 | ||
252 | static int udp_init_net(struct net *net, u_int16_t proto) | 261 | static int udp_init_net(struct net *net, u_int16_t proto) |
253 | { | 262 | { |
254 | struct nf_udp_net *un = udp_pernet(net); | 263 | struct nf_udp_net *un = udp_pernet(net); |
255 | struct nf_proto_net *pn = &un->pn; | 264 | struct nf_proto_net *pn = &un->pn; |
256 | 265 | ||
257 | if (!pn->users) { | 266 | if (!pn->users) { |
258 | int i; | 267 | int i; |
259 | 268 | ||
260 | for (i = 0; i < UDP_CT_MAX; i++) | 269 | for (i = 0; i < UDP_CT_MAX; i++) |
261 | un->timeouts[i] = udp_timeouts[i]; | 270 | un->timeouts[i] = udp_timeouts[i]; |
262 | } | 271 | } |
263 | 272 | ||
264 | return udp_kmemdup_sysctl_table(pn, un); | 273 | return udp_kmemdup_sysctl_table(pn, un); |
265 | } | 274 | } |
266 | 275 | ||
267 | static struct nf_proto_net *udp_get_net_proto(struct net *net) | 276 | static struct nf_proto_net *udp_get_net_proto(struct net *net) |
268 | { | 277 | { |
269 | return &net->ct.nf_ct_proto.udp.pn; | 278 | return &net->ct.nf_ct_proto.udp.pn; |
270 | } | 279 | } |
271 | 280 | ||
272 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = | 281 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = |
273 | { | 282 | { |
274 | .l3proto = PF_INET, | 283 | .l3proto = PF_INET, |
275 | .l4proto = IPPROTO_UDP, | 284 | .l4proto = IPPROTO_UDP, |
276 | .allow_clash = true, | 285 | .allow_clash = true, |
277 | .packet = udp_packet, | 286 | .packet = udp_packet, |
278 | .get_timeouts = udp_get_timeouts, | ||
279 | .new = udp_new, | 287 | .new = udp_new, |
280 | .error = udp_error, | 288 | .error = udp_error, |
281 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 289 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
282 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 290 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
283 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 291 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
284 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 292 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
285 | .nla_policy = nf_ct_port_nla_policy, | 293 | .nla_policy = nf_ct_port_nla_policy, |
286 | #endif | 294 | #endif |
287 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 295 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
288 | .ctnl_timeout = { | 296 | .ctnl_timeout = { |
289 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, | 297 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, |
290 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, | 298 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, |
291 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, | 299 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, |
292 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, | 300 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, |
293 | .nla_policy = udp_timeout_nla_policy, | 301 | .nla_policy = udp_timeout_nla_policy, |
294 | }, | 302 | }, |
295 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 303 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
296 | .init_net = udp_init_net, | 304 | .init_net = udp_init_net, |
297 | .get_net_proto = udp_get_net_proto, | 305 | .get_net_proto = udp_get_net_proto, |
298 | }; | 306 | }; |
299 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); | 307 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); |
300 | 308 | ||
301 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE | 309 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE |
302 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = | 310 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = |
303 | { | 311 | { |
304 | .l3proto = PF_INET, | 312 | .l3proto = PF_INET, |
305 | .l4proto = IPPROTO_UDPLITE, | 313 | .l4proto = IPPROTO_UDPLITE, |
306 | .allow_clash = true, | 314 | .allow_clash = true, |
307 | .packet = udp_packet, | 315 | .packet = udp_packet, |
308 | .get_timeouts = udp_get_timeouts, | ||
309 | .new = udp_new, | 316 | .new = udp_new, |
310 | .error = udplite_error, | 317 | .error = udplite_error, |
311 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 318 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
312 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 319 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
313 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 320 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
314 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 321 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
315 | .nla_policy = nf_ct_port_nla_policy, | 322 | .nla_policy = nf_ct_port_nla_policy, |
316 | #endif | 323 | #endif |
317 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 324 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
318 | .ctnl_timeout = { | 325 | .ctnl_timeout = { |
319 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, | 326 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, |
320 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, | 327 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, |
321 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, | 328 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, |
322 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, | 329 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, |
323 | .nla_policy = udp_timeout_nla_policy, | 330 | .nla_policy = udp_timeout_nla_policy, |
324 | }, | 331 | }, |
325 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 332 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
326 | .init_net = udp_init_net, | 333 | .init_net = udp_init_net, |
327 | .get_net_proto = udp_get_net_proto, | 334 | .get_net_proto = udp_get_net_proto, |
328 | }; | 335 | }; |
329 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); | 336 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); |
330 | #endif | 337 | #endif |
331 | 338 | ||
332 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = | 339 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = |
333 | { | 340 | { |
334 | .l3proto = PF_INET6, | 341 | .l3proto = PF_INET6, |
335 | .l4proto = IPPROTO_UDP, | 342 | .l4proto = IPPROTO_UDP, |
336 | .allow_clash = true, | 343 | .allow_clash = true, |
337 | .packet = udp_packet, | 344 | .packet = udp_packet, |
338 | .get_timeouts = udp_get_timeouts, | ||
339 | .new = udp_new, | 345 | .new = udp_new, |
340 | .error = udp_error, | 346 | .error = udp_error, |
341 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 347 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
342 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 348 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
343 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 349 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
344 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 350 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
345 | .nla_policy = nf_ct_port_nla_policy, | 351 | .nla_policy = nf_ct_port_nla_policy, |
346 | #endif | 352 | #endif |
347 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 353 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
348 | .ctnl_timeout = { | 354 | .ctnl_timeout = { |
349 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, | 355 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, |
350 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, | 356 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, |
351 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, | 357 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, |
352 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, | 358 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, |
353 | .nla_policy = udp_timeout_nla_policy, | 359 | .nla_policy = udp_timeout_nla_policy, |
354 | }, | 360 | }, |
355 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 361 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
356 | .init_net = udp_init_net, | 362 | .init_net = udp_init_net, |
357 | .get_net_proto = udp_get_net_proto, | 363 | .get_net_proto = udp_get_net_proto, |
358 | }; | 364 | }; |
359 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); | 365 | EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); |
360 | 366 | ||
361 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE | 367 | #ifdef CONFIG_NF_CT_PROTO_UDPLITE |
362 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = | 368 | const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = |
363 | { | 369 | { |
364 | .l3proto = PF_INET6, | 370 | .l3proto = PF_INET6, |
365 | .l4proto = IPPROTO_UDPLITE, | 371 | .l4proto = IPPROTO_UDPLITE, |
366 | .allow_clash = true, | 372 | .allow_clash = true, |
367 | .packet = udp_packet, | 373 | .packet = udp_packet, |
368 | .get_timeouts = udp_get_timeouts, | ||
369 | .new = udp_new, | 374 | .new = udp_new, |
370 | .error = udplite_error, | 375 | .error = udplite_error, |
371 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) | 376 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK) |
372 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | 377 | .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, |
373 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | 378 | .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, |
374 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | 379 | .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, |
375 | .nla_policy = nf_ct_port_nla_policy, | 380 | .nla_policy = nf_ct_port_nla_policy, |
376 | #endif | 381 | #endif |
377 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | 382 | #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) |
378 | .ctnl_timeout = { | 383 | .ctnl_timeout = { |
379 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, | 384 | .nlattr_to_obj = udp_timeout_nlattr_to_obj, |
380 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, | 385 | .obj_to_nlattr = udp_timeout_obj_to_nlattr, |
381 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, | 386 | .nlattr_max = CTA_TIMEOUT_UDP_MAX, |
382 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, | 387 | .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, |
383 | .nla_policy = udp_timeout_nla_policy, | 388 | .nla_policy = udp_timeout_nla_policy, |
384 | }, | 389 | }, |
385 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | 390 | #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ |
386 | .init_net = udp_init_net, | 391 | .init_net = udp_init_net, |
387 | .get_net_proto = udp_get_net_proto, | 392 | .get_net_proto = udp_get_net_proto, |
net/netfilter/nfnetlink_cttimeout.c
1 | /* | 1 | /* |
2 | * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> | 2 | * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> |
3 | * (C) 2012 by Vyatta Inc. <http://www.vyatta.com> | 3 | * (C) 2012 by Vyatta Inc. <http://www.vyatta.com> |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License version 2 as | 6 | * it under the terms of the GNU General Public License version 2 as |
7 | * published by the Free Software Foundation (or any later at your option). | 7 | * published by the Free Software Foundation (or any later at your option). |
8 | */ | 8 | */ |
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/rculist.h> | 12 | #include <linux/rculist.h> |
13 | #include <linux/rculist_nulls.h> | 13 | #include <linux/rculist_nulls.h> |
14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
15 | #include <linux/timer.h> | 15 | #include <linux/timer.h> |
16 | #include <linux/security.h> | 16 | #include <linux/security.h> |
17 | #include <linux/skbuff.h> | 17 | #include <linux/skbuff.h> |
18 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
19 | #include <linux/netlink.h> | 19 | #include <linux/netlink.h> |
20 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
21 | #include <linux/interrupt.h> | 21 | #include <linux/interrupt.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | 23 | ||
24 | #include <linux/netfilter.h> | 24 | #include <linux/netfilter.h> |
25 | #include <net/netlink.h> | 25 | #include <net/netlink.h> |
26 | #include <net/sock.h> | 26 | #include <net/sock.h> |
27 | #include <net/netfilter/nf_conntrack.h> | 27 | #include <net/netfilter/nf_conntrack.h> |
28 | #include <net/netfilter/nf_conntrack_core.h> | 28 | #include <net/netfilter/nf_conntrack_core.h> |
29 | #include <net/netfilter/nf_conntrack_l4proto.h> | 29 | #include <net/netfilter/nf_conntrack_l4proto.h> |
30 | #include <net/netfilter/nf_conntrack_tuple.h> | 30 | #include <net/netfilter/nf_conntrack_tuple.h> |
31 | #include <net/netfilter/nf_conntrack_timeout.h> | 31 | #include <net/netfilter/nf_conntrack_timeout.h> |
32 | 32 | ||
33 | #include <linux/netfilter/nfnetlink.h> | 33 | #include <linux/netfilter/nfnetlink.h> |
34 | #include <linux/netfilter/nfnetlink_cttimeout.h> | 34 | #include <linux/netfilter/nfnetlink_cttimeout.h> |
35 | 35 | ||
36 | MODULE_LICENSE("GPL"); | 36 | MODULE_LICENSE("GPL"); |
37 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); | 37 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); |
38 | MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); | 38 | MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); |
39 | 39 | ||
/* Netlink attribute policy for top-level CTA_TIMEOUT_* attributes.
 * NAME is a NUL-terminated string bounded to the object's name buffer;
 * DATA is a nested block parsed per-l4proto by ctnl_timeout_parse_policy().
 */
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
	[CTA_TIMEOUT_NAME]	= { .type = NLA_NUL_STRING,
				    .len  = CTNL_TIMEOUT_NAME_MAX - 1},
	[CTA_TIMEOUT_L3PROTO]	= { .type = NLA_U16 },
	[CTA_TIMEOUT_L4PROTO]	= { .type = NLA_U8 },
	[CTA_TIMEOUT_DATA]	= { .type = NLA_NESTED },
};
47 | 47 | ||
48 | static int | 48 | static int |
49 | ctnl_timeout_parse_policy(void *timeouts, | 49 | ctnl_timeout_parse_policy(void *timeout, |
50 | const struct nf_conntrack_l4proto *l4proto, | 50 | const struct nf_conntrack_l4proto *l4proto, |
51 | struct net *net, const struct nlattr *attr) | 51 | struct net *net, const struct nlattr *attr) |
52 | { | 52 | { |
53 | struct nlattr **tb; | 53 | struct nlattr **tb; |
54 | int ret = 0; | 54 | int ret = 0; |
55 | 55 | ||
56 | if (!l4proto->ctnl_timeout.nlattr_to_obj) | 56 | if (!l4proto->ctnl_timeout.nlattr_to_obj) |
57 | return 0; | 57 | return 0; |
58 | 58 | ||
59 | tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), | 59 | tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), |
60 | GFP_KERNEL); | 60 | GFP_KERNEL); |
61 | 61 | ||
62 | if (!tb) | 62 | if (!tb) |
63 | return -ENOMEM; | 63 | return -ENOMEM; |
64 | 64 | ||
65 | ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, | 65 | ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, |
66 | l4proto->ctnl_timeout.nla_policy, NULL); | 66 | l4proto->ctnl_timeout.nla_policy, NULL); |
67 | if (ret < 0) | 67 | if (ret < 0) |
68 | goto err; | 68 | goto err; |
69 | 69 | ||
70 | ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); | 70 | ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout); |
71 | 71 | ||
72 | err: | 72 | err: |
73 | kfree(tb); | 73 | kfree(tb); |
74 | return ret; | 74 | return ret; |
75 | } | 75 | } |
76 | 76 | ||
/* IPCTNL_MSG_TIMEOUT_NEW handler: create a named timeout policy object
 * for one (l3num, l4num) protocol pair, or update an existing one when
 * NLM_F_REPLACE is set. Runs under the nfnl mutex (list is not locked
 * here).
 */
static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
				 struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 const struct nlattr * const cda[],
				 struct netlink_ext_ack *extack)
{
	__u16 l3num;
	__u8 l4num;
	const struct nf_conntrack_l4proto *l4proto;
	struct ctnl_timeout *timeout, *matching = NULL;
	char *name;
	int ret;

	/* All four attributes are mandatory to build an object. */
	if (!cda[CTA_TIMEOUT_NAME] ||
	    !cda[CTA_TIMEOUT_L3PROTO] ||
	    !cda[CTA_TIMEOUT_L4PROTO] ||
	    !cda[CTA_TIMEOUT_DATA])
		return -EINVAL;

	name = nla_data(cda[CTA_TIMEOUT_NAME]);
	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);

	/* Look up an existing object by name. */
	list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
			continue;

		if (nlh->nlmsg_flags & NLM_F_EXCL)
			return -EEXIST;

		matching = timeout;
		break;
	}

	if (matching) {
		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
			/* You cannot replace one timeout policy by another of
			 * different kind, sorry.
			 */
			if (matching->l3num != l3num ||
			    matching->l4proto->l4proto != l4num)
				return -EINVAL;

			/* In-place update of the existing object's data. */
			return ctnl_timeout_parse_policy(&matching->data,
							 matching->l4proto, net,
							 cda[CTA_TIMEOUT_DATA]);
		}

		return -EBUSY;
	}

	l4proto = nf_ct_l4proto_find_get(l3num, l4num);

	/* This protocol is not supportted, skip. */
	if (l4proto->l4proto != l4num) {
		ret = -EOPNOTSUPP;
		goto err_proto_put;
	}

	/* Variable-sized object: the per-protocol timeout data follows
	 * the fixed-size header.
	 */
	timeout = kzalloc(sizeof(struct ctnl_timeout) +
			  l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
	if (timeout == NULL) {
		ret = -ENOMEM;
		goto err_proto_put;
	}

	ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
					cda[CTA_TIMEOUT_DATA]);
	if (ret < 0)
		goto err;

	/* Length is bounded by the NLA_NUL_STRING policy for NAME. */
	strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
	timeout->l3num = l3num;
	timeout->l4proto = l4proto;
	refcount_set(&timeout->refcnt, 1);
	list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);

	return 0;
err:
	kfree(timeout);
err_proto_put:
	nf_ct_l4proto_put(l4proto);
	return ret;
}
161 | 161 | ||
/* Serialize one timeout object into an nfnetlink message on @skb.
 * Returns skb->len on success, -1 after cancelling the partial message
 * if the skb ran out of room.
 */
static int
ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
		       int event, struct ctnl_timeout *timeout)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned int flags = portid ? NLM_F_MULTI : 0;
	const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;

	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
	if (nlh == NULL)
		goto nlmsg_failure;

	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family = AF_UNSPEC;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
	    nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
	    nla_put_be32(skb, CTA_TIMEOUT_USE,
			 htonl(refcount_read(&timeout->refcnt))))
		goto nla_put_failure;

	/* Nest the per-protocol timeout values under CTA_TIMEOUT_DATA. */
	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
		struct nlattr *nest_parms;
		int ret;

		nest_parms = nla_nest_start(skb,
					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
		if (!nest_parms)
			goto nla_put_failure;

		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
		if (ret < 0)
			goto nla_put_failure;

		nla_nest_end(skb, nest_parms);
	}

	nlmsg_end(skb, nlh);
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -1;
}
212 | 212 | ||
/* Netlink dump callback: walk the per-netns timeout list under RCU.
 * cb->args[1] holds the object to resume after when a previous pass
 * filled the skb; cb->args[2] marks the dump as finished.
 */
static int
ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct ctnl_timeout *cur, *last;

	if (cb->args[2])
		return 0;

	last = (struct ctnl_timeout *)cb->args[1];
	if (cb->args[1])
		cb->args[1] = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
		if (last) {
			/* Skip entries until the resume point is reached. */
			if (cur != last)
				continue;

			last = NULL;
		}
		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq,
					   NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
					   IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
			/* skb is full: remember where to resume next call. */
			cb->args[1] = (unsigned long)cur;
			break;
		}
	}
	if (!cb->args[1])
		cb->args[2] = 1;
	rcu_read_unlock();
	return skb->len;
}
247 | 247 | ||
/* IPCTNL_MSG_TIMEOUT_GET handler: dump all objects when NLM_F_DUMP is
 * set, otherwise unicast the single object matching CTA_TIMEOUT_NAME
 * back to the requester. Returns -ENOENT if no object matches.
 */
static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
				 struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 const struct nlattr * const cda[],
				 struct netlink_ext_ack *extack)
{
	int ret = -ENOENT;
	char *name;
	struct ctnl_timeout *cur;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = ctnl_timeout_dump,
		};
		return netlink_dump_start(ctnl, skb, nlh, &c);
	}

	if (!cda[CTA_TIMEOUT_NAME])
		return -EINVAL;
	name = nla_data(cda[CTA_TIMEOUT_NAME]);

	list_for_each_entry(cur, &net->nfct_timeout_list, head) {
		struct sk_buff *skb2;

		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
			continue;

		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
		if (skb2 == NULL) {
			ret = -ENOMEM;
			break;
		}

		ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
					     nlh->nlmsg_seq,
					     NFNL_MSG_TYPE(nlh->nlmsg_type),
					     IPCTNL_MSG_TIMEOUT_NEW, cur);
		if (ret <= 0) {
			kfree_skb(skb2);
			break;
		}
		/* netlink_unicast() consumes skb2 even on failure. */
		ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
				      MSG_DONTWAIT);
		if (ret > 0)
			ret = 0;

		/* this avoids a loop in nfnetlink. */
		return ret == -EAGAIN ? -ENOBUFS : ret;
	}
	return ret;
}
299 | 299 | ||
300 | static int untimeout(struct nf_conn *ct, void *timeout) | 300 | static int untimeout(struct nf_conn *ct, void *timeout) |
301 | { | 301 | { |
302 | struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); | 302 | struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); |
303 | 303 | ||
304 | if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) | 304 | if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) |
305 | RCU_INIT_POINTER(timeout_ext->timeout, NULL); | 305 | RCU_INIT_POINTER(timeout_ext->timeout, NULL); |
306 | 306 | ||
307 | /* We are not intended to delete this conntrack. */ | 307 | /* We are not intended to delete this conntrack. */ |
308 | return 0; | 308 | return 0; |
309 | } | 309 | } |
310 | 310 | ||
/* Detach @timeout (all policies when NULL) from every conntrack in @net;
 * the conntrack entries themselves survive (see untimeout()).
 */
static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
	nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
}
315 | 315 | ||
/* try to delete object, fail if it is still in use. */
static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
{
	int ret = 0;

	/* We want to avoid races with ctnl_timeout_put. So only when the
	 * current refcnt is 1, we decrease it to 0.
	 */
	if (refcount_dec_if_one(&timeout->refcnt)) {
		/* We are protected by nfnl mutex. */
		list_del_rcu(&timeout->head);
		/* Drop the l4proto reference taken at object creation,
		 * clear the policy from live conntracks, then free after
		 * an RCU grace period (readers may still hold a pointer).
		 */
		nf_ct_l4proto_put(timeout->l4proto);
		ctnl_untimeout(net, timeout);
		kfree_rcu(timeout, rcu_head);
	} else {
		ret = -EBUSY;
	}
	return ret;
}
335 | 335 | ||
/* IPCTNL_MSG_TIMEOUT_DELETE handler: delete the object named by
 * CTA_TIMEOUT_NAME, or — when no name is given — best-effort delete
 * every unreferenced object (in-use ones are silently skipped).
 */
static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
				 struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 const struct nlattr * const cda[],
				 struct netlink_ext_ack *extack)
{
	struct ctnl_timeout *cur, *tmp;
	int ret = -ENOENT;
	char *name;

	if (!cda[CTA_TIMEOUT_NAME]) {
		/* Flush: _safe variant because try_del unlinks entries. */
		list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
					 head)
			ctnl_timeout_try_del(net, cur);

		return 0;
	}
	name = nla_data(cda[CTA_TIMEOUT_NAME]);

	list_for_each_entry(cur, &net->nfct_timeout_list, head) {
		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
			continue;

		/* -EBUSY (still referenced) is propagated to userspace. */
		ret = ctnl_timeout_try_del(net, cur);
		if (ret < 0)
			return ret;

		break;
	}
	return ret;
}
367 | 367 | ||
/* IPCTNL_MSG_TIMEOUT_DEFAULT_SET handler: update a protocol's default
 * (per-netns) timeouts. The timeout object pointer passed to
 * ctnl_timeout_parse_policy() is NULL; per this commit, the l4 tracker's
 * nlattr_to_obj() fetches its own default timeouts in that case — TODO
 * confirm against the per-protocol implementations.
 */
static int cttimeout_default_set(struct net *net, struct sock *ctnl,
				 struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 const struct nlattr * const cda[],
				 struct netlink_ext_ack *extack)
{
	const struct nf_conntrack_l4proto *l4proto;
	__u16 l3num;
	__u8 l4num;
	int ret;

	if (!cda[CTA_TIMEOUT_L3PROTO] ||
	    !cda[CTA_TIMEOUT_L4PROTO] ||
	    !cda[CTA_TIMEOUT_DATA])
		return -EINVAL;

	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
	l4proto = nf_ct_l4proto_find_get(l3num, l4num);

	/* This protocol is not supported, skip. */
	if (l4proto->l4proto != l4num) {
		ret = -EOPNOTSUPP;
		goto err;
	}

	ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
					cda[CTA_TIMEOUT_DATA]);
	if (ret < 0)
		goto err;

	nf_ct_l4proto_put(l4proto);
	return 0;
err:
	nf_ct_l4proto_put(l4proto);
	return ret;
}
408 | 405 | ||
/* Serialize a protocol's default timeouts into an nfnetlink message.
 * obj_to_nlattr() is called with a NULL object; per this commit, the l4
 * tracker then dumps its own default timeouts — TODO confirm against the
 * per-protocol implementations.
 */
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
			    u32 seq, u32 type, int event,
			    const struct nf_conntrack_l4proto *l4proto)
{
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	unsigned int flags = portid ? NLM_F_MULTI : 0;

	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
	if (nlh == NULL)
		goto nlmsg_failure;

	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family = AF_UNSPEC;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = 0;

	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
		goto nla_put_failure;

	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
		struct nlattr *nest_parms;
		int ret;

		nest_parms = nla_nest_start(skb,
					    CTA_TIMEOUT_DATA | NLA_F_NESTED);
		if (!nest_parms)
			goto nla_put_failure;

		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
		if (ret < 0)
			goto nla_put_failure;

		nla_nest_end(skb, nest_parms);
	}

	nlmsg_end(skb, nlh);
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -1;
}
457 | 453 | ||
458 | static int cttimeout_default_get(struct net *net, struct sock *ctnl, | 454 | static int cttimeout_default_get(struct net *net, struct sock *ctnl, |
459 | struct sk_buff *skb, | 455 | struct sk_buff *skb, |
460 | const struct nlmsghdr *nlh, | 456 | const struct nlmsghdr *nlh, |
461 | const struct nlattr * const cda[], | 457 | const struct nlattr * const cda[], |
462 | struct netlink_ext_ack *extack) | 458 | struct netlink_ext_ack *extack) |
463 | { | 459 | { |
464 | const struct nf_conntrack_l4proto *l4proto; | 460 | const struct nf_conntrack_l4proto *l4proto; |
465 | struct sk_buff *skb2; | 461 | struct sk_buff *skb2; |
466 | int ret, err; | 462 | int ret, err; |
467 | __u16 l3num; | 463 | __u16 l3num; |
468 | __u8 l4num; | 464 | __u8 l4num; |
469 | 465 | ||
470 | if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) | 466 | if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) |
471 | return -EINVAL; | 467 | return -EINVAL; |
472 | 468 | ||
473 | l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); | 469 | l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); |
474 | l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); | 470 | l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); |
475 | l4proto = nf_ct_l4proto_find_get(l3num, l4num); | 471 | l4proto = nf_ct_l4proto_find_get(l3num, l4num); |
476 | 472 | ||
477 | /* This protocol is not supported, skip. */ | 473 | /* This protocol is not supported, skip. */ |
478 | if (l4proto->l4proto != l4num) { | 474 | if (l4proto->l4proto != l4num) { |
479 | err = -EOPNOTSUPP; | 475 | err = -EOPNOTSUPP; |
480 | goto err; | 476 | goto err; |
481 | } | 477 | } |
482 | 478 | ||
483 | skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); | 479 | skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); |
484 | if (skb2 == NULL) { | 480 | if (skb2 == NULL) { |
485 | err = -ENOMEM; | 481 | err = -ENOMEM; |
486 | goto err; | 482 | goto err; |
487 | } | 483 | } |
488 | 484 | ||
489 | ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, | 485 | ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, |
490 | nlh->nlmsg_seq, | 486 | nlh->nlmsg_seq, |
491 | NFNL_MSG_TYPE(nlh->nlmsg_type), | 487 | NFNL_MSG_TYPE(nlh->nlmsg_type), |
492 | IPCTNL_MSG_TIMEOUT_DEFAULT_SET, | 488 | IPCTNL_MSG_TIMEOUT_DEFAULT_SET, |
493 | l4proto); | 489 | l4proto); |
494 | if (ret <= 0) { | 490 | if (ret <= 0) { |
495 | kfree_skb(skb2); | 491 | kfree_skb(skb2); |
496 | err = -ENOMEM; | 492 | err = -ENOMEM; |
497 | goto err; | 493 | goto err; |
498 | } | 494 | } |
499 | ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); | 495 | ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); |
500 | if (ret > 0) | 496 | if (ret > 0) |
501 | ret = 0; | 497 | ret = 0; |
502 | 498 | ||
503 | /* this avoids a loop in nfnetlink. */ | 499 | /* this avoids a loop in nfnetlink. */ |
504 | return ret == -EAGAIN ? -ENOBUFS : ret; | 500 | return ret == -EAGAIN ? -ENOBUFS : ret; |
505 | err: | 501 | err: |
506 | nf_ct_l4proto_put(l4proto); | 502 | nf_ct_l4proto_put(l4proto); |
507 | return err; | 503 | return err; |
508 | } | 504 | } |
509 | 505 | ||
510 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 506 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
/* Look up a timeout object by name and take both a module reference and
 * an object reference. Returns NULL when the name is unknown, module_get
 * fails, or the object is already being torn down (refcnt hit zero).
 * Walks the list with _rcu accessors; caller provides the read-side
 * protection — TODO confirm against the nf_conntrack_timeout hook users.
 */
static struct ctnl_timeout *
ctnl_timeout_find_get(struct net *net, const char *name)
{
	struct ctnl_timeout *timeout, *matching = NULL;

	list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
			continue;

		/* Pin the module before taking an object reference. */
		if (!try_module_get(THIS_MODULE))
			goto err;

		/* refcnt of zero means deletion is already in flight. */
		if (!refcount_inc_not_zero(&timeout->refcnt)) {
			module_put(THIS_MODULE);
			goto err;
		}
		matching = timeout;
		break;
	}
err:
	return matching;
}
533 | 529 | ||
/* Drop one reference taken by ctnl_timeout_find_get(); frees the object
 * after an RCU grace period when the last reference goes away, and
 * releases the paired module reference.
 */
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
	if (refcount_dec_and_test(&timeout->refcnt))
		kfree_rcu(timeout, rcu_head);

	module_put(THIS_MODULE);
}
541 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ | 537 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ |
542 | 538 | ||
/* nfnetlink message-type -> handler dispatch table; all entries share
 * the top-level CTA_TIMEOUT_* attribute policy.
 */
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
	[IPCTNL_MSG_TIMEOUT_NEW]	= { .call = cttimeout_new_timeout,
					    .attr_count = CTA_TIMEOUT_MAX,
					    .policy = cttimeout_nla_policy },
	[IPCTNL_MSG_TIMEOUT_GET]	= { .call = cttimeout_get_timeout,
					    .attr_count = CTA_TIMEOUT_MAX,
					    .policy = cttimeout_nla_policy },
	[IPCTNL_MSG_TIMEOUT_DELETE]	= { .call = cttimeout_del_timeout,
					    .attr_count = CTA_TIMEOUT_MAX,
					    .policy = cttimeout_nla_policy },
	[IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
					    .attr_count = CTA_TIMEOUT_MAX,
					    .policy = cttimeout_nla_policy },
	[IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
					    .attr_count = CTA_TIMEOUT_MAX,
					    .policy = cttimeout_nla_policy },
};
560 | 556 | ||
/* Subsystem descriptor registered with nfnetlink; routes messages for
 * NFNL_SUBSYS_CTNETLINK_TIMEOUT to the cttimeout_cb handler table. */
static const struct nfnetlink_subsystem cttimeout_subsys = {
	.name				= "conntrack_timeout",
	.subsys_id			= NFNL_SUBSYS_CTNETLINK_TIMEOUT,
	.cb_count			= IPCTNL_MSG_TIMEOUT_MAX,
	.cb				= cttimeout_cb,
};

/* Allow this module to be auto-loaded when userspace talks to the
 * CTNETLINK_TIMEOUT nfnetlink subsystem. */
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
569 | 565 | ||
/* Per-netns init: start each namespace with an empty timeout policy list. */
static int __net_init cttimeout_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->nfct_timeout_list);

	return 0;
}
576 | 572 | ||
577 | static void __net_exit cttimeout_net_exit(struct net *net) | 573 | static void __net_exit cttimeout_net_exit(struct net *net) |
578 | { | 574 | { |
579 | struct ctnl_timeout *cur, *tmp; | 575 | struct ctnl_timeout *cur, *tmp; |
580 | 576 | ||
581 | nf_ct_unconfirmed_destroy(net); | 577 | nf_ct_unconfirmed_destroy(net); |
582 | ctnl_untimeout(net, NULL); | 578 | ctnl_untimeout(net, NULL); |
583 | 579 | ||
584 | list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { | 580 | list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { |
585 | list_del_rcu(&cur->head); | 581 | list_del_rcu(&cur->head); |
586 | nf_ct_l4proto_put(cur->l4proto); | 582 | nf_ct_l4proto_put(cur->l4proto); |
587 | 583 | ||
588 | if (refcount_dec_and_test(&cur->refcnt)) | 584 | if (refcount_dec_and_test(&cur->refcnt)) |
589 | kfree_rcu(cur, rcu_head); | 585 | kfree_rcu(cur, rcu_head); |
590 | } | 586 | } |
591 | } | 587 | } |
592 | 588 | ||
/* Hook per-namespace setup/teardown into the pernet subsystem machinery. */
static struct pernet_operations cttimeout_ops = {
	.init	= cttimeout_net_init,
	.exit	= cttimeout_net_exit,
};
597 | 593 | ||
598 | static int __init cttimeout_init(void) | 594 | static int __init cttimeout_init(void) |
599 | { | 595 | { |
600 | int ret; | 596 | int ret; |
601 | 597 | ||
602 | ret = register_pernet_subsys(&cttimeout_ops); | 598 | ret = register_pernet_subsys(&cttimeout_ops); |
603 | if (ret < 0) | 599 | if (ret < 0) |
604 | return ret; | 600 | return ret; |
605 | 601 | ||
606 | ret = nfnetlink_subsys_register(&cttimeout_subsys); | 602 | ret = nfnetlink_subsys_register(&cttimeout_subsys); |
607 | if (ret < 0) { | 603 | if (ret < 0) { |
608 | pr_err("cttimeout_init: cannot register cttimeout with " | 604 | pr_err("cttimeout_init: cannot register cttimeout with " |
609 | "nfnetlink.\n"); | 605 | "nfnetlink.\n"); |
610 | goto err_out; | 606 | goto err_out; |
611 | } | 607 | } |
612 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT | 608 | #ifdef CONFIG_NF_CONNTRACK_TIMEOUT |
613 | RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get); | 609 | RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get); |
614 | RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put); | 610 | RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put); |
615 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ | 611 | #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ |
616 | return 0; | 612 | return 0; |
617 | 613 | ||
618 | err_out: | 614 | err_out: |
619 | unregister_pernet_subsys(&cttimeout_ops); | 615 | unregister_pernet_subsys(&cttimeout_ops); |
620 | return ret; | 616 | return ret; |
621 | } | 617 | } |
622 | 618 | ||
/* Module exit: unregister from nfnetlink and the pernet machinery, then
 * clear the conntrack timeout hooks.  synchronize_rcu() ensures no RCU
 * reader can still be calling into this module's hook functions after
 * they have been NULLed, before the module text is unloaded. */
static void __exit cttimeout_exit(void)
{
	nfnetlink_subsys_unregister(&cttimeout_subsys);

	unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
	synchronize_rcu();
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
}

module_init(cttimeout_init);
module_exit(cttimeout_exit);
637 | 633 |