Commit c779e849608a875448f6ffc2a5c2a15523bdcd00

Authored by Florian Westphal
Committed by Pablo Neira Ayuso
1 parent 97e08caec3

netfilter: conntrack: remove get_timeout() indirection

Not needed, we can have the l4trackers fetch it themselves.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Showing 12 changed files with 94 additions and 104 deletions Inline Diff

include/net/netfilter/nf_conntrack_l4proto.h
1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* 2 /*
3 * Header for use in defining a given L4 protocol for connection tracking. 3 * Header for use in defining a given L4 protocol for connection tracking.
4 * 4 *
5 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 5 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
6 * - generalized L3 protocol dependent part. 6 * - generalized L3 protocol dependent part.
7 * 7 *
8 * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h 8 * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h
9 */ 9 */
10 10
11 #ifndef _NF_CONNTRACK_L4PROTO_H 11 #ifndef _NF_CONNTRACK_L4PROTO_H
12 #define _NF_CONNTRACK_L4PROTO_H 12 #define _NF_CONNTRACK_L4PROTO_H
13 #include <linux/netlink.h> 13 #include <linux/netlink.h>
14 #include <net/netlink.h> 14 #include <net/netlink.h>
15 #include <net/netfilter/nf_conntrack.h> 15 #include <net/netfilter/nf_conntrack.h>
16 #include <net/netns/generic.h> 16 #include <net/netns/generic.h>
17 17
18 struct seq_file; 18 struct seq_file;
19 19
20 struct nf_conntrack_l4proto { 20 struct nf_conntrack_l4proto {
21 /* L3 Protocol number. */ 21 /* L3 Protocol number. */
22 u_int16_t l3proto; 22 u_int16_t l3proto;
23 23
24 /* L4 Protocol number. */ 24 /* L4 Protocol number. */
25 u_int8_t l4proto; 25 u_int8_t l4proto;
26 26
27 /* Resolve clashes on insertion races. */ 27 /* Resolve clashes on insertion races. */
28 bool allow_clash; 28 bool allow_clash;
29 29
30 /* protoinfo nlattr size, closes a hole */ 30 /* protoinfo nlattr size, closes a hole */
31 u16 nlattr_size; 31 u16 nlattr_size;
32 32
33 /* Try to fill in the third arg: dataoff is offset past network protocol 33 /* Try to fill in the third arg: dataoff is offset past network protocol
34 hdr. Return true if possible. */ 34 hdr. Return true if possible. */
35 bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, 35 bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
36 struct net *net, struct nf_conntrack_tuple *tuple); 36 struct net *net, struct nf_conntrack_tuple *tuple);
37 37
38 /* Invert the per-proto part of the tuple: ie. turn xmit into reply. 38 /* Invert the per-proto part of the tuple: ie. turn xmit into reply.
39 * Only used by icmp, most protocols use a generic version. 39 * Only used by icmp, most protocols use a generic version.
40 */ 40 */
41 bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, 41 bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
42 const struct nf_conntrack_tuple *orig); 42 const struct nf_conntrack_tuple *orig);
43 43
44 /* Returns verdict for packet, or -1 for invalid. */ 44 /* Returns verdict for packet, or -1 for invalid. */
45 int (*packet)(struct nf_conn *ct, 45 int (*packet)(struct nf_conn *ct,
46 const struct sk_buff *skb, 46 const struct sk_buff *skb,
47 unsigned int dataoff, 47 unsigned int dataoff,
48 enum ip_conntrack_info ctinfo, 48 enum ip_conntrack_info ctinfo);
49 unsigned int *timeouts);
50 49
51 /* Called when a new connection for this protocol found; 50 /* Called when a new connection for this protocol found;
52 * returns TRUE if it's OK. If so, packet() called next. */ 51 * returns TRUE if it's OK. If so, packet() called next. */
53 bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, 52 bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
54 unsigned int dataoff, unsigned int *timeouts); 53 unsigned int dataoff);
55 54
56 /* Called when a conntrack entry is destroyed */ 55 /* Called when a conntrack entry is destroyed */
57 void (*destroy)(struct nf_conn *ct); 56 void (*destroy)(struct nf_conn *ct);
58 57
59 int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, 58 int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
60 unsigned int dataoff, 59 unsigned int dataoff,
61 u_int8_t pf, unsigned int hooknum); 60 u_int8_t pf, unsigned int hooknum);
62 61
63 /* called by gc worker if table is full */ 62 /* called by gc worker if table is full */
64 bool (*can_early_drop)(const struct nf_conn *ct); 63 bool (*can_early_drop)(const struct nf_conn *ct);
65
66 /* Return the array of timeouts for this protocol. */
67 unsigned int *(*get_timeouts)(struct net *net);
68 64
69 /* convert protoinfo to nfnetink attributes */ 65 /* convert protoinfo to nfnetink attributes */
70 int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, 66 int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
71 struct nf_conn *ct); 67 struct nf_conn *ct);
72 68
73 /* convert nfnetlink attributes to protoinfo */ 69 /* convert nfnetlink attributes to protoinfo */
74 int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct); 70 int (*from_nlattr)(struct nlattr *tb[], struct nf_conn *ct);
75 71
76 int (*tuple_to_nlattr)(struct sk_buff *skb, 72 int (*tuple_to_nlattr)(struct sk_buff *skb,
77 const struct nf_conntrack_tuple *t); 73 const struct nf_conntrack_tuple *t);
78 /* Calculate tuple nlattr size */ 74 /* Calculate tuple nlattr size */
79 unsigned int (*nlattr_tuple_size)(void); 75 unsigned int (*nlattr_tuple_size)(void);
80 int (*nlattr_to_tuple)(struct nlattr *tb[], 76 int (*nlattr_to_tuple)(struct nlattr *tb[],
81 struct nf_conntrack_tuple *t); 77 struct nf_conntrack_tuple *t);
82 const struct nla_policy *nla_policy; 78 const struct nla_policy *nla_policy;
83 79
84 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 80 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
85 struct { 81 struct {
86 int (*nlattr_to_obj)(struct nlattr *tb[], 82 int (*nlattr_to_obj)(struct nlattr *tb[],
87 struct net *net, void *data); 83 struct net *net, void *data);
88 int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); 84 int (*obj_to_nlattr)(struct sk_buff *skb, const void *data);
89 85
90 u16 obj_size; 86 u16 obj_size;
91 u16 nlattr_max; 87 u16 nlattr_max;
92 const struct nla_policy *nla_policy; 88 const struct nla_policy *nla_policy;
93 } ctnl_timeout; 89 } ctnl_timeout;
94 #endif 90 #endif
95 #ifdef CONFIG_NF_CONNTRACK_PROCFS 91 #ifdef CONFIG_NF_CONNTRACK_PROCFS
96 /* Print out the private part of the conntrack. */ 92 /* Print out the private part of the conntrack. */
97 void (*print_conntrack)(struct seq_file *s, struct nf_conn *); 93 void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
98 #endif 94 #endif
99 unsigned int *net_id; 95 unsigned int *net_id;
100 /* Init l4proto pernet data */ 96 /* Init l4proto pernet data */
101 int (*init_net)(struct net *net, u_int16_t proto); 97 int (*init_net)(struct net *net, u_int16_t proto);
102 98
103 /* Return the per-net protocol part. */ 99 /* Return the per-net protocol part. */
104 struct nf_proto_net *(*get_net_proto)(struct net *net); 100 struct nf_proto_net *(*get_net_proto)(struct net *net);
105 101
106 /* Module (if any) which this is connected to. */ 102 /* Module (if any) which this is connected to. */
107 struct module *me; 103 struct module *me;
108 }; 104 };
109 105
110 /* Existing built-in generic protocol */ 106 /* Existing built-in generic protocol */
111 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; 107 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
112 108
113 #define MAX_NF_CT_PROTO 256 109 #define MAX_NF_CT_PROTO 256
114 110
115 const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, 111 const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
116 u_int8_t l4proto); 112 u_int8_t l4proto);
117 113
118 const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, 114 const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
119 u_int8_t l4proto); 115 u_int8_t l4proto);
120 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); 116 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
121 117
122 /* Protocol pernet registration. */ 118 /* Protocol pernet registration. */
123 int nf_ct_l4proto_pernet_register_one(struct net *net, 119 int nf_ct_l4proto_pernet_register_one(struct net *net,
124 const struct nf_conntrack_l4proto *proto); 120 const struct nf_conntrack_l4proto *proto);
125 void nf_ct_l4proto_pernet_unregister_one(struct net *net, 121 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
126 const struct nf_conntrack_l4proto *proto); 122 const struct nf_conntrack_l4proto *proto);
127 int nf_ct_l4proto_pernet_register(struct net *net, 123 int nf_ct_l4proto_pernet_register(struct net *net,
128 const struct nf_conntrack_l4proto *const proto[], 124 const struct nf_conntrack_l4proto *const proto[],
129 unsigned int num_proto); 125 unsigned int num_proto);
130 void nf_ct_l4proto_pernet_unregister(struct net *net, 126 void nf_ct_l4proto_pernet_unregister(struct net *net,
131 const struct nf_conntrack_l4proto *const proto[], 127 const struct nf_conntrack_l4proto *const proto[],
132 unsigned int num_proto); 128 unsigned int num_proto);
133 129
134 /* Protocol global registration. */ 130 /* Protocol global registration. */
135 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); 131 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
136 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); 132 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
137 int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], 133 int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
138 unsigned int num_proto); 134 unsigned int num_proto);
139 void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], 135 void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
140 unsigned int num_proto); 136 unsigned int num_proto);
141 137
142 /* Generic netlink helpers */ 138 /* Generic netlink helpers */
143 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 139 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
144 const struct nf_conntrack_tuple *tuple); 140 const struct nf_conntrack_tuple *tuple);
145 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], 141 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
146 struct nf_conntrack_tuple *t); 142 struct nf_conntrack_tuple *t);
147 unsigned int nf_ct_port_nlattr_tuple_size(void); 143 unsigned int nf_ct_port_nlattr_tuple_size(void);
148 extern const struct nla_policy nf_ct_port_nla_policy[]; 144 extern const struct nla_policy nf_ct_port_nla_policy[];
149 145
150 #ifdef CONFIG_SYSCTL 146 #ifdef CONFIG_SYSCTL
151 __printf(3, 4) __cold 147 __printf(3, 4) __cold
152 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, 148 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
153 const struct nf_conn *ct, 149 const struct nf_conn *ct,
154 const char *fmt, ...); 150 const char *fmt, ...);
155 __printf(5, 6) __cold 151 __printf(5, 6) __cold
156 void nf_l4proto_log_invalid(const struct sk_buff *skb, 152 void nf_l4proto_log_invalid(const struct sk_buff *skb,
157 struct net *net, 153 struct net *net,
158 u16 pf, u8 protonum, 154 u16 pf, u8 protonum,
159 const char *fmt, ...); 155 const char *fmt, ...);
160 #else 156 #else
161 static inline __printf(5, 6) __cold 157 static inline __printf(5, 6) __cold
162 void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, 158 void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net,
163 u16 pf, u8 protonum, const char *fmt, ...) {} 159 u16 pf, u8 protonum, const char *fmt, ...) {}
164 static inline __printf(3, 4) __cold 160 static inline __printf(3, 4) __cold
165 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, 161 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
166 const struct nf_conn *ct, 162 const struct nf_conn *ct,
167 const char *fmt, ...) { } 163 const char *fmt, ...) { }
168 #endif /* CONFIG_SYSCTL */ 164 #endif /* CONFIG_SYSCTL */
169 165
170 #endif /*_NF_CONNTRACK_PROTOCOL_H*/ 166 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
171 167
include/net/netfilter/nf_conntrack_timeout.h
1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _NF_CONNTRACK_TIMEOUT_H 2 #ifndef _NF_CONNTRACK_TIMEOUT_H
3 #define _NF_CONNTRACK_TIMEOUT_H 3 #define _NF_CONNTRACK_TIMEOUT_H
4 4
5 #include <net/net_namespace.h> 5 #include <net/net_namespace.h>
6 #include <linux/netfilter/nf_conntrack_common.h> 6 #include <linux/netfilter/nf_conntrack_common.h>
7 #include <linux/netfilter/nf_conntrack_tuple_common.h> 7 #include <linux/netfilter/nf_conntrack_tuple_common.h>
8 #include <linux/refcount.h> 8 #include <linux/refcount.h>
9 #include <net/netfilter/nf_conntrack.h> 9 #include <net/netfilter/nf_conntrack.h>
10 #include <net/netfilter/nf_conntrack_extend.h> 10 #include <net/netfilter/nf_conntrack_extend.h>
11 11
12 #define CTNL_TIMEOUT_NAME_MAX 32 12 #define CTNL_TIMEOUT_NAME_MAX 32
13 13
14 struct ctnl_timeout { 14 struct ctnl_timeout {
15 struct list_head head; 15 struct list_head head;
16 struct rcu_head rcu_head; 16 struct rcu_head rcu_head;
17 refcount_t refcnt; 17 refcount_t refcnt;
18 char name[CTNL_TIMEOUT_NAME_MAX]; 18 char name[CTNL_TIMEOUT_NAME_MAX];
19 __u16 l3num; 19 __u16 l3num;
20 const struct nf_conntrack_l4proto *l4proto; 20 const struct nf_conntrack_l4proto *l4proto;
21 char data[0]; 21 char data[0];
22 }; 22 };
23 23
24 struct nf_conn_timeout { 24 struct nf_conn_timeout {
25 struct ctnl_timeout __rcu *timeout; 25 struct ctnl_timeout __rcu *timeout;
26 }; 26 };
27 27
28 static inline unsigned int * 28 static inline unsigned int *
29 nf_ct_timeout_data(struct nf_conn_timeout *t) 29 nf_ct_timeout_data(struct nf_conn_timeout *t)
30 { 30 {
31 struct ctnl_timeout *timeout; 31 struct ctnl_timeout *timeout;
32 32
33 timeout = rcu_dereference(t->timeout); 33 timeout = rcu_dereference(t->timeout);
34 if (timeout == NULL) 34 if (timeout == NULL)
35 return NULL; 35 return NULL;
36 36
37 return (unsigned int *)timeout->data; 37 return (unsigned int *)timeout->data;
38 } 38 }
39 39
40 static inline 40 static inline
41 struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct) 41 struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct)
42 { 42 {
43 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 43 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
44 return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT); 44 return nf_ct_ext_find(ct, NF_CT_EXT_TIMEOUT);
45 #else 45 #else
46 return NULL; 46 return NULL;
47 #endif 47 #endif
48 } 48 }
49 49
50 static inline 50 static inline
51 struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, 51 struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
52 struct ctnl_timeout *timeout, 52 struct ctnl_timeout *timeout,
53 gfp_t gfp) 53 gfp_t gfp)
54 { 54 {
55 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 55 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
56 struct nf_conn_timeout *timeout_ext; 56 struct nf_conn_timeout *timeout_ext;
57 57
58 timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp); 58 timeout_ext = nf_ct_ext_add(ct, NF_CT_EXT_TIMEOUT, gfp);
59 if (timeout_ext == NULL) 59 if (timeout_ext == NULL)
60 return NULL; 60 return NULL;
61 61
62 rcu_assign_pointer(timeout_ext->timeout, timeout); 62 rcu_assign_pointer(timeout_ext->timeout, timeout);
63 63
64 return timeout_ext; 64 return timeout_ext;
65 #else 65 #else
66 return NULL; 66 return NULL;
67 #endif 67 #endif
68 }; 68 };
69 69
70 static inline unsigned int * 70 static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
71 nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
72 const struct nf_conntrack_l4proto *l4proto)
73 { 71 {
72 unsigned int *timeouts = NULL;
74 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 73 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
75 struct nf_conn_timeout *timeout_ext; 74 struct nf_conn_timeout *timeout_ext;
76 unsigned int *timeouts;
77 75
78 timeout_ext = nf_ct_timeout_find(ct); 76 timeout_ext = nf_ct_timeout_find(ct);
79 if (timeout_ext) { 77 if (timeout_ext)
80 timeouts = nf_ct_timeout_data(timeout_ext); 78 timeouts = nf_ct_timeout_data(timeout_ext);
81 if (unlikely(!timeouts))
82 timeouts = l4proto->get_timeouts(net);
83 } else {
84 timeouts = l4proto->get_timeouts(net);
85 }
86
87 return timeouts;
88 #else
89 return l4proto->get_timeouts(net);
90 #endif 79 #endif
80 return timeouts;
91 } 81 }
92 82
93 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 83 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
94 int nf_conntrack_timeout_init(void); 84 int nf_conntrack_timeout_init(void);
95 void nf_conntrack_timeout_fini(void); 85 void nf_conntrack_timeout_fini(void);
96 #else 86 #else
97 static inline int nf_conntrack_timeout_init(void) 87 static inline int nf_conntrack_timeout_init(void)
98 { 88 {
99 return 0; 89 return 0;
100 } 90 }
101 91
102 static inline void nf_conntrack_timeout_fini(void) 92 static inline void nf_conntrack_timeout_fini(void)
103 { 93 {
104 return; 94 return;
105 } 95 }
106 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ 96 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
107 97
108 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 98 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
109 extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); 99 extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
110 extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); 100 extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout);
111 #endif 101 #endif
112 102
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2006-2010 Patrick McHardy <kaber@trash.net> 3 * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9
10 #include <linux/types.h> 10 #include <linux/types.h>
11 #include <linux/timer.h> 11 #include <linux/timer.h>
12 #include <linux/netfilter.h> 12 #include <linux/netfilter.h>
13 #include <linux/in.h> 13 #include <linux/in.h>
14 #include <linux/icmp.h> 14 #include <linux/icmp.h>
15 #include <linux/seq_file.h> 15 #include <linux/seq_file.h>
16 #include <net/ip.h> 16 #include <net/ip.h>
17 #include <net/checksum.h> 17 #include <net/checksum.h>
18 #include <linux/netfilter_ipv4.h> 18 #include <linux/netfilter_ipv4.h>
19 #include <net/netfilter/nf_conntrack_tuple.h> 19 #include <net/netfilter/nf_conntrack_tuple.h>
20 #include <net/netfilter/nf_conntrack_l4proto.h> 20 #include <net/netfilter/nf_conntrack_l4proto.h>
21 #include <net/netfilter/nf_conntrack_core.h> 21 #include <net/netfilter/nf_conntrack_core.h>
22 #include <net/netfilter/nf_conntrack_timeout.h>
22 #include <net/netfilter/nf_conntrack_zones.h> 23 #include <net/netfilter/nf_conntrack_zones.h>
23 #include <net/netfilter/nf_log.h> 24 #include <net/netfilter/nf_log.h>
24 25
25 static const unsigned int nf_ct_icmp_timeout = 30*HZ; 26 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
26 27
27 static inline struct nf_icmp_net *icmp_pernet(struct net *net) 28 static inline struct nf_icmp_net *icmp_pernet(struct net *net)
28 { 29 {
29 return &net->ct.nf_ct_proto.icmp; 30 return &net->ct.nf_ct_proto.icmp;
30 } 31 }
31 32
32 static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 33 static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
33 struct net *net, struct nf_conntrack_tuple *tuple) 34 struct net *net, struct nf_conntrack_tuple *tuple)
34 { 35 {
35 const struct icmphdr *hp; 36 const struct icmphdr *hp;
36 struct icmphdr _hdr; 37 struct icmphdr _hdr;
37 38
38 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 39 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
39 if (hp == NULL) 40 if (hp == NULL)
40 return false; 41 return false;
41 42
42 tuple->dst.u.icmp.type = hp->type; 43 tuple->dst.u.icmp.type = hp->type;
43 tuple->src.u.icmp.id = hp->un.echo.id; 44 tuple->src.u.icmp.id = hp->un.echo.id;
44 tuple->dst.u.icmp.code = hp->code; 45 tuple->dst.u.icmp.code = hp->code;
45 46
46 return true; 47 return true;
47 } 48 }
48 49
49 /* Add 1; spaces filled with 0. */ 50 /* Add 1; spaces filled with 0. */
50 static const u_int8_t invmap[] = { 51 static const u_int8_t invmap[] = {
51 [ICMP_ECHO] = ICMP_ECHOREPLY + 1, 52 [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
52 [ICMP_ECHOREPLY] = ICMP_ECHO + 1, 53 [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
53 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, 54 [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
54 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, 55 [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
55 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, 56 [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
56 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, 57 [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
57 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, 58 [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
58 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 59 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
59 }; 60 };
60 61
61 static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, 62 static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
62 const struct nf_conntrack_tuple *orig) 63 const struct nf_conntrack_tuple *orig)
63 { 64 {
64 if (orig->dst.u.icmp.type >= sizeof(invmap) || 65 if (orig->dst.u.icmp.type >= sizeof(invmap) ||
65 !invmap[orig->dst.u.icmp.type]) 66 !invmap[orig->dst.u.icmp.type])
66 return false; 67 return false;
67 68
68 tuple->src.u.icmp.id = orig->src.u.icmp.id; 69 tuple->src.u.icmp.id = orig->src.u.icmp.id;
69 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; 70 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
70 tuple->dst.u.icmp.code = orig->dst.u.icmp.code; 71 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
71 return true; 72 return true;
72 } 73 }
73 74
74 static unsigned int *icmp_get_timeouts(struct net *net) 75 static unsigned int *icmp_get_timeouts(struct net *net)
75 { 76 {
76 return &icmp_pernet(net)->timeout; 77 return &icmp_pernet(net)->timeout;
77 } 78 }
78 79
79 /* Returns verdict for packet, or -1 for invalid. */ 80 /* Returns verdict for packet, or -1 for invalid. */
80 static int icmp_packet(struct nf_conn *ct, 81 static int icmp_packet(struct nf_conn *ct,
81 const struct sk_buff *skb, 82 const struct sk_buff *skb,
82 unsigned int dataoff, 83 unsigned int dataoff,
83 enum ip_conntrack_info ctinfo, 84 enum ip_conntrack_info ctinfo)
84 unsigned int *timeout)
85 { 85 {
86 /* Do not immediately delete the connection after the first 86 /* Do not immediately delete the connection after the first
87 successful reply to avoid excessive conntrackd traffic 87 successful reply to avoid excessive conntrackd traffic
88 and also to handle correctly ICMP echo reply duplicates. */ 88 and also to handle correctly ICMP echo reply duplicates. */
89 unsigned int *timeout = nf_ct_timeout_lookup(ct);
90
91 if (!timeout)
92 timeout = icmp_get_timeouts(nf_ct_net(ct));
93
89 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 94 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
90 95
91 return NF_ACCEPT; 96 return NF_ACCEPT;
92 } 97 }
93 98
94 /* Called when a new connection for this protocol found. */ 99 /* Called when a new connection for this protocol found. */
95 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, 100 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
96 unsigned int dataoff, unsigned int *timeouts) 101 unsigned int dataoff)
97 { 102 {
98 static const u_int8_t valid_new[] = { 103 static const u_int8_t valid_new[] = {
99 [ICMP_ECHO] = 1, 104 [ICMP_ECHO] = 1,
100 [ICMP_TIMESTAMP] = 1, 105 [ICMP_TIMESTAMP] = 1,
101 [ICMP_INFO_REQUEST] = 1, 106 [ICMP_INFO_REQUEST] = 1,
102 [ICMP_ADDRESS] = 1 107 [ICMP_ADDRESS] = 1
103 }; 108 };
104 109
105 if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || 110 if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
106 !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { 111 !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
107 /* Can't create a new ICMP `conn' with this. */ 112 /* Can't create a new ICMP `conn' with this. */
108 pr_debug("icmp: can't create new conn with type %u\n", 113 pr_debug("icmp: can't create new conn with type %u\n",
109 ct->tuplehash[0].tuple.dst.u.icmp.type); 114 ct->tuplehash[0].tuple.dst.u.icmp.type);
110 nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); 115 nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
111 return false; 116 return false;
112 } 117 }
113 return true; 118 return true;
114 } 119 }
115 120
116 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 121 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
117 static int 122 static int
118 icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, 123 icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
119 unsigned int hooknum) 124 unsigned int hooknum)
120 { 125 {
121 struct nf_conntrack_tuple innertuple, origtuple; 126 struct nf_conntrack_tuple innertuple, origtuple;
122 const struct nf_conntrack_l4proto *innerproto; 127 const struct nf_conntrack_l4proto *innerproto;
123 const struct nf_conntrack_tuple_hash *h; 128 const struct nf_conntrack_tuple_hash *h;
124 const struct nf_conntrack_zone *zone; 129 const struct nf_conntrack_zone *zone;
125 enum ip_conntrack_info ctinfo; 130 enum ip_conntrack_info ctinfo;
126 struct nf_conntrack_zone tmp; 131 struct nf_conntrack_zone tmp;
127 132
128 WARN_ON(skb_nfct(skb)); 133 WARN_ON(skb_nfct(skb));
129 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 134 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
130 135
131 /* Are they talking about one of our connections? */ 136 /* Are they talking about one of our connections? */
132 if (!nf_ct_get_tuplepr(skb, 137 if (!nf_ct_get_tuplepr(skb,
133 skb_network_offset(skb) + ip_hdrlen(skb) 138 skb_network_offset(skb) + ip_hdrlen(skb)
134 + sizeof(struct icmphdr), 139 + sizeof(struct icmphdr),
135 PF_INET, net, &origtuple)) { 140 PF_INET, net, &origtuple)) {
136 pr_debug("icmp_error_message: failed to get tuple\n"); 141 pr_debug("icmp_error_message: failed to get tuple\n");
137 return -NF_ACCEPT; 142 return -NF_ACCEPT;
138 } 143 }
139 144
140 /* rcu_read_lock()ed by nf_hook_thresh */ 145 /* rcu_read_lock()ed by nf_hook_thresh */
141 innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); 146 innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
142 147
143 /* Ordinarily, we'd expect the inverted tupleproto, but it's 148 /* Ordinarily, we'd expect the inverted tupleproto, but it's
144 been preserved inside the ICMP. */ 149 been preserved inside the ICMP. */
145 if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { 150 if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
146 pr_debug("icmp_error_message: no match\n"); 151 pr_debug("icmp_error_message: no match\n");
147 return -NF_ACCEPT; 152 return -NF_ACCEPT;
148 } 153 }
149 154
150 ctinfo = IP_CT_RELATED; 155 ctinfo = IP_CT_RELATED;
151 156
152 h = nf_conntrack_find_get(net, zone, &innertuple); 157 h = nf_conntrack_find_get(net, zone, &innertuple);
153 if (!h) { 158 if (!h) {
154 pr_debug("icmp_error_message: no match\n"); 159 pr_debug("icmp_error_message: no match\n");
155 return -NF_ACCEPT; 160 return -NF_ACCEPT;
156 } 161 }
157 162
158 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 163 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
159 ctinfo += IP_CT_IS_REPLY; 164 ctinfo += IP_CT_IS_REPLY;
160 165
161 /* Update skb to refer to this connection */ 166 /* Update skb to refer to this connection */
162 nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); 167 nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
163 return NF_ACCEPT; 168 return NF_ACCEPT;
164 } 169 }
165 170
166 static void icmp_error_log(const struct sk_buff *skb, struct net *net, 171 static void icmp_error_log(const struct sk_buff *skb, struct net *net,
167 u8 pf, const char *msg) 172 u8 pf, const char *msg)
168 { 173 {
169 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); 174 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
170 } 175 }
171 176
172 /* Small and modified version of icmp_rcv */ 177 /* Small and modified version of icmp_rcv */
173 static int 178 static int
174 icmp_error(struct net *net, struct nf_conn *tmpl, 179 icmp_error(struct net *net, struct nf_conn *tmpl,
175 struct sk_buff *skb, unsigned int dataoff, 180 struct sk_buff *skb, unsigned int dataoff,
176 u8 pf, unsigned int hooknum) 181 u8 pf, unsigned int hooknum)
177 { 182 {
178 const struct icmphdr *icmph; 183 const struct icmphdr *icmph;
179 struct icmphdr _ih; 184 struct icmphdr _ih;
180 185
181 /* Not enough header? */ 186 /* Not enough header? */
182 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 187 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
183 if (icmph == NULL) { 188 if (icmph == NULL) {
184 icmp_error_log(skb, net, pf, "short packet"); 189 icmp_error_log(skb, net, pf, "short packet");
185 return -NF_ACCEPT; 190 return -NF_ACCEPT;
186 } 191 }
187 192
188 /* See ip_conntrack_proto_tcp.c */ 193 /* See ip_conntrack_proto_tcp.c */
189 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 194 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
190 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 195 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
191 icmp_error_log(skb, net, pf, "bad hw icmp checksum"); 196 icmp_error_log(skb, net, pf, "bad hw icmp checksum");
192 return -NF_ACCEPT; 197 return -NF_ACCEPT;
193 } 198 }
194 199
195 /* 200 /*
196 * 18 is the highest 'known' ICMP type. Anything else is a mystery 201 * 18 is the highest 'known' ICMP type. Anything else is a mystery
197 * 202 *
198 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently 203 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
199 * discarded. 204 * discarded.
200 */ 205 */
201 if (icmph->type > NR_ICMP_TYPES) { 206 if (icmph->type > NR_ICMP_TYPES) {
202 icmp_error_log(skb, net, pf, "invalid icmp type"); 207 icmp_error_log(skb, net, pf, "invalid icmp type");
203 return -NF_ACCEPT; 208 return -NF_ACCEPT;
204 } 209 }
205 210
206 /* Need to track icmp error message? */ 211 /* Need to track icmp error message? */
207 if (icmph->type != ICMP_DEST_UNREACH && 212 if (icmph->type != ICMP_DEST_UNREACH &&
208 icmph->type != ICMP_SOURCE_QUENCH && 213 icmph->type != ICMP_SOURCE_QUENCH &&
209 icmph->type != ICMP_TIME_EXCEEDED && 214 icmph->type != ICMP_TIME_EXCEEDED &&
210 icmph->type != ICMP_PARAMETERPROB && 215 icmph->type != ICMP_PARAMETERPROB &&
211 icmph->type != ICMP_REDIRECT) 216 icmph->type != ICMP_REDIRECT)
212 return NF_ACCEPT; 217 return NF_ACCEPT;
213 218
214 return icmp_error_message(net, tmpl, skb, hooknum); 219 return icmp_error_message(net, tmpl, skb, hooknum);
215 } 220 }
216 221
217 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 222 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
218 223
219 #include <linux/netfilter/nfnetlink.h> 224 #include <linux/netfilter/nfnetlink.h>
220 #include <linux/netfilter/nfnetlink_conntrack.h> 225 #include <linux/netfilter/nfnetlink_conntrack.h>
221 226
222 static int icmp_tuple_to_nlattr(struct sk_buff *skb, 227 static int icmp_tuple_to_nlattr(struct sk_buff *skb,
223 const struct nf_conntrack_tuple *t) 228 const struct nf_conntrack_tuple *t)
224 { 229 {
225 if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || 230 if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
226 nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || 231 nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
227 nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) 232 nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
228 goto nla_put_failure; 233 goto nla_put_failure;
229 return 0; 234 return 0;
230 235
231 nla_put_failure: 236 nla_put_failure:
232 return -1; 237 return -1;
233 } 238 }
234 239
235 static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { 240 static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
236 [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, 241 [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 },
237 [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, 242 [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 },
238 [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, 243 [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 },
239 }; 244 };
240 245
241 static int icmp_nlattr_to_tuple(struct nlattr *tb[], 246 static int icmp_nlattr_to_tuple(struct nlattr *tb[],
242 struct nf_conntrack_tuple *tuple) 247 struct nf_conntrack_tuple *tuple)
243 { 248 {
244 if (!tb[CTA_PROTO_ICMP_TYPE] || 249 if (!tb[CTA_PROTO_ICMP_TYPE] ||
245 !tb[CTA_PROTO_ICMP_CODE] || 250 !tb[CTA_PROTO_ICMP_CODE] ||
246 !tb[CTA_PROTO_ICMP_ID]) 251 !tb[CTA_PROTO_ICMP_ID])
247 return -EINVAL; 252 return -EINVAL;
248 253
249 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); 254 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
250 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); 255 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
251 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); 256 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
252 257
253 if (tuple->dst.u.icmp.type >= sizeof(invmap) || 258 if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
254 !invmap[tuple->dst.u.icmp.type]) 259 !invmap[tuple->dst.u.icmp.type])
255 return -EINVAL; 260 return -EINVAL;
256 261
257 return 0; 262 return 0;
258 } 263 }
259 264
260 static unsigned int icmp_nlattr_tuple_size(void) 265 static unsigned int icmp_nlattr_tuple_size(void)
261 { 266 {
262 static unsigned int size __read_mostly; 267 static unsigned int size __read_mostly;
263 268
264 if (!size) 269 if (!size)
265 size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); 270 size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
266 271
267 return size; 272 return size;
268 } 273 }
269 #endif 274 #endif
270 275
271 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 276 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
272 277
273 #include <linux/netfilter/nfnetlink.h> 278 #include <linux/netfilter/nfnetlink.h>
274 #include <linux/netfilter/nfnetlink_cttimeout.h> 279 #include <linux/netfilter/nfnetlink_cttimeout.h>
275 280
276 static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], 281 static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
277 struct net *net, void *data) 282 struct net *net, void *data)
278 { 283 {
279 unsigned int *timeout = data; 284 unsigned int *timeout = data;
280 struct nf_icmp_net *in = icmp_pernet(net); 285 struct nf_icmp_net *in = icmp_pernet(net);
281 286
282 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { 287 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
288 if (!timeout)
289 timeout = &in->timeout;
283 *timeout = 290 *timeout =
284 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; 291 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
285 } else { 292 } else if (timeout) {
286 /* Set default ICMP timeout. */ 293 /* Set default ICMP timeout. */
287 *timeout = in->timeout; 294 *timeout = in->timeout;
288 } 295 }
289 return 0; 296 return 0;
290 } 297 }
291 298
292 static int 299 static int
293 icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 300 icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
294 { 301 {
295 const unsigned int *timeout = data; 302 const unsigned int *timeout = data;
296 303
297 if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) 304 if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
298 goto nla_put_failure; 305 goto nla_put_failure;
299 return 0; 306 return 0;
300 307
301 nla_put_failure: 308 nla_put_failure:
302 return -ENOSPC; 309 return -ENOSPC;
303 } 310 }
304 311
305 static const struct nla_policy 312 static const struct nla_policy
306 icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { 313 icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
307 [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, 314 [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
308 }; 315 };
309 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 316 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
310 317
311 #ifdef CONFIG_SYSCTL 318 #ifdef CONFIG_SYSCTL
312 static struct ctl_table icmp_sysctl_table[] = { 319 static struct ctl_table icmp_sysctl_table[] = {
313 { 320 {
314 .procname = "nf_conntrack_icmp_timeout", 321 .procname = "nf_conntrack_icmp_timeout",
315 .maxlen = sizeof(unsigned int), 322 .maxlen = sizeof(unsigned int),
316 .mode = 0644, 323 .mode = 0644,
317 .proc_handler = proc_dointvec_jiffies, 324 .proc_handler = proc_dointvec_jiffies,
318 }, 325 },
319 { } 326 { }
320 }; 327 };
321 #endif /* CONFIG_SYSCTL */ 328 #endif /* CONFIG_SYSCTL */
322 329
323 static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, 330 static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
324 struct nf_icmp_net *in) 331 struct nf_icmp_net *in)
325 { 332 {
326 #ifdef CONFIG_SYSCTL 333 #ifdef CONFIG_SYSCTL
327 pn->ctl_table = kmemdup(icmp_sysctl_table, 334 pn->ctl_table = kmemdup(icmp_sysctl_table,
328 sizeof(icmp_sysctl_table), 335 sizeof(icmp_sysctl_table),
329 GFP_KERNEL); 336 GFP_KERNEL);
330 if (!pn->ctl_table) 337 if (!pn->ctl_table)
331 return -ENOMEM; 338 return -ENOMEM;
332 339
333 pn->ctl_table[0].data = &in->timeout; 340 pn->ctl_table[0].data = &in->timeout;
334 #endif 341 #endif
335 return 0; 342 return 0;
336 } 343 }
337 344
338 static int icmp_init_net(struct net *net, u_int16_t proto) 345 static int icmp_init_net(struct net *net, u_int16_t proto)
339 { 346 {
340 struct nf_icmp_net *in = icmp_pernet(net); 347 struct nf_icmp_net *in = icmp_pernet(net);
341 struct nf_proto_net *pn = &in->pn; 348 struct nf_proto_net *pn = &in->pn;
342 349
343 in->timeout = nf_ct_icmp_timeout; 350 in->timeout = nf_ct_icmp_timeout;
344 351
345 return icmp_kmemdup_sysctl_table(pn, in); 352 return icmp_kmemdup_sysctl_table(pn, in);
346 } 353 }
347 354
348 static struct nf_proto_net *icmp_get_net_proto(struct net *net) 355 static struct nf_proto_net *icmp_get_net_proto(struct net *net)
349 { 356 {
350 return &net->ct.nf_ct_proto.icmp.pn; 357 return &net->ct.nf_ct_proto.icmp.pn;
351 } 358 }
352 359
353 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = 360 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
354 { 361 {
355 .l3proto = PF_INET, 362 .l3proto = PF_INET,
356 .l4proto = IPPROTO_ICMP, 363 .l4proto = IPPROTO_ICMP,
357 .pkt_to_tuple = icmp_pkt_to_tuple, 364 .pkt_to_tuple = icmp_pkt_to_tuple,
358 .invert_tuple = icmp_invert_tuple, 365 .invert_tuple = icmp_invert_tuple,
359 .packet = icmp_packet, 366 .packet = icmp_packet,
360 .get_timeouts = icmp_get_timeouts,
361 .new = icmp_new, 367 .new = icmp_new,
362 .error = icmp_error, 368 .error = icmp_error,
363 .destroy = NULL, 369 .destroy = NULL,
364 .me = NULL, 370 .me = NULL,
365 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 371 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
366 .tuple_to_nlattr = icmp_tuple_to_nlattr, 372 .tuple_to_nlattr = icmp_tuple_to_nlattr,
367 .nlattr_tuple_size = icmp_nlattr_tuple_size, 373 .nlattr_tuple_size = icmp_nlattr_tuple_size,
368 .nlattr_to_tuple = icmp_nlattr_to_tuple, 374 .nlattr_to_tuple = icmp_nlattr_to_tuple,
369 .nla_policy = icmp_nla_policy, 375 .nla_policy = icmp_nla_policy,
370 #endif 376 #endif
371 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 377 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
372 .ctnl_timeout = { 378 .ctnl_timeout = {
373 .nlattr_to_obj = icmp_timeout_nlattr_to_obj, 379 .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
374 .obj_to_nlattr = icmp_timeout_obj_to_nlattr, 380 .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
375 .nlattr_max = CTA_TIMEOUT_ICMP_MAX, 381 .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
376 .obj_size = sizeof(unsigned int), 382 .obj_size = sizeof(unsigned int),
377 .nla_policy = icmp_timeout_nla_policy, 383 .nla_policy = icmp_timeout_nla_policy,
378 }, 384 },
379 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 385 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
380 .init_net = icmp_init_net, 386 .init_net = icmp_init_net,
381 .get_net_proto = icmp_get_net_proto, 387 .get_net_proto = icmp_get_net_proto,
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
1 /* 1 /*
2 * Copyright (C)2003,2004 USAGI/WIDE Project 2 * Copyright (C)2003,2004 USAGI/WIDE Project
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * Author: 8 * Author:
9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
10 */ 10 */
11 11
12 #include <linux/types.h> 12 #include <linux/types.h>
13 #include <linux/timer.h> 13 #include <linux/timer.h>
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/netfilter.h> 15 #include <linux/netfilter.h>
16 #include <linux/in6.h> 16 #include <linux/in6.h>
17 #include <linux/icmpv6.h> 17 #include <linux/icmpv6.h>
18 #include <linux/ipv6.h> 18 #include <linux/ipv6.h>
19 #include <net/ipv6.h> 19 #include <net/ipv6.h>
20 #include <net/ip6_checksum.h> 20 #include <net/ip6_checksum.h>
21 #include <linux/seq_file.h> 21 #include <linux/seq_file.h>
22 #include <linux/netfilter_ipv6.h> 22 #include <linux/netfilter_ipv6.h>
23 #include <net/netfilter/nf_conntrack_tuple.h> 23 #include <net/netfilter/nf_conntrack_tuple.h>
24 #include <net/netfilter/nf_conntrack_l4proto.h> 24 #include <net/netfilter/nf_conntrack_l4proto.h>
25 #include <net/netfilter/nf_conntrack_core.h> 25 #include <net/netfilter/nf_conntrack_core.h>
26 #include <net/netfilter/nf_conntrack_timeout.h>
26 #include <net/netfilter/nf_conntrack_zones.h> 27 #include <net/netfilter/nf_conntrack_zones.h>
27 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 28 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
28 #include <net/netfilter/nf_log.h> 29 #include <net/netfilter/nf_log.h>
29 30
30 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; 31 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
31 32
32 static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) 33 static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
33 { 34 {
34 return &net->ct.nf_ct_proto.icmpv6; 35 return &net->ct.nf_ct_proto.icmpv6;
35 } 36 }
36 37
37 static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, 38 static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
38 unsigned int dataoff, 39 unsigned int dataoff,
39 struct net *net, 40 struct net *net,
40 struct nf_conntrack_tuple *tuple) 41 struct nf_conntrack_tuple *tuple)
41 { 42 {
42 const struct icmp6hdr *hp; 43 const struct icmp6hdr *hp;
43 struct icmp6hdr _hdr; 44 struct icmp6hdr _hdr;
44 45
45 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 46 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
46 if (hp == NULL) 47 if (hp == NULL)
47 return false; 48 return false;
48 tuple->dst.u.icmp.type = hp->icmp6_type; 49 tuple->dst.u.icmp.type = hp->icmp6_type;
49 tuple->src.u.icmp.id = hp->icmp6_identifier; 50 tuple->src.u.icmp.id = hp->icmp6_identifier;
50 tuple->dst.u.icmp.code = hp->icmp6_code; 51 tuple->dst.u.icmp.code = hp->icmp6_code;
51 52
52 return true; 53 return true;
53 } 54 }
54 55
55 /* Add 1; spaces filled with 0. */ 56 /* Add 1; spaces filled with 0. */
56 static const u_int8_t invmap[] = { 57 static const u_int8_t invmap[] = {
57 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, 58 [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
58 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, 59 [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
59 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1, 60 [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
60 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 61 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
61 }; 62 };
62 63
63 static const u_int8_t noct_valid_new[] = { 64 static const u_int8_t noct_valid_new[] = {
64 [ICMPV6_MGM_QUERY - 130] = 1, 65 [ICMPV6_MGM_QUERY - 130] = 1,
65 [ICMPV6_MGM_REPORT - 130] = 1, 66 [ICMPV6_MGM_REPORT - 130] = 1,
66 [ICMPV6_MGM_REDUCTION - 130] = 1, 67 [ICMPV6_MGM_REDUCTION - 130] = 1,
67 [NDISC_ROUTER_SOLICITATION - 130] = 1, 68 [NDISC_ROUTER_SOLICITATION - 130] = 1,
68 [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, 69 [NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
69 [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1, 70 [NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
70 [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1, 71 [NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
71 [ICMPV6_MLD2_REPORT - 130] = 1 72 [ICMPV6_MLD2_REPORT - 130] = 1
72 }; 73 };
73 74
74 static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, 75 static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
75 const struct nf_conntrack_tuple *orig) 76 const struct nf_conntrack_tuple *orig)
76 { 77 {
77 int type = orig->dst.u.icmp.type - 128; 78 int type = orig->dst.u.icmp.type - 128;
78 if (type < 0 || type >= sizeof(invmap) || !invmap[type]) 79 if (type < 0 || type >= sizeof(invmap) || !invmap[type])
79 return false; 80 return false;
80 81
81 tuple->src.u.icmp.id = orig->src.u.icmp.id; 82 tuple->src.u.icmp.id = orig->src.u.icmp.id;
82 tuple->dst.u.icmp.type = invmap[type] - 1; 83 tuple->dst.u.icmp.type = invmap[type] - 1;
83 tuple->dst.u.icmp.code = orig->dst.u.icmp.code; 84 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
84 return true; 85 return true;
85 } 86 }
86 87
87 static unsigned int *icmpv6_get_timeouts(struct net *net) 88 static unsigned int *icmpv6_get_timeouts(struct net *net)
88 { 89 {
89 return &icmpv6_pernet(net)->timeout; 90 return &icmpv6_pernet(net)->timeout;
90 } 91 }
91 92
92 /* Returns verdict for packet, or -1 for invalid. */ 93 /* Returns verdict for packet, or -1 for invalid. */
93 static int icmpv6_packet(struct nf_conn *ct, 94 static int icmpv6_packet(struct nf_conn *ct,
94 const struct sk_buff *skb, 95 const struct sk_buff *skb,
95 unsigned int dataoff, 96 unsigned int dataoff,
96 enum ip_conntrack_info ctinfo, 97 enum ip_conntrack_info ctinfo)
97 unsigned int *timeout)
98 { 98 {
99 unsigned int *timeout = nf_ct_timeout_lookup(ct);
100
101 if (!timeout)
102 timeout = icmpv6_get_timeouts(nf_ct_net(ct));
103
99 /* Do not immediately delete the connection after the first 104 /* Do not immediately delete the connection after the first
100 successful reply to avoid excessive conntrackd traffic 105 successful reply to avoid excessive conntrackd traffic
101 and also to handle correctly ICMP echo reply duplicates. */ 106 and also to handle correctly ICMP echo reply duplicates. */
102 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 107 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
103 108
104 return NF_ACCEPT; 109 return NF_ACCEPT;
105 } 110 }
106 111
107 /* Called when a new connection for this protocol found. */ 112 /* Called when a new connection for this protocol found. */
108 static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, 113 static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
109 unsigned int dataoff, unsigned int *timeouts) 114 unsigned int dataoff)
110 { 115 {
111 static const u_int8_t valid_new[] = { 116 static const u_int8_t valid_new[] = {
112 [ICMPV6_ECHO_REQUEST - 128] = 1, 117 [ICMPV6_ECHO_REQUEST - 128] = 1,
113 [ICMPV6_NI_QUERY - 128] = 1 118 [ICMPV6_NI_QUERY - 128] = 1
114 }; 119 };
115 int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; 120 int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
116 121
117 if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { 122 if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
118 /* Can't create a new ICMPv6 `conn' with this. */ 123 /* Can't create a new ICMPv6 `conn' with this. */
119 pr_debug("icmpv6: can't create new conn with type %u\n", 124 pr_debug("icmpv6: can't create new conn with type %u\n",
120 type + 128); 125 type + 128);
121 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); 126 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
122 return false; 127 return false;
123 } 128 }
124 return true; 129 return true;
125 } 130 }
126 131
127 static int 132 static int
128 icmpv6_error_message(struct net *net, struct nf_conn *tmpl, 133 icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
129 struct sk_buff *skb, 134 struct sk_buff *skb,
130 unsigned int icmp6off) 135 unsigned int icmp6off)
131 { 136 {
132 struct nf_conntrack_tuple intuple, origtuple; 137 struct nf_conntrack_tuple intuple, origtuple;
133 const struct nf_conntrack_tuple_hash *h; 138 const struct nf_conntrack_tuple_hash *h;
134 const struct nf_conntrack_l4proto *inproto; 139 const struct nf_conntrack_l4proto *inproto;
135 enum ip_conntrack_info ctinfo; 140 enum ip_conntrack_info ctinfo;
136 struct nf_conntrack_zone tmp; 141 struct nf_conntrack_zone tmp;
137 142
138 WARN_ON(skb_nfct(skb)); 143 WARN_ON(skb_nfct(skb));
139 144
140 /* Are they talking about one of our connections? */ 145 /* Are they talking about one of our connections? */
141 if (!nf_ct_get_tuplepr(skb, 146 if (!nf_ct_get_tuplepr(skb,
142 skb_network_offset(skb) 147 skb_network_offset(skb)
143 + sizeof(struct ipv6hdr) 148 + sizeof(struct ipv6hdr)
144 + sizeof(struct icmp6hdr), 149 + sizeof(struct icmp6hdr),
145 PF_INET6, net, &origtuple)) { 150 PF_INET6, net, &origtuple)) {
146 pr_debug("icmpv6_error: Can't get tuple\n"); 151 pr_debug("icmpv6_error: Can't get tuple\n");
147 return -NF_ACCEPT; 152 return -NF_ACCEPT;
148 } 153 }
149 154
150 /* rcu_read_lock()ed by nf_hook_thresh */ 155 /* rcu_read_lock()ed by nf_hook_thresh */
151 inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); 156 inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
152 157
153 /* Ordinarily, we'd expect the inverted tupleproto, but it's 158 /* Ordinarily, we'd expect the inverted tupleproto, but it's
154 been preserved inside the ICMP. */ 159 been preserved inside the ICMP. */
155 if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { 160 if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
156 pr_debug("icmpv6_error: Can't invert tuple\n"); 161 pr_debug("icmpv6_error: Can't invert tuple\n");
157 return -NF_ACCEPT; 162 return -NF_ACCEPT;
158 } 163 }
159 164
160 ctinfo = IP_CT_RELATED; 165 ctinfo = IP_CT_RELATED;
161 166
162 h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), 167 h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
163 &intuple); 168 &intuple);
164 if (!h) { 169 if (!h) {
165 pr_debug("icmpv6_error: no match\n"); 170 pr_debug("icmpv6_error: no match\n");
166 return -NF_ACCEPT; 171 return -NF_ACCEPT;
167 } else { 172 } else {
168 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 173 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
169 ctinfo += IP_CT_IS_REPLY; 174 ctinfo += IP_CT_IS_REPLY;
170 } 175 }
171 176
172 /* Update skb to refer to this connection */ 177 /* Update skb to refer to this connection */
173 nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo); 178 nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
174 return NF_ACCEPT; 179 return NF_ACCEPT;
175 } 180 }
176 181
177 static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, 182 static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
178 u8 pf, const char *msg) 183 u8 pf, const char *msg)
179 { 184 {
180 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); 185 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
181 } 186 }
182 187
183 static int 188 static int
184 icmpv6_error(struct net *net, struct nf_conn *tmpl, 189 icmpv6_error(struct net *net, struct nf_conn *tmpl,
185 struct sk_buff *skb, unsigned int dataoff, 190 struct sk_buff *skb, unsigned int dataoff,
186 u8 pf, unsigned int hooknum) 191 u8 pf, unsigned int hooknum)
187 { 192 {
188 const struct icmp6hdr *icmp6h; 193 const struct icmp6hdr *icmp6h;
189 struct icmp6hdr _ih; 194 struct icmp6hdr _ih;
190 int type; 195 int type;
191 196
192 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); 197 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
193 if (icmp6h == NULL) { 198 if (icmp6h == NULL) {
194 icmpv6_error_log(skb, net, pf, "short packet"); 199 icmpv6_error_log(skb, net, pf, "short packet");
195 return -NF_ACCEPT; 200 return -NF_ACCEPT;
196 } 201 }
197 202
198 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 203 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
199 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { 204 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
200 icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); 205 icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
201 return -NF_ACCEPT; 206 return -NF_ACCEPT;
202 } 207 }
203 208
204 type = icmp6h->icmp6_type - 130; 209 type = icmp6h->icmp6_type - 130;
205 if (type >= 0 && type < sizeof(noct_valid_new) && 210 if (type >= 0 && type < sizeof(noct_valid_new) &&
206 noct_valid_new[type]) { 211 noct_valid_new[type]) {
207 nf_ct_set(skb, NULL, IP_CT_UNTRACKED); 212 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
208 return NF_ACCEPT; 213 return NF_ACCEPT;
209 } 214 }
210 215
211 /* is not error message ? */ 216 /* is not error message ? */
212 if (icmp6h->icmp6_type >= 128) 217 if (icmp6h->icmp6_type >= 128)
213 return NF_ACCEPT; 218 return NF_ACCEPT;
214 219
215 return icmpv6_error_message(net, tmpl, skb, dataoff); 220 return icmpv6_error_message(net, tmpl, skb, dataoff);
216 } 221 }
217 222
218 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 223 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
219 224
220 #include <linux/netfilter/nfnetlink.h> 225 #include <linux/netfilter/nfnetlink.h>
221 #include <linux/netfilter/nfnetlink_conntrack.h> 226 #include <linux/netfilter/nfnetlink_conntrack.h>
222 static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, 227 static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
223 const struct nf_conntrack_tuple *t) 228 const struct nf_conntrack_tuple *t)
224 { 229 {
225 if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) || 230 if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
226 nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) || 231 nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
227 nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code)) 232 nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
228 goto nla_put_failure; 233 goto nla_put_failure;
229 return 0; 234 return 0;
230 235
231 nla_put_failure: 236 nla_put_failure:
232 return -1; 237 return -1;
233 } 238 }
234 239
235 static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { 240 static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
236 [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 }, 241 [CTA_PROTO_ICMPV6_TYPE] = { .type = NLA_U8 },
237 [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 }, 242 [CTA_PROTO_ICMPV6_CODE] = { .type = NLA_U8 },
238 [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 }, 243 [CTA_PROTO_ICMPV6_ID] = { .type = NLA_U16 },
239 }; 244 };
240 245
241 static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], 246 static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
242 struct nf_conntrack_tuple *tuple) 247 struct nf_conntrack_tuple *tuple)
243 { 248 {
244 if (!tb[CTA_PROTO_ICMPV6_TYPE] || 249 if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
245 !tb[CTA_PROTO_ICMPV6_CODE] || 250 !tb[CTA_PROTO_ICMPV6_CODE] ||
246 !tb[CTA_PROTO_ICMPV6_ID]) 251 !tb[CTA_PROTO_ICMPV6_ID])
247 return -EINVAL; 252 return -EINVAL;
248 253
249 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); 254 tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
250 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); 255 tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
251 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); 256 tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
252 257
253 if (tuple->dst.u.icmp.type < 128 || 258 if (tuple->dst.u.icmp.type < 128 ||
254 tuple->dst.u.icmp.type - 128 >= sizeof(invmap) || 259 tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
255 !invmap[tuple->dst.u.icmp.type - 128]) 260 !invmap[tuple->dst.u.icmp.type - 128])
256 return -EINVAL; 261 return -EINVAL;
257 262
258 return 0; 263 return 0;
259 } 264 }
260 265
261 static unsigned int icmpv6_nlattr_tuple_size(void) 266 static unsigned int icmpv6_nlattr_tuple_size(void)
262 { 267 {
263 static unsigned int size __read_mostly; 268 static unsigned int size __read_mostly;
264 269
265 if (!size) 270 if (!size)
266 size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); 271 size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
267 272
268 return size; 273 return size;
269 } 274 }
270 #endif 275 #endif
271 276
272 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 277 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
273 278
274 #include <linux/netfilter/nfnetlink.h> 279 #include <linux/netfilter/nfnetlink.h>
275 #include <linux/netfilter/nfnetlink_cttimeout.h> 280 #include <linux/netfilter/nfnetlink_cttimeout.h>
276 281
277 static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], 282 static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
278 struct net *net, void *data) 283 struct net *net, void *data)
279 { 284 {
280 unsigned int *timeout = data; 285 unsigned int *timeout = data;
281 struct nf_icmp_net *in = icmpv6_pernet(net); 286 struct nf_icmp_net *in = icmpv6_pernet(net);
282 287
288 if (!timeout)
289 timeout = icmpv6_get_timeouts(net);
283 if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { 290 if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
284 *timeout = 291 *timeout =
285 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; 292 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
286 } else { 293 } else {
287 /* Set default ICMPv6 timeout. */ 294 /* Set default ICMPv6 timeout. */
288 *timeout = in->timeout; 295 *timeout = in->timeout;
289 } 296 }
290 return 0; 297 return 0;
291 } 298 }
292 299
293 static int 300 static int
294 icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 301 icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
295 { 302 {
296 const unsigned int *timeout = data; 303 const unsigned int *timeout = data;
297 304
298 if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ))) 305 if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
299 goto nla_put_failure; 306 goto nla_put_failure;
300 return 0; 307 return 0;
301 308
302 nla_put_failure: 309 nla_put_failure:
303 return -ENOSPC; 310 return -ENOSPC;
304 } 311 }
305 312
306 static const struct nla_policy 313 static const struct nla_policy
307 icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { 314 icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
308 [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, 315 [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 },
309 }; 316 };
310 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 317 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
311 318
312 #ifdef CONFIG_SYSCTL 319 #ifdef CONFIG_SYSCTL
313 static struct ctl_table icmpv6_sysctl_table[] = { 320 static struct ctl_table icmpv6_sysctl_table[] = {
314 { 321 {
315 .procname = "nf_conntrack_icmpv6_timeout", 322 .procname = "nf_conntrack_icmpv6_timeout",
316 .maxlen = sizeof(unsigned int), 323 .maxlen = sizeof(unsigned int),
317 .mode = 0644, 324 .mode = 0644,
318 .proc_handler = proc_dointvec_jiffies, 325 .proc_handler = proc_dointvec_jiffies,
319 }, 326 },
320 { } 327 { }
321 }; 328 };
322 #endif /* CONFIG_SYSCTL */ 329 #endif /* CONFIG_SYSCTL */
323 330
324 static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, 331 static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
325 struct nf_icmp_net *in) 332 struct nf_icmp_net *in)
326 { 333 {
327 #ifdef CONFIG_SYSCTL 334 #ifdef CONFIG_SYSCTL
328 pn->ctl_table = kmemdup(icmpv6_sysctl_table, 335 pn->ctl_table = kmemdup(icmpv6_sysctl_table,
329 sizeof(icmpv6_sysctl_table), 336 sizeof(icmpv6_sysctl_table),
330 GFP_KERNEL); 337 GFP_KERNEL);
331 if (!pn->ctl_table) 338 if (!pn->ctl_table)
332 return -ENOMEM; 339 return -ENOMEM;
333 340
334 pn->ctl_table[0].data = &in->timeout; 341 pn->ctl_table[0].data = &in->timeout;
335 #endif 342 #endif
336 return 0; 343 return 0;
337 } 344 }
338 345
339 static int icmpv6_init_net(struct net *net, u_int16_t proto) 346 static int icmpv6_init_net(struct net *net, u_int16_t proto)
340 { 347 {
341 struct nf_icmp_net *in = icmpv6_pernet(net); 348 struct nf_icmp_net *in = icmpv6_pernet(net);
342 struct nf_proto_net *pn = &in->pn; 349 struct nf_proto_net *pn = &in->pn;
343 350
344 in->timeout = nf_ct_icmpv6_timeout; 351 in->timeout = nf_ct_icmpv6_timeout;
345 352
346 return icmpv6_kmemdup_sysctl_table(pn, in); 353 return icmpv6_kmemdup_sysctl_table(pn, in);
347 } 354 }
348 355
349 static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) 356 static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
350 { 357 {
351 return &net->ct.nf_ct_proto.icmpv6.pn; 358 return &net->ct.nf_ct_proto.icmpv6.pn;
352 } 359 }
353 360
354 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = 361 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
355 { 362 {
356 .l3proto = PF_INET6, 363 .l3proto = PF_INET6,
357 .l4proto = IPPROTO_ICMPV6, 364 .l4proto = IPPROTO_ICMPV6,
358 .pkt_to_tuple = icmpv6_pkt_to_tuple, 365 .pkt_to_tuple = icmpv6_pkt_to_tuple,
359 .invert_tuple = icmpv6_invert_tuple, 366 .invert_tuple = icmpv6_invert_tuple,
360 .packet = icmpv6_packet, 367 .packet = icmpv6_packet,
361 .get_timeouts = icmpv6_get_timeouts,
362 .new = icmpv6_new, 368 .new = icmpv6_new,
363 .error = icmpv6_error, 369 .error = icmpv6_error,
364 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 370 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
365 .tuple_to_nlattr = icmpv6_tuple_to_nlattr, 371 .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
366 .nlattr_tuple_size = icmpv6_nlattr_tuple_size, 372 .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
367 .nlattr_to_tuple = icmpv6_nlattr_to_tuple, 373 .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
368 .nla_policy = icmpv6_nla_policy, 374 .nla_policy = icmpv6_nla_policy,
369 #endif 375 #endif
370 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 376 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
371 .ctnl_timeout = { 377 .ctnl_timeout = {
372 .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, 378 .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj,
373 .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, 379 .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr,
374 .nlattr_max = CTA_TIMEOUT_ICMP_MAX, 380 .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
375 .obj_size = sizeof(unsigned int), 381 .obj_size = sizeof(unsigned int),
376 .nla_policy = icmpv6_timeout_nla_policy, 382 .nla_policy = icmpv6_timeout_nla_policy,
377 }, 383 },
378 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 384 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
379 .init_net = icmpv6_init_net, 385 .init_net = icmpv6_init_net,
380 .get_net_proto = icmpv6_get_net_proto, 386 .get_net_proto = icmpv6_get_net_proto,
net/netfilter/nf_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from, 1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables 2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */ 3 extension. */
4 4
5 /* (C) 1999-2001 Paul `Rusty' Russell 5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net> 8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as 11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 16
17 #include <linux/types.h> 17 #include <linux/types.h>
18 #include <linux/netfilter.h> 18 #include <linux/netfilter.h>
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/sched.h> 20 #include <linux/sched.h>
21 #include <linux/skbuff.h> 21 #include <linux/skbuff.h>
22 #include <linux/proc_fs.h> 22 #include <linux/proc_fs.h>
23 #include <linux/vmalloc.h> 23 #include <linux/vmalloc.h>
24 #include <linux/stddef.h> 24 #include <linux/stddef.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/random.h> 26 #include <linux/random.h>
27 #include <linux/jhash.h> 27 #include <linux/jhash.h>
28 #include <linux/err.h> 28 #include <linux/err.h>
29 #include <linux/percpu.h> 29 #include <linux/percpu.h>
30 #include <linux/moduleparam.h> 30 #include <linux/moduleparam.h>
31 #include <linux/notifier.h> 31 #include <linux/notifier.h>
32 #include <linux/kernel.h> 32 #include <linux/kernel.h>
33 #include <linux/netdevice.h> 33 #include <linux/netdevice.h>
34 #include <linux/socket.h> 34 #include <linux/socket.h>
35 #include <linux/mm.h> 35 #include <linux/mm.h>
36 #include <linux/nsproxy.h> 36 #include <linux/nsproxy.h>
37 #include <linux/rculist_nulls.h> 37 #include <linux/rculist_nulls.h>
38 38
39 #include <net/netfilter/nf_conntrack.h> 39 #include <net/netfilter/nf_conntrack.h>
40 #include <net/netfilter/nf_conntrack_l4proto.h> 40 #include <net/netfilter/nf_conntrack_l4proto.h>
41 #include <net/netfilter/nf_conntrack_expect.h> 41 #include <net/netfilter/nf_conntrack_expect.h>
42 #include <net/netfilter/nf_conntrack_helper.h> 42 #include <net/netfilter/nf_conntrack_helper.h>
43 #include <net/netfilter/nf_conntrack_seqadj.h> 43 #include <net/netfilter/nf_conntrack_seqadj.h>
44 #include <net/netfilter/nf_conntrack_core.h> 44 #include <net/netfilter/nf_conntrack_core.h>
45 #include <net/netfilter/nf_conntrack_extend.h> 45 #include <net/netfilter/nf_conntrack_extend.h>
46 #include <net/netfilter/nf_conntrack_acct.h> 46 #include <net/netfilter/nf_conntrack_acct.h>
47 #include <net/netfilter/nf_conntrack_ecache.h> 47 #include <net/netfilter/nf_conntrack_ecache.h>
48 #include <net/netfilter/nf_conntrack_zones.h> 48 #include <net/netfilter/nf_conntrack_zones.h>
49 #include <net/netfilter/nf_conntrack_timestamp.h> 49 #include <net/netfilter/nf_conntrack_timestamp.h>
50 #include <net/netfilter/nf_conntrack_timeout.h> 50 #include <net/netfilter/nf_conntrack_timeout.h>
51 #include <net/netfilter/nf_conntrack_labels.h> 51 #include <net/netfilter/nf_conntrack_labels.h>
52 #include <net/netfilter/nf_conntrack_synproxy.h> 52 #include <net/netfilter/nf_conntrack_synproxy.h>
53 #include <net/netfilter/nf_nat.h> 53 #include <net/netfilter/nf_nat.h>
54 #include <net/netfilter/nf_nat_core.h> 54 #include <net/netfilter/nf_nat_core.h>
55 #include <net/netfilter/nf_nat_helper.h> 55 #include <net/netfilter/nf_nat_helper.h>
56 #include <net/netns/hash.h> 56 #include <net/netns/hash.h>
57 #include <net/ip.h> 57 #include <net/ip.h>
58 58
59 #include "nf_internals.h" 59 #include "nf_internals.h"
60 60
/* Array of per-bucket conntrack hash locks; buckets map onto these
 * modulo CONNTRACK_LOCKS (see nf_conntrack_double_lock()).
 */
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
EXPORT_SYMBOL_GPL(nf_conntrack_locks);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);

struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);

/* State for the periodic conntrack garbage-collection worker. */
struct conntrack_gc_work {
	struct delayed_work dwork;	/* the work item itself */
	u32 last_bucket;		/* presumably the bucket to resume the scan at -- TODO confirm */
	bool exiting;
	bool early_drop;
	long next_gc_run;
};

static __read_mostly struct kmem_cache *nf_conntrack_cachep;
/* Tentative declaration followed by the initialized definition below;
 * both name the same object (C file-scope tentative-definition rules).
 */
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;

/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
#define GC_MAX_BUCKETS_DIV	128u
/* upper bound of full table scan */
#define GC_MAX_SCAN_JIFFIES	(16u * HZ)
/* desired ratio of entries found to be expired */
#define GC_EVICT_RATIO	50u

static struct conntrack_gc_work conntrack_gc_work;
91 91
/* Take one per-bucket conntrack lock while cooperating with
 * nf_conntrack_all_lock(): if another CPU is holding "all" locks
 * (nf_conntrack_locks_all == true), serialize on the global
 * nf_conntrack_locks_all_lock before retaking the bucket lock.
 */
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
	/* 1) Acquire the lock */
	spin_lock(lock);

	/* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
	 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
	 */
	if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
		return;

	/* fast path failed, unlock */
	spin_unlock(lock);

	/* Slow path 1) get global lock */
	spin_lock(&nf_conntrack_locks_all_lock);

	/* Slow path 2) get the lock we want */
	spin_lock(lock);

	/* Slow path 3) release the global lock */
	spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
116 116
/* Drop the pair of bucket locks taken by nf_conntrack_double_lock().
 * h1/h2 are raw hash values, reduced modulo CONNTRACK_LOCKS here; the
 * second unlock is skipped when both hashes map to the same lock.
 */
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;
	spin_unlock(&nf_conntrack_locks[h1]);
	if (h1 != h2)
		spin_unlock(&nf_conntrack_locks[h2]);
}
125 125
/* return true if we need to recompute hashes (in case hash table was resized) */
static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
				     unsigned int h2, unsigned int sequence)
{
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;
	/* Always take the lower-indexed lock first so two CPUs locking the
	 * same pair cannot deadlock; the inner lock uses
	 * SINGLE_DEPTH_NESTING to keep lockdep happy.
	 */
	if (h1 <= h2) {
		nf_conntrack_lock(&nf_conntrack_locks[h1]);
		if (h1 != h2)
			spin_lock_nested(&nf_conntrack_locks[h2],
					 SINGLE_DEPTH_NESTING);
	} else {
		nf_conntrack_lock(&nf_conntrack_locks[h2]);
		spin_lock_nested(&nf_conntrack_locks[h1],
				 SINGLE_DEPTH_NESTING);
	}
	/* If the generation seqcount moved, the table was resized while we
	 * were locking: drop both locks and tell the caller to rehash.
	 */
	if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
		nf_conntrack_double_unlock(h1, h2);
		return true;
	}
	return false;
}
148 148
/* Exclude all nf_conntrack_lock() holders: set nf_conntrack_locks_all and
 * then cycle every bucket lock so each current holder has drained.
 * Paired with nf_conntrack_all_unlock().
 */
static void nf_conntrack_all_lock(void)
{
	int i;

	spin_lock(&nf_conntrack_locks_all_lock);

	nf_conntrack_locks_all = true;

	for (i = 0; i < CONNTRACK_LOCKS; i++) {
		spin_lock(&nf_conntrack_locks[i]);

		/* This spin_unlock provides the "release" to ensure that
		 * nf_conntrack_locks_all==true is visible to everyone that
		 * acquired spin_lock(&nf_conntrack_locks[]).
		 */
		spin_unlock(&nf_conntrack_locks[i]);
	}
}
167 167
/* Counterpart of nf_conntrack_all_lock(): clear the "all locked" flag with
 * release semantics and drop the global lock.
 */
static void nf_conntrack_all_unlock(void)
{
	/* All prior stores must be complete before we clear
	 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
	 * might observe the false value but not the entire
	 * critical section.
	 * It pairs with the smp_load_acquire() in nf_conntrack_lock()
	 */
	smp_store_release(&nf_conntrack_locks_all, false);
	spin_unlock(&nf_conntrack_locks_all_lock);
}
179 179
/* Number of buckets in the conntrack hash table (see scale_hash()). */
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
/* Bumped around hash-table replacement; readers sample it via
 * read_seqcount_begin()/read_seqcount_retry() (see nf_conntrack_double_lock()).
 */
seqcount_t nf_conntrack_generation __read_mostly;
/* Random seed for tuple hashing, initialized once on first use. */
static unsigned int nf_conntrack_hash_rnd __read_mostly;
187 187
/* Compute the unscaled hash of a conntrack tuple, mixed with a boot-time
 * random seed and the netns hash so values differ across namespaces.
 */
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
			      const struct net *net)
{
	unsigned int n;
	u32 seed;

	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	return jhash2((u32 *)tuple, n, seed ^
		      (((__force __u16)tuple->dst.u.all << 16) |
		      tuple->dst.protonum));
}
206 206
/* Map a raw hash onto the current table size, i.e. a bucket index. */
static u32 scale_hash(u32 hash)
{
	return reciprocal_scale(hash, nf_conntrack_htable_size);
}
211 211
/* Like hash_conntrack() but scaled to an explicit table size; used when
 * hashing against a table other than the current one (e.g. during resize).
 */
static u32 __hash_conntrack(const struct net *net,
			    const struct nf_conntrack_tuple *tuple,
			    unsigned int size)
{
	return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
}
218 218
/* Bucket index of @tuple in the current conntrack hash table. */
static u32 hash_conntrack(const struct net *net,
			  const struct nf_conntrack_tuple *tuple)
{
	return scale_hash(hash_conntrack_raw(tuple, net));
}
224 224
/* Build the conntrack tuple for a packet.
 *
 * @nhoff: offset of the network (l3) header inside @skb.
 * @dataoff: offset of the transport (l4) header inside @skb.
 *
 * Fills src/dst addresses for IPv4/IPv6, then either defers to the
 * l4proto's pkt_to_tuple() callback or reads the first four bytes of the
 * l4 header as source/destination ports (the common TCP/UDP layout).
 * Returns false if the needed headers cannot be fetched from the skb.
 */
static bool
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct net *net,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l4proto *l4proto)
{
	unsigned int size;
	const __be32 *ap;
	__be32 _addrs[8];
	struct {
		__be16 sport;
		__be16 dport;
	} _inet_hdr, *inet_hdr;

	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	switch (l3num) {
	case NFPROTO_IPV4:
		nhoff += offsetof(struct iphdr, saddr);
		size = 2 * sizeof(__be32);
		break;
	case NFPROTO_IPV6:
		nhoff += offsetof(struct ipv6hdr, saddr);
		size = sizeof(_addrs);
		break;
	default:
		/* Unknown l3 protocol: zeroed addresses, but still "ok". */
		return true;
	}

	ap = skb_header_pointer(skb, nhoff, size, _addrs);
	if (!ap)
		return false;

	switch (l3num) {
	case NFPROTO_IPV4:
		tuple->src.u3.ip = ap[0];
		tuple->dst.u3.ip = ap[1];
		break;
	case NFPROTO_IPV6:
		memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
		memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
		break;
	}

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	/* Only protocols with a non-port tuple layout provide a callback. */
	if (unlikely(l4proto->pkt_to_tuple))
		return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);

	/* Actually only need first 4 bytes to get ports. */
	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
	if (!inet_hdr)
		return false;

	tuple->src.u.udp.port = inet_hdr->sport;
	tuple->dst.u.udp.port = inet_hdr->dport;
	return true;
}
289 289
/* Parse the IPv4 header at @nhoff: store the l4 protocol number in
 * *@protonum and return the offset of the l4 header, or -1 when the
 * header cannot be read, the packet is a non-first fragment, or the
 * header length field is bogus.  Stubbed to -1 when IPv4 conntrack is
 * not built in.
 */
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u_int8_t *protonum)
{
	int dataoff = -1;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
	const struct iphdr *iph;
	struct iphdr _iph;

	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
	if (!iph)
		return -1;

	/* Conntrack defragments packets, we might still see fragments
	 * inside ICMP packets though.
	 */
	if (iph->frag_off & htons(IP_OFFSET))
		return -1;

	dataoff = nhoff + (iph->ihl << 2);
	*protonum = iph->protocol;

	/* Check bogus IP headers */
	if (dataoff > skb->len) {
		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
			 nhoff, iph->ihl << 2, skb->len);
		return -1;
	}
#endif
	return dataoff;
}
320 320
/* Walk the IPv6 extension-header chain starting after the fixed header:
 * store the final nexthdr value in *@protonum and return the l4 header
 * offset, or -1 on parse failure or for fragmented payloads.  Stubbed to
 * -1 when IPv6 conntrack is not built in.
 */
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u8 *protonum)
{
	int protoff = -1;
#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;

	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
			  &nexthdr, sizeof(nexthdr)) != 0) {
		pr_debug("can't get nexthdr\n");
		return -1;
	}
	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
	/*
	 * (protoff == skb->len) means the packet has not data, just
	 * IPv6 and possibly extensions headers, but it is tracked anyway
	 */
	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
		pr_debug("can't find proto in pkt\n");
		return -1;
	}

	*protonum = nexthdr;
#endif
	return protoff;
}
349 349
350 static int get_l4proto(const struct sk_buff *skb, 350 static int get_l4proto(const struct sk_buff *skb,
351 unsigned int nhoff, u8 pf, u8 *l4num) 351 unsigned int nhoff, u8 pf, u8 *l4num)
352 { 352 {
353 switch (pf) { 353 switch (pf) {
354 case NFPROTO_IPV4: 354 case NFPROTO_IPV4:
355 return ipv4_get_l4proto(skb, nhoff, l4num); 355 return ipv4_get_l4proto(skb, nhoff, l4num);
356 case NFPROTO_IPV6: 356 case NFPROTO_IPV6:
357 return ipv6_get_l4proto(skb, nhoff, l4num); 357 return ipv6_get_l4proto(skb, nhoff, l4num);
358 default: 358 default:
359 *l4num = 0; 359 *l4num = 0;
360 break; 360 break;
361 } 361 }
362 return -1; 362 return -1;
363 } 363 }
364 364
/* Public wrapper: parse l3+l4 headers of @skb and fill @tuple.
 * Takes rcu_read_lock() because the l4proto lookup returns an
 * RCU-protected pointer that must not outlive the critical section.
 * Returns false when the packet cannot be parsed.
 */
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num,
		       struct net *net, struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_l4proto *l4proto;
	u8 protonum;
	int protoff;
	int ret;

	rcu_read_lock();

	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
	if (protoff <= 0) {
		rcu_read_unlock();
		return false;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
			      l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
391 391
/* Fill @inverse with the reply-direction tuple of @orig: addresses and
 * ports swapped, direction flipped, protocol numbers kept.  Protocols
 * whose tuples are not plain port pairs override this via the l4proto
 * invert_tuple() callback (e.g. ICMP id/type handling).
 */
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;

	switch (orig->src.l3num) {
	case NFPROTO_IPV4:
		inverse->src.u3.ip = orig->dst.u3.ip;
		inverse->dst.u3.ip = orig->src.u3.ip;
		break;
	case NFPROTO_IPV6:
		inverse->src.u3.in6 = orig->dst.u3.in6;
		inverse->dst.u3.in6 = orig->src.u3.in6;
		break;
	default:
		break;
	}

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;

	if (unlikely(l4proto->invert_tuple))
		return l4proto->invert_tuple(inverse, orig);

	/* Default: swap the l4 src/dst identifiers (ports). */
	inverse->src.u.all = orig->dst.u.all;
	inverse->dst.u.all = orig->src.u.all;
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
426 426
/* Unhash both directions of @ct from the conntrack table and drop any
 * expectations still pending on it.  Callers hold the relevant bucket
 * locks (see nf_ct_delete_from_lists()).
 */
static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
437 437
/* must be called with local_bh_disable */
static void nf_ct_add_to_dying_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) dying list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	/* The ORIGINAL-direction tuplehash node is reused as the list link
	 * here (see nf_ct_del_from_dying_or_unconfirmed_list()).
	 */
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->dying);
	spin_unlock(&pcpu->lock);
}
452 452
/* must be called with local_bh_disable */
static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* add this conntrack to the (per cpu) unconfirmed list */
	ct->cpu = smp_processor_id();
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	/* Same link-node overload as nf_ct_add_to_dying_list(). */
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &pcpu->unconfirmed);
	spin_unlock(&pcpu->lock);
}
467 467
/* must be called with local_bh_disable */
static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
{
	struct ct_pcpu *pcpu;

	/* We overload first tuple to link into unconfirmed or dying list.*/
	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);

	spin_lock(&pcpu->lock);
	/* Must currently be on one of the two lists. */
	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&pcpu->lock);
}
481 481
482 #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK) 482 #define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
483 483
484 /* Released via destroy_conntrack() */ 484 /* Released via destroy_conntrack() */
485 struct nf_conn *nf_ct_tmpl_alloc(struct net *net, 485 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
486 const struct nf_conntrack_zone *zone, 486 const struct nf_conntrack_zone *zone,
487 gfp_t flags) 487 gfp_t flags)
488 { 488 {
489 struct nf_conn *tmpl, *p; 489 struct nf_conn *tmpl, *p;
490 490
491 if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) { 491 if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) {
492 tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags); 492 tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags);
493 if (!tmpl) 493 if (!tmpl)
494 return NULL; 494 return NULL;
495 495
496 p = tmpl; 496 p = tmpl;
497 tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); 497 tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
498 if (tmpl != p) { 498 if (tmpl != p) {
499 tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p); 499 tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
500 tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p; 500 tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p;
501 } 501 }
502 } else { 502 } else {
503 tmpl = kzalloc(sizeof(*tmpl), flags); 503 tmpl = kzalloc(sizeof(*tmpl), flags);
504 if (!tmpl) 504 if (!tmpl)
505 return NULL; 505 return NULL;
506 } 506 }
507 507
508 tmpl->status = IPS_TEMPLATE; 508 tmpl->status = IPS_TEMPLATE;
509 write_pnet(&tmpl->ct_net, net); 509 write_pnet(&tmpl->ct_net, net);
510 nf_ct_zone_add(tmpl, zone); 510 nf_ct_zone_add(tmpl, zone);
511 atomic_set(&tmpl->ct_general.use, 0); 511 atomic_set(&tmpl->ct_general.use, 0);
512 512
513 return tmpl; 513 return tmpl;
514 } 514 }
515 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); 515 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
516 516
/* Free a template allocated by nf_ct_tmpl_alloc(), undoing the manual
 * alignment padding (tmpl_padto) when it was applied.
 */
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
	nf_ct_ext_destroy(tmpl);
	nf_ct_ext_free(tmpl);

	if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
		kfree((char *)tmpl - tmpl->proto.tmpl_padto);
	else
		kfree(tmpl);
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
528 528
/* Final destructor, invoked when the conntrack refcount drops to zero:
 * run the l4proto destroy hook, remove remaining expectations, unlink
 * from the dying/unconfirmed list, drop the master reference and free.
 */
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	const struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	WARN_ON(atomic_read(&nfct->use) != 0);

	if (unlikely(nf_ct_is_template(ct))) {
		/* Templates have their own free path. */
		nf_ct_tmpl_free(ct);
		return;
	}
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->destroy)
		l4proto->destroy(ct);

	local_bh_disable();
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too.
	 */
	nf_ct_remove_expectations(ct);

	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	local_bh_enable();

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
564 564
/* Unlink a confirmed conntrack from the main hash table and move it to
 * the dying list.  Must not be called with any nf_conntrack lock held.
 */
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	unsigned int sequence;

	nf_ct_helper_destroy(ct);

	local_bh_disable();
	/* The hash table can be resized concurrently; recompute both
	 * bucket hashes until we manage to take the two bucket locks
	 * under a stable table generation.
	 */
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	clean_from_lists(ct);
	nf_conntrack_double_unlock(hash, reply_hash);

	nf_ct_add_to_dying_list(ct);

	local_bh_enable();
}
589 589
/* Kill a conntrack entry: mark it dying, stamp the stop time, emit the
 * destroy event and unlink it from the hash.  Returns false if the entry
 * was already dying or if event delivery failed (in which case the final
 * nf_ct_put is deferred to the ecache worker on redelivery).
 */
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
	struct nf_conn_tstamp *tstamp;

	/* Only the first caller proceeds; concurrent deleters back off. */
	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
		return false;

	/* Record the flow stop time, if timestamping is enabled. */
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp && tstamp->stop == 0)
		tstamp->stop = ktime_get_real_ns();

	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
				    portid, report) < 0) {
		/* destroy event was not delivered. nf_ct_put will
		 * be done by event cache worker on redelivery.
		 */
		nf_ct_delete_from_lists(ct);
		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
		return false;
	}

	nf_conntrack_ecache_work(nf_ct_net(ct));
	nf_ct_delete_from_lists(ct);
	nf_ct_put(ct);
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
617 617
618 static inline bool 618 static inline bool
619 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, 619 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
620 const struct nf_conntrack_tuple *tuple, 620 const struct nf_conntrack_tuple *tuple,
621 const struct nf_conntrack_zone *zone, 621 const struct nf_conntrack_zone *zone,
622 const struct net *net) 622 const struct net *net)
623 { 623 {
624 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 624 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
625 625
626 /* A conntrack can be recreated with the equal tuple, 626 /* A conntrack can be recreated with the equal tuple,
627 * so we need to check that the conntrack is confirmed 627 * so we need to check that the conntrack is confirmed
628 */ 628 */
629 return nf_ct_tuple_equal(tuple, &h->tuple) && 629 return nf_ct_tuple_equal(tuple, &h->tuple) &&
630 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && 630 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
631 nf_ct_is_confirmed(ct) && 631 nf_ct_is_confirmed(ct) &&
632 net_eq(net, nf_ct_net(ct)); 632 net_eq(net, nf_ct_net(ct));
633 } 633 }
634 634
635 /* caller must hold rcu readlock and none of the nf_conntrack_locks */ 635 /* caller must hold rcu readlock and none of the nf_conntrack_locks */
636 static void nf_ct_gc_expired(struct nf_conn *ct) 636 static void nf_ct_gc_expired(struct nf_conn *ct)
637 { 637 {
638 if (!atomic_inc_not_zero(&ct->ct_general.use)) 638 if (!atomic_inc_not_zero(&ct->ct_general.use))
639 return; 639 return;
640 640
641 if (nf_ct_should_gc(ct)) 641 if (nf_ct_should_gc(ct))
642 nf_ct_kill(ct); 642 nf_ct_kill(ct);
643 643
644 nf_ct_put(ct); 644 nf_ct_put(ct);
645 } 645 }
646 646
/*
 * Warning :
 * - Caller must take a reference on returned object
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 */
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	struct hlist_nulls_node *n;
	unsigned int bucket, hsize;

begin:
	/* Snapshot the current table; it may be resized concurrently,
	 * which the nulls-value check at the end detects.
	 */
	nf_conntrack_get_ht(&ct_hash, &hsize);
	bucket = reciprocal_scale(hash, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
		struct nf_conn *ct;

		ct = nf_ct_tuplehash_to_ctrack(h);
		/* Opportunistically reap expired entries while walking. */
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_is_dying(ct))
			continue;

		if (nf_ct_key_equal(h, tuple, zone, net))
			return h;
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(n) != bucket) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	return NULL;
}
692 692
/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
			const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
begin:
	h = ____nf_conntrack_find(net, zone, tuple, hash);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		/* Try to take a reference; the entry may be dying or
		 * already freed (SLAB_TYPESAFE_BY_RCU reuse).
		 */
		if (unlikely(nf_ct_is_dying(ct) ||
			     !atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
		else {
			/* The slab object may have been recycled for a
			 * different tuple between lookup and ref-take;
			 * recheck and retry on mismatch.
			 */
			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
				nf_ct_put(ct);
				goto begin;
			}
		}
	}
	rcu_read_unlock();

	return h;
}
720 720
721 struct nf_conntrack_tuple_hash * 721 struct nf_conntrack_tuple_hash *
722 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, 722 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
723 const struct nf_conntrack_tuple *tuple) 723 const struct nf_conntrack_tuple *tuple)
724 { 724 {
725 return __nf_conntrack_find_get(net, zone, tuple, 725 return __nf_conntrack_find_get(net, zone, tuple,
726 hash_conntrack_raw(tuple, net)); 726 hash_conntrack_raw(tuple, net));
727 } 727 }
728 EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 728 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
729 729
730 static void __nf_conntrack_hash_insert(struct nf_conn *ct, 730 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
731 unsigned int hash, 731 unsigned int hash,
732 unsigned int reply_hash) 732 unsigned int reply_hash)
733 { 733 {
734 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 734 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
735 &nf_conntrack_hash[hash]); 735 &nf_conntrack_hash[hash]);
736 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 736 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
737 &nf_conntrack_hash[reply_hash]); 737 &nf_conntrack_hash[reply_hash]);
738 } 738 }
739 739
/* Insert @ct into the hash table unless an equal tuple (either
 * direction) is already present.  Returns 0 on success, -EEXIST when
 * the entry would clash.  The caller must hold a reference on @ct.
 */
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int sequence;

	zone = nf_ct_zone(ct);

	local_bh_disable();
	/* Recompute bucket hashes until the two bucket locks are taken
	 * under a stable table generation (resize may run concurrently).
	 */
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	smp_wmb();
	/* The caller holds a reference to this object */
	atomic_set(&ct->ct_general.use, 2);
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert);
	local_bh_enable();
	return 0;

out:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return -EEXIST;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
788 788
789 static inline void nf_ct_acct_update(struct nf_conn *ct, 789 static inline void nf_ct_acct_update(struct nf_conn *ct,
790 enum ip_conntrack_info ctinfo, 790 enum ip_conntrack_info ctinfo,
791 unsigned int len) 791 unsigned int len)
792 { 792 {
793 struct nf_conn_acct *acct; 793 struct nf_conn_acct *acct;
794 794
795 acct = nf_conn_acct_find(ct); 795 acct = nf_conn_acct_find(ct);
796 if (acct) { 796 if (acct) {
797 struct nf_conn_counter *counter = acct->counter; 797 struct nf_conn_counter *counter = acct->counter;
798 798
799 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); 799 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
800 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); 800 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
801 } 801 }
802 } 802 }
803 803
804 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, 804 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
805 const struct nf_conn *loser_ct) 805 const struct nf_conn *loser_ct)
806 { 806 {
807 struct nf_conn_acct *acct; 807 struct nf_conn_acct *acct;
808 808
809 acct = nf_conn_acct_find(loser_ct); 809 acct = nf_conn_acct_find(loser_ct);
810 if (acct) { 810 if (acct) {
811 struct nf_conn_counter *counter = acct->counter; 811 struct nf_conn_counter *counter = acct->counter;
812 unsigned int bytes; 812 unsigned int bytes;
813 813
814 /* u32 should be fine since we must have seen one packet. */ 814 /* u32 should be fine since we must have seen one packet. */
815 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); 815 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
816 nf_ct_acct_update(ct, ctinfo, bytes); 816 nf_ct_acct_update(ct, ctinfo, bytes);
817 } 817 }
818 } 818 }
819 819
/* Resolve race on insertion if this protocol allows this. */
static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
			       enum ip_conntrack_info ctinfo,
			       struct nf_conntrack_tuple_hash *h)
{
	/* This is the conntrack entry already in hashes that won race. */
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
	const struct nf_conntrack_l4proto *l4proto;

	/* Adopt the winner only when the L4 tracker opts in, no NAT has
	 * been set up on the winner, and a reference can still be taken.
	 */
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->allow_clash &&
	    ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
	    !nf_ct_is_dying(ct) &&
	    atomic_inc_not_zero(&ct->ct_general.use)) {
		enum ip_conntrack_info oldinfo;
		struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);

		/* Preserve the loser's traffic stats, drop its skb ref
		 * and re-point the skb at the winning entry.
		 */
		nf_ct_acct_merge(ct, ctinfo, loser_ct);
		nf_conntrack_put(&loser_ct->ct_general);
		nf_ct_set(skb, ct, oldinfo);
		return NF_ACCEPT;
	}
	NF_CT_STAT_INC(net, drop);
	return NF_DROP;
}
845 845
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	const struct nf_conntrack_zone *zone;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conn_tstamp *tstamp;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	unsigned int sequence;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	local_bh_disable();

	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		/* reuse the hash saved before */
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
	 * connections for unconfirmed conns. But packet copies and
	 * REJECT will give spurious warnings here.
	 */

	/* No external references means no one else could have
	 * confirmed us.
	 */
	WARN_ON(nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);
	/* We have to check the DYING flag after unlink to prevent
	 * a race against nf_ct_get_next_corpse() possibly called from
	 * user context, else we insert an already 'dead' hash, blocking
	 * further use of that particular connection -JM.
	 */
	nf_ct_del_from_dying_or_unconfirmed_list(ct);

	if (unlikely(nf_ct_is_dying(ct))) {
		nf_ct_add_to_dying_list(ct);
		goto dying;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout += nfct_time_stamp;
	atomic_inc(&ct->ct_general.use);
	ct->status |= IPS_CONFIRMED;

	/* set conntrack timestamp, if enabled. */
	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp) {
		if (skb->tstamp == 0)
			__net_timestamp(skb);

		tstamp->start = ktime_to_ns(skb->tstamp);
	}
	/* Since the lookup is lockless, hash insertion must be done after
	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
	 * guarantee that no other CPU can find the conntrack before the above
	 * stores are visible.
	 */
	__nf_conntrack_hash_insert(ct, hash, reply_hash);
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();

	/* Deliver events now that the entry is publicly visible. */
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	/* Lost the insertion race; park on the dying list and let the
	 * clash resolver decide whether the skb may adopt the winner.
	 */
	nf_ct_add_to_dying_list(ct);
	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
dying:
	nf_conntrack_double_unlock(hash, reply_hash);
	NF_CT_STAT_INC(net, insert_failed);
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
962 962
/* Returns true if a connection correspondings to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	unsigned int hash, hsize;
	struct hlist_nulls_node *n;
	struct nf_conn *ct;

	zone = nf_ct_zone(ignored_conntrack);

	rcu_read_lock();
begin:
	/* Snapshot the current table; the nulls check below detects a
	 * concurrent resize or chain move and restarts the walk.
	 */
	nf_conntrack_get_ht(&ct_hash, &hsize);
	hash = __hash_conntrack(net, tuple, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);

		/* The entry being replaced by NAT doesn't count. */
		if (ct == ignored_conntrack)
			continue;

		/* Opportunistically reap expired entries while walking. */
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_key_equal(h, tuple, zone, net)) {
			NF_CT_STAT_INC_ATOMIC(net, found);
			rcu_read_unlock();
			return 1;
		}
	}

	if (get_nulls_value(n) != hash) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
1012 1012
1013 #define NF_CT_EVICTION_RANGE 8 1013 #define NF_CT_EVICTION_RANGE 8
1014 1014
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static unsigned int early_drop_list(struct net *net,
				    struct hlist_nulls_head *head)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int drops = 0;
	struct nf_conn *tmp;

	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
		tmp = nf_ct_tuplehash_to_ctrack(h);

		/* Flow-offloaded entries are never early-dropped. */
		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
			continue;

		/* Expired entries are reaped rather than counted as drops. */
		if (nf_ct_is_expired(tmp)) {
			nf_ct_gc_expired(tmp);
			continue;
		}

		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
		    !net_eq(nf_ct_net(tmp), net) ||
		    nf_ct_is_dying(tmp))
			continue;

		if (!atomic_inc_not_zero(&tmp->ct_general.use))
			continue;

		/* kill only if still in same netns -- might have moved due to
		 * SLAB_TYPESAFE_BY_RCU rules.
		 *
		 * We steal the timer reference. If that fails timer has
		 * already fired or someone else deleted it. Just drop ref
		 * and move to next entry.
		 */
		if (net_eq(nf_ct_net(tmp), net) &&
		    nf_ct_is_confirmed(tmp) &&
		    nf_ct_delete(tmp, 0, 0))
			drops++;

		nf_ct_put(tmp);
	}

	return drops;
}
1061 1061
1062 static noinline int early_drop(struct net *net, unsigned int _hash) 1062 static noinline int early_drop(struct net *net, unsigned int _hash)
1063 { 1063 {
1064 unsigned int i; 1064 unsigned int i;
1065 1065
1066 for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { 1066 for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
1067 struct hlist_nulls_head *ct_hash; 1067 struct hlist_nulls_head *ct_hash;
1068 unsigned int hash, hsize, drops; 1068 unsigned int hash, hsize, drops;
1069 1069
1070 rcu_read_lock(); 1070 rcu_read_lock();
1071 nf_conntrack_get_ht(&ct_hash, &hsize); 1071 nf_conntrack_get_ht(&ct_hash, &hsize);
1072 hash = reciprocal_scale(_hash++, hsize); 1072 hash = reciprocal_scale(_hash++, hsize);
1073 1073
1074 drops = early_drop_list(net, &ct_hash[hash]); 1074 drops = early_drop_list(net, &ct_hash[hash]);
1075 rcu_read_unlock(); 1075 rcu_read_unlock();
1076 1076
1077 if (drops) { 1077 if (drops) {
1078 NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); 1078 NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
1079 return true; 1079 return true;
1080 } 1080 }
1081 } 1081 }
1082 1082
1083 return false; 1083 return false;
1084 } 1084 }
1085 1085
1086 static bool gc_worker_skip_ct(const struct nf_conn *ct) 1086 static bool gc_worker_skip_ct(const struct nf_conn *ct)
1087 { 1087 {
1088 return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); 1088 return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
1089 } 1089 }
1090 1090
1091 static bool gc_worker_can_early_drop(const struct nf_conn *ct) 1091 static bool gc_worker_can_early_drop(const struct nf_conn *ct)
1092 { 1092 {
1093 const struct nf_conntrack_l4proto *l4proto; 1093 const struct nf_conntrack_l4proto *l4proto;
1094 1094
1095 if (!test_bit(IPS_ASSURED_BIT, &ct->status)) 1095 if (!test_bit(IPS_ASSURED_BIT, &ct->status))
1096 return true; 1096 return true;
1097 1097
1098 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 1098 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
1099 if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) 1099 if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
1100 return true; 1100 return true;
1101 1101
1102 return false; 1102 return false;
1103 } 1103 }
1104 1104
1105 #define DAY (86400 * HZ) 1105 #define DAY (86400 * HZ)
1106 1106
1107 /* Set an arbitrary timeout large enough not to ever expire, this save 1107 /* Set an arbitrary timeout large enough not to ever expire, this save
1108 * us a check for the IPS_OFFLOAD_BIT from the packet path via 1108 * us a check for the IPS_OFFLOAD_BIT from the packet path via
1109 * nf_ct_is_expired(). 1109 * nf_ct_is_expired().
1110 */ 1110 */
1111 static void nf_ct_offload_timeout(struct nf_conn *ct) 1111 static void nf_ct_offload_timeout(struct nf_conn *ct)
1112 { 1112 {
1113 if (nf_ct_expires(ct) < DAY / 2) 1113 if (nf_ct_expires(ct) < DAY / 2)
1114 ct->timeout = nfct_time_stamp + DAY; 1114 ct->timeout = nfct_time_stamp + DAY;
1115 } 1115 }
1116 1116
1117 static void gc_worker(struct work_struct *work) 1117 static void gc_worker(struct work_struct *work)
1118 { 1118 {
1119 unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); 1119 unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
1120 unsigned int i, goal, buckets = 0, expired_count = 0; 1120 unsigned int i, goal, buckets = 0, expired_count = 0;
1121 unsigned int nf_conntrack_max95 = 0; 1121 unsigned int nf_conntrack_max95 = 0;
1122 struct conntrack_gc_work *gc_work; 1122 struct conntrack_gc_work *gc_work;
1123 unsigned int ratio, scanned = 0; 1123 unsigned int ratio, scanned = 0;
1124 unsigned long next_run; 1124 unsigned long next_run;
1125 1125
1126 gc_work = container_of(work, struct conntrack_gc_work, dwork.work); 1126 gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
1127 1127
1128 goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; 1128 goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
1129 i = gc_work->last_bucket; 1129 i = gc_work->last_bucket;
1130 if (gc_work->early_drop) 1130 if (gc_work->early_drop)
1131 nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; 1131 nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
1132 1132
1133 do { 1133 do {
1134 struct nf_conntrack_tuple_hash *h; 1134 struct nf_conntrack_tuple_hash *h;
1135 struct hlist_nulls_head *ct_hash; 1135 struct hlist_nulls_head *ct_hash;
1136 struct hlist_nulls_node *n; 1136 struct hlist_nulls_node *n;
1137 unsigned int hashsz; 1137 unsigned int hashsz;
1138 struct nf_conn *tmp; 1138 struct nf_conn *tmp;
1139 1139
1140 i++; 1140 i++;
1141 rcu_read_lock(); 1141 rcu_read_lock();
1142 1142
1143 nf_conntrack_get_ht(&ct_hash, &hashsz); 1143 nf_conntrack_get_ht(&ct_hash, &hashsz);
1144 if (i >= hashsz) 1144 if (i >= hashsz)
1145 i = 0; 1145 i = 0;
1146 1146
1147 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { 1147 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
1148 struct net *net; 1148 struct net *net;
1149 1149
1150 tmp = nf_ct_tuplehash_to_ctrack(h); 1150 tmp = nf_ct_tuplehash_to_ctrack(h);
1151 1151
1152 scanned++; 1152 scanned++;
1153 if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { 1153 if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
1154 nf_ct_offload_timeout(tmp); 1154 nf_ct_offload_timeout(tmp);
1155 continue; 1155 continue;
1156 } 1156 }
1157 1157
1158 if (nf_ct_is_expired(tmp)) { 1158 if (nf_ct_is_expired(tmp)) {
1159 nf_ct_gc_expired(tmp); 1159 nf_ct_gc_expired(tmp);
1160 expired_count++; 1160 expired_count++;
1161 continue; 1161 continue;
1162 } 1162 }
1163 1163
1164 if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) 1164 if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
1165 continue; 1165 continue;
1166 1166
1167 net = nf_ct_net(tmp); 1167 net = nf_ct_net(tmp);
1168 if (atomic_read(&net->ct.count) < nf_conntrack_max95) 1168 if (atomic_read(&net->ct.count) < nf_conntrack_max95)
1169 continue; 1169 continue;
1170 1170
1171 /* need to take reference to avoid possible races */ 1171 /* need to take reference to avoid possible races */
1172 if (!atomic_inc_not_zero(&tmp->ct_general.use)) 1172 if (!atomic_inc_not_zero(&tmp->ct_general.use))
1173 continue; 1173 continue;
1174 1174
1175 if (gc_worker_skip_ct(tmp)) { 1175 if (gc_worker_skip_ct(tmp)) {
1176 nf_ct_put(tmp); 1176 nf_ct_put(tmp);
1177 continue; 1177 continue;
1178 } 1178 }
1179 1179
1180 if (gc_worker_can_early_drop(tmp)) 1180 if (gc_worker_can_early_drop(tmp))
1181 nf_ct_kill(tmp); 1181 nf_ct_kill(tmp);
1182 1182
1183 nf_ct_put(tmp); 1183 nf_ct_put(tmp);
1184 } 1184 }
1185 1185
1186 /* could check get_nulls_value() here and restart if ct 1186 /* could check get_nulls_value() here and restart if ct
1187 * was moved to another chain. But given gc is best-effort 1187 * was moved to another chain. But given gc is best-effort
1188 * we will just continue with next hash slot. 1188 * we will just continue with next hash slot.
1189 */ 1189 */
1190 rcu_read_unlock(); 1190 rcu_read_unlock();
1191 cond_resched(); 1191 cond_resched();
1192 } while (++buckets < goal); 1192 } while (++buckets < goal);
1193 1193
1194 if (gc_work->exiting) 1194 if (gc_work->exiting)
1195 return; 1195 return;
1196 1196
1197 /* 1197 /*
1198 * Eviction will normally happen from the packet path, and not 1198 * Eviction will normally happen from the packet path, and not
1199 * from this gc worker. 1199 * from this gc worker.
1200 * 1200 *
1201 * This worker is only here to reap expired entries when system went 1201 * This worker is only here to reap expired entries when system went
1202 * idle after a busy period. 1202 * idle after a busy period.
1203 * 1203 *
1204 * The heuristics below are supposed to balance conflicting goals: 1204 * The heuristics below are supposed to balance conflicting goals:
1205 * 1205 *
1206 * 1. Minimize time until we notice a stale entry 1206 * 1. Minimize time until we notice a stale entry
1207 * 2. Maximize scan intervals to not waste cycles 1207 * 2. Maximize scan intervals to not waste cycles
1208 * 1208 *
1209 * Normally, expire ratio will be close to 0. 1209 * Normally, expire ratio will be close to 0.
1210 * 1210 *
1211 * As soon as a sizeable fraction of the entries have expired 1211 * As soon as a sizeable fraction of the entries have expired
1212 * increase scan frequency. 1212 * increase scan frequency.
1213 */ 1213 */
1214 ratio = scanned ? expired_count * 100 / scanned : 0; 1214 ratio = scanned ? expired_count * 100 / scanned : 0;
1215 if (ratio > GC_EVICT_RATIO) { 1215 if (ratio > GC_EVICT_RATIO) {
1216 gc_work->next_gc_run = min_interval; 1216 gc_work->next_gc_run = min_interval;
1217 } else { 1217 } else {
1218 unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; 1218 unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
1219 1219
1220 BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); 1220 BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
1221 1221
1222 gc_work->next_gc_run += min_interval; 1222 gc_work->next_gc_run += min_interval;
1223 if (gc_work->next_gc_run > max) 1223 if (gc_work->next_gc_run > max)
1224 gc_work->next_gc_run = max; 1224 gc_work->next_gc_run = max;
1225 } 1225 }
1226 1226
1227 next_run = gc_work->next_gc_run; 1227 next_run = gc_work->next_gc_run;
1228 gc_work->last_bucket = i; 1228 gc_work->last_bucket = i;
1229 gc_work->early_drop = false; 1229 gc_work->early_drop = false;
1230 queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); 1230 queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
1231 } 1231 }
1232 1232
1233 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) 1233 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
1234 { 1234 {
1235 INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); 1235 INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
1236 gc_work->next_gc_run = HZ; 1236 gc_work->next_gc_run = HZ;
1237 gc_work->exiting = false; 1237 gc_work->exiting = false;
1238 } 1238 }
1239 1239
1240 static struct nf_conn * 1240 static struct nf_conn *
1241 __nf_conntrack_alloc(struct net *net, 1241 __nf_conntrack_alloc(struct net *net,
1242 const struct nf_conntrack_zone *zone, 1242 const struct nf_conntrack_zone *zone,
1243 const struct nf_conntrack_tuple *orig, 1243 const struct nf_conntrack_tuple *orig,
1244 const struct nf_conntrack_tuple *repl, 1244 const struct nf_conntrack_tuple *repl,
1245 gfp_t gfp, u32 hash) 1245 gfp_t gfp, u32 hash)
1246 { 1246 {
1247 struct nf_conn *ct; 1247 struct nf_conn *ct;
1248 1248
1249 /* We don't want any race condition at early drop stage */ 1249 /* We don't want any race condition at early drop stage */
1250 atomic_inc(&net->ct.count); 1250 atomic_inc(&net->ct.count);
1251 1251
1252 if (nf_conntrack_max && 1252 if (nf_conntrack_max &&
1253 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 1253 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
1254 if (!early_drop(net, hash)) { 1254 if (!early_drop(net, hash)) {
1255 if (!conntrack_gc_work.early_drop) 1255 if (!conntrack_gc_work.early_drop)
1256 conntrack_gc_work.early_drop = true; 1256 conntrack_gc_work.early_drop = true;
1257 atomic_dec(&net->ct.count); 1257 atomic_dec(&net->ct.count);
1258 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); 1258 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
1259 return ERR_PTR(-ENOMEM); 1259 return ERR_PTR(-ENOMEM);
1260 } 1260 }
1261 } 1261 }
1262 1262
1263 /* 1263 /*
1264 * Do not use kmem_cache_zalloc(), as this cache uses 1264 * Do not use kmem_cache_zalloc(), as this cache uses
1265 * SLAB_TYPESAFE_BY_RCU. 1265 * SLAB_TYPESAFE_BY_RCU.
1266 */ 1266 */
1267 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); 1267 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
1268 if (ct == NULL) 1268 if (ct == NULL)
1269 goto out; 1269 goto out;
1270 1270
1271 spin_lock_init(&ct->lock); 1271 spin_lock_init(&ct->lock);
1272 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 1272 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
1273 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 1273 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
1274 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 1274 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
1275 /* save hash for reusing when confirming */ 1275 /* save hash for reusing when confirming */
1276 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; 1276 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
1277 ct->status = 0; 1277 ct->status = 0;
1278 write_pnet(&ct->ct_net, net); 1278 write_pnet(&ct->ct_net, net);
1279 memset(&ct->__nfct_init_offset[0], 0, 1279 memset(&ct->__nfct_init_offset[0], 0,
1280 offsetof(struct nf_conn, proto) - 1280 offsetof(struct nf_conn, proto) -
1281 offsetof(struct nf_conn, __nfct_init_offset[0])); 1281 offsetof(struct nf_conn, __nfct_init_offset[0]));
1282 1282
1283 nf_ct_zone_add(ct, zone); 1283 nf_ct_zone_add(ct, zone);
1284 1284
1285 /* Because we use RCU lookups, we set ct_general.use to zero before 1285 /* Because we use RCU lookups, we set ct_general.use to zero before
1286 * this is inserted in any list. 1286 * this is inserted in any list.
1287 */ 1287 */
1288 atomic_set(&ct->ct_general.use, 0); 1288 atomic_set(&ct->ct_general.use, 0);
1289 return ct; 1289 return ct;
1290 out: 1290 out:
1291 atomic_dec(&net->ct.count); 1291 atomic_dec(&net->ct.count);
1292 return ERR_PTR(-ENOMEM); 1292 return ERR_PTR(-ENOMEM);
1293 } 1293 }
1294 1294
1295 struct nf_conn *nf_conntrack_alloc(struct net *net, 1295 struct nf_conn *nf_conntrack_alloc(struct net *net,
1296 const struct nf_conntrack_zone *zone, 1296 const struct nf_conntrack_zone *zone,
1297 const struct nf_conntrack_tuple *orig, 1297 const struct nf_conntrack_tuple *orig,
1298 const struct nf_conntrack_tuple *repl, 1298 const struct nf_conntrack_tuple *repl,
1299 gfp_t gfp) 1299 gfp_t gfp)
1300 { 1300 {
1301 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0); 1301 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
1302 } 1302 }
1303 EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 1303 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
1304 1304
1305 void nf_conntrack_free(struct nf_conn *ct) 1305 void nf_conntrack_free(struct nf_conn *ct)
1306 { 1306 {
1307 struct net *net = nf_ct_net(ct); 1307 struct net *net = nf_ct_net(ct);
1308 1308
1309 /* A freed object has refcnt == 0, that's 1309 /* A freed object has refcnt == 0, that's
1310 * the golden rule for SLAB_TYPESAFE_BY_RCU 1310 * the golden rule for SLAB_TYPESAFE_BY_RCU
1311 */ 1311 */
1312 WARN_ON(atomic_read(&ct->ct_general.use) != 0); 1312 WARN_ON(atomic_read(&ct->ct_general.use) != 0);
1313 1313
1314 nf_ct_ext_destroy(ct); 1314 nf_ct_ext_destroy(ct);
1315 nf_ct_ext_free(ct); 1315 nf_ct_ext_free(ct);
1316 kmem_cache_free(nf_conntrack_cachep, ct); 1316 kmem_cache_free(nf_conntrack_cachep, ct);
1317 smp_mb__before_atomic(); 1317 smp_mb__before_atomic();
1318 atomic_dec(&net->ct.count); 1318 atomic_dec(&net->ct.count);
1319 } 1319 }
1320 EXPORT_SYMBOL_GPL(nf_conntrack_free); 1320 EXPORT_SYMBOL_GPL(nf_conntrack_free);
1321 1321
1322 1322
1323 /* Allocate a new conntrack: we return -ENOMEM if classification 1323 /* Allocate a new conntrack: we return -ENOMEM if classification
1324 failed due to stress. Otherwise it really is unclassifiable. */ 1324 failed due to stress. Otherwise it really is unclassifiable. */
1325 static noinline struct nf_conntrack_tuple_hash * 1325 static noinline struct nf_conntrack_tuple_hash *
1326 init_conntrack(struct net *net, struct nf_conn *tmpl, 1326 init_conntrack(struct net *net, struct nf_conn *tmpl,
1327 const struct nf_conntrack_tuple *tuple, 1327 const struct nf_conntrack_tuple *tuple,
1328 const struct nf_conntrack_l4proto *l4proto, 1328 const struct nf_conntrack_l4proto *l4proto,
1329 struct sk_buff *skb, 1329 struct sk_buff *skb,
1330 unsigned int dataoff, u32 hash) 1330 unsigned int dataoff, u32 hash)
1331 { 1331 {
1332 struct nf_conn *ct; 1332 struct nf_conn *ct;
1333 struct nf_conn_help *help; 1333 struct nf_conn_help *help;
1334 struct nf_conntrack_tuple repl_tuple; 1334 struct nf_conntrack_tuple repl_tuple;
1335 struct nf_conntrack_ecache *ecache; 1335 struct nf_conntrack_ecache *ecache;
1336 struct nf_conntrack_expect *exp = NULL; 1336 struct nf_conntrack_expect *exp = NULL;
1337 const struct nf_conntrack_zone *zone; 1337 const struct nf_conntrack_zone *zone;
1338 struct nf_conn_timeout *timeout_ext; 1338 struct nf_conn_timeout *timeout_ext;
1339 struct nf_conntrack_zone tmp; 1339 struct nf_conntrack_zone tmp;
1340 unsigned int *timeouts;
1341 1340
1342 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { 1341 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
1343 pr_debug("Can't invert tuple.\n"); 1342 pr_debug("Can't invert tuple.\n");
1344 return NULL; 1343 return NULL;
1345 } 1344 }
1346 1345
1347 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1346 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1348 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, 1347 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
1349 hash); 1348 hash);
1350 if (IS_ERR(ct)) 1349 if (IS_ERR(ct))
1351 return (struct nf_conntrack_tuple_hash *)ct; 1350 return (struct nf_conntrack_tuple_hash *)ct;
1352 1351
1353 if (!nf_ct_add_synproxy(ct, tmpl)) { 1352 if (!nf_ct_add_synproxy(ct, tmpl)) {
1354 nf_conntrack_free(ct); 1353 nf_conntrack_free(ct);
1355 return ERR_PTR(-ENOMEM); 1354 return ERR_PTR(-ENOMEM);
1356 } 1355 }
1357 1356
1358 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 1357 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
1359 if (timeout_ext) {
1360 timeouts = nf_ct_timeout_data(timeout_ext);
1361 if (unlikely(!timeouts))
1362 timeouts = l4proto->get_timeouts(net);
1363 } else {
1364 timeouts = l4proto->get_timeouts(net);
1365 }
1366 1358
1367 if (!l4proto->new(ct, skb, dataoff, timeouts)) { 1359 if (!l4proto->new(ct, skb, dataoff)) {
1368 nf_conntrack_free(ct); 1360 nf_conntrack_free(ct);
1369 pr_debug("can't track with proto module\n"); 1361 pr_debug("can't track with proto module\n");
1370 return NULL; 1362 return NULL;
1371 } 1363 }
1372 1364
1373 if (timeout_ext) 1365 if (timeout_ext)
1374 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), 1366 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
1375 GFP_ATOMIC); 1367 GFP_ATOMIC);
1376 1368
1377 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1369 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1378 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 1370 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1379 nf_ct_labels_ext_add(ct); 1371 nf_ct_labels_ext_add(ct);
1380 1372
1381 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 1373 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
1382 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 1374 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
1383 ecache ? ecache->expmask : 0, 1375 ecache ? ecache->expmask : 0,
1384 GFP_ATOMIC); 1376 GFP_ATOMIC);
1385 1377
1386 local_bh_disable(); 1378 local_bh_disable();
1387 if (net->ct.expect_count) { 1379 if (net->ct.expect_count) {
1388 spin_lock(&nf_conntrack_expect_lock); 1380 spin_lock(&nf_conntrack_expect_lock);
1389 exp = nf_ct_find_expectation(net, zone, tuple); 1381 exp = nf_ct_find_expectation(net, zone, tuple);
1390 if (exp) { 1382 if (exp) {
1391 pr_debug("expectation arrives ct=%p exp=%p\n", 1383 pr_debug("expectation arrives ct=%p exp=%p\n",
1392 ct, exp); 1384 ct, exp);
1393 /* Welcome, Mr. Bond. We've been expecting you... */ 1385 /* Welcome, Mr. Bond. We've been expecting you... */
1394 __set_bit(IPS_EXPECTED_BIT, &ct->status); 1386 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1395 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ 1387 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
1396 ct->master = exp->master; 1388 ct->master = exp->master;
1397 if (exp->helper) { 1389 if (exp->helper) {
1398 help = nf_ct_helper_ext_add(ct, exp->helper, 1390 help = nf_ct_helper_ext_add(ct, exp->helper,
1399 GFP_ATOMIC); 1391 GFP_ATOMIC);
1400 if (help) 1392 if (help)
1401 rcu_assign_pointer(help->helper, exp->helper); 1393 rcu_assign_pointer(help->helper, exp->helper);
1402 } 1394 }
1403 1395
1404 #ifdef CONFIG_NF_CONNTRACK_MARK 1396 #ifdef CONFIG_NF_CONNTRACK_MARK
1405 ct->mark = exp->master->mark; 1397 ct->mark = exp->master->mark;
1406 #endif 1398 #endif
1407 #ifdef CONFIG_NF_CONNTRACK_SECMARK 1399 #ifdef CONFIG_NF_CONNTRACK_SECMARK
1408 ct->secmark = exp->master->secmark; 1400 ct->secmark = exp->master->secmark;
1409 #endif 1401 #endif
1410 NF_CT_STAT_INC(net, expect_new); 1402 NF_CT_STAT_INC(net, expect_new);
1411 } 1403 }
1412 spin_unlock(&nf_conntrack_expect_lock); 1404 spin_unlock(&nf_conntrack_expect_lock);
1413 } 1405 }
1414 if (!exp) 1406 if (!exp)
1415 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); 1407 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
1416 1408
1417 /* Now it is inserted into the unconfirmed list, bump refcount */ 1409 /* Now it is inserted into the unconfirmed list, bump refcount */
1418 nf_conntrack_get(&ct->ct_general); 1410 nf_conntrack_get(&ct->ct_general);
1419 nf_ct_add_to_unconfirmed_list(ct); 1411 nf_ct_add_to_unconfirmed_list(ct);
1420 1412
1421 local_bh_enable(); 1413 local_bh_enable();
1422 1414
1423 if (exp) { 1415 if (exp) {
1424 if (exp->expectfn) 1416 if (exp->expectfn)
1425 exp->expectfn(ct, exp); 1417 exp->expectfn(ct, exp);
1426 nf_ct_expect_put(exp); 1418 nf_ct_expect_put(exp);
1427 } 1419 }
1428 1420
1429 return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; 1421 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
1430 } 1422 }
1431 1423
1432 /* On success, returns 0, sets skb->_nfct | ctinfo */ 1424 /* On success, returns 0, sets skb->_nfct | ctinfo */
1433 static int 1425 static int
1434 resolve_normal_ct(struct net *net, struct nf_conn *tmpl, 1426 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1435 struct sk_buff *skb, 1427 struct sk_buff *skb,
1436 unsigned int dataoff, 1428 unsigned int dataoff,
1437 u_int16_t l3num, 1429 u_int16_t l3num,
1438 u_int8_t protonum, 1430 u_int8_t protonum,
1439 const struct nf_conntrack_l4proto *l4proto) 1431 const struct nf_conntrack_l4proto *l4proto)
1440 { 1432 {
1441 const struct nf_conntrack_zone *zone; 1433 const struct nf_conntrack_zone *zone;
1442 struct nf_conntrack_tuple tuple; 1434 struct nf_conntrack_tuple tuple;
1443 struct nf_conntrack_tuple_hash *h; 1435 struct nf_conntrack_tuple_hash *h;
1444 enum ip_conntrack_info ctinfo; 1436 enum ip_conntrack_info ctinfo;
1445 struct nf_conntrack_zone tmp; 1437 struct nf_conntrack_zone tmp;
1446 struct nf_conn *ct; 1438 struct nf_conn *ct;
1447 u32 hash; 1439 u32 hash;
1448 1440
1449 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 1441 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
1450 dataoff, l3num, protonum, net, &tuple, l4proto)) { 1442 dataoff, l3num, protonum, net, &tuple, l4proto)) {
1451 pr_debug("Can't get tuple\n"); 1443 pr_debug("Can't get tuple\n");
1452 return 0; 1444 return 0;
1453 } 1445 }
1454 1446
1455 /* look for tuple match */ 1447 /* look for tuple match */
1456 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1448 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1457 hash = hash_conntrack_raw(&tuple, net); 1449 hash = hash_conntrack_raw(&tuple, net);
1458 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1450 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
1459 if (!h) { 1451 if (!h) {
1460 h = init_conntrack(net, tmpl, &tuple, l4proto, 1452 h = init_conntrack(net, tmpl, &tuple, l4proto,
1461 skb, dataoff, hash); 1453 skb, dataoff, hash);
1462 if (!h) 1454 if (!h)
1463 return 0; 1455 return 0;
1464 if (IS_ERR(h)) 1456 if (IS_ERR(h))
1465 return PTR_ERR(h); 1457 return PTR_ERR(h);
1466 } 1458 }
1467 ct = nf_ct_tuplehash_to_ctrack(h); 1459 ct = nf_ct_tuplehash_to_ctrack(h);
1468 1460
1469 /* It exists; we have (non-exclusive) reference. */ 1461 /* It exists; we have (non-exclusive) reference. */
1470 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 1462 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1471 ctinfo = IP_CT_ESTABLISHED_REPLY; 1463 ctinfo = IP_CT_ESTABLISHED_REPLY;
1472 } else { 1464 } else {
1473 /* Once we've had two way comms, always ESTABLISHED. */ 1465 /* Once we've had two way comms, always ESTABLISHED. */
1474 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 1466 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1475 pr_debug("normal packet for %p\n", ct); 1467 pr_debug("normal packet for %p\n", ct);
1476 ctinfo = IP_CT_ESTABLISHED; 1468 ctinfo = IP_CT_ESTABLISHED;
1477 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { 1469 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1478 pr_debug("related packet for %p\n", ct); 1470 pr_debug("related packet for %p\n", ct);
1479 ctinfo = IP_CT_RELATED; 1471 ctinfo = IP_CT_RELATED;
1480 } else { 1472 } else {
1481 pr_debug("new packet for %p\n", ct); 1473 pr_debug("new packet for %p\n", ct);
1482 ctinfo = IP_CT_NEW; 1474 ctinfo = IP_CT_NEW;
1483 } 1475 }
1484 } 1476 }
1485 nf_ct_set(skb, ct, ctinfo); 1477 nf_ct_set(skb, ct, ctinfo);
1486 return 0; 1478 return 0;
1487 } 1479 }
1488 1480
1489 unsigned int 1481 unsigned int
1490 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 1482 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1491 struct sk_buff *skb) 1483 struct sk_buff *skb)
1492 { 1484 {
1493 const struct nf_conntrack_l4proto *l4proto; 1485 const struct nf_conntrack_l4proto *l4proto;
1494 struct nf_conn *ct, *tmpl; 1486 struct nf_conn *ct, *tmpl;
1495 enum ip_conntrack_info ctinfo; 1487 enum ip_conntrack_info ctinfo;
1496 unsigned int *timeouts;
1497 u_int8_t protonum; 1488 u_int8_t protonum;
1498 int dataoff, ret; 1489 int dataoff, ret;
1499 1490
1500 tmpl = nf_ct_get(skb, &ctinfo); 1491 tmpl = nf_ct_get(skb, &ctinfo);
1501 if (tmpl || ctinfo == IP_CT_UNTRACKED) { 1492 if (tmpl || ctinfo == IP_CT_UNTRACKED) {
1502 /* Previously seen (loopback or untracked)? Ignore. */ 1493 /* Previously seen (loopback or untracked)? Ignore. */
1503 if ((tmpl && !nf_ct_is_template(tmpl)) || 1494 if ((tmpl && !nf_ct_is_template(tmpl)) ||
1504 ctinfo == IP_CT_UNTRACKED) { 1495 ctinfo == IP_CT_UNTRACKED) {
1505 NF_CT_STAT_INC_ATOMIC(net, ignore); 1496 NF_CT_STAT_INC_ATOMIC(net, ignore);
1506 return NF_ACCEPT; 1497 return NF_ACCEPT;
1507 } 1498 }
1508 skb->_nfct = 0; 1499 skb->_nfct = 0;
1509 } 1500 }
1510 1501
1511 /* rcu_read_lock()ed by nf_hook_thresh */ 1502 /* rcu_read_lock()ed by nf_hook_thresh */
1512 dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum); 1503 dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
1513 if (dataoff <= 0) { 1504 if (dataoff <= 0) {
1514 pr_debug("not prepared to track yet or error occurred\n"); 1505 pr_debug("not prepared to track yet or error occurred\n");
1515 NF_CT_STAT_INC_ATOMIC(net, error); 1506 NF_CT_STAT_INC_ATOMIC(net, error);
1516 NF_CT_STAT_INC_ATOMIC(net, invalid); 1507 NF_CT_STAT_INC_ATOMIC(net, invalid);
1517 ret = NF_ACCEPT; 1508 ret = NF_ACCEPT;
1518 goto out; 1509 goto out;
1519 } 1510 }
1520 1511
1521 l4proto = __nf_ct_l4proto_find(pf, protonum); 1512 l4proto = __nf_ct_l4proto_find(pf, protonum);
1522 1513
1523 /* It may be an special packet, error, unclean... 1514 /* It may be an special packet, error, unclean...
1524 * inverse of the return code tells to the netfilter 1515 * inverse of the return code tells to the netfilter
1525 * core what to do with the packet. */ 1516 * core what to do with the packet. */
1526 if (l4proto->error != NULL) { 1517 if (l4proto->error != NULL) {
1527 ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); 1518 ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum);
1528 if (ret <= 0) { 1519 if (ret <= 0) {
1529 NF_CT_STAT_INC_ATOMIC(net, error); 1520 NF_CT_STAT_INC_ATOMIC(net, error);
1530 NF_CT_STAT_INC_ATOMIC(net, invalid); 1521 NF_CT_STAT_INC_ATOMIC(net, invalid);
1531 ret = -ret; 1522 ret = -ret;
1532 goto out; 1523 goto out;
1533 } 1524 }
1534 /* ICMP[v6] protocol trackers may assign one conntrack. */ 1525 /* ICMP[v6] protocol trackers may assign one conntrack. */
1535 if (skb->_nfct) 1526 if (skb->_nfct)
1536 goto out; 1527 goto out;
1537 } 1528 }
1538 repeat: 1529 repeat:
1539 ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto); 1530 ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
1540 if (ret < 0) { 1531 if (ret < 0) {
1541 /* Too stressed to deal. */ 1532 /* Too stressed to deal. */
1542 NF_CT_STAT_INC_ATOMIC(net, drop); 1533 NF_CT_STAT_INC_ATOMIC(net, drop);
1543 ret = NF_DROP; 1534 ret = NF_DROP;
1544 goto out; 1535 goto out;
1545 } 1536 }
1546 1537
1547 ct = nf_ct_get(skb, &ctinfo); 1538 ct = nf_ct_get(skb, &ctinfo);
1548 if (!ct) { 1539 if (!ct) {
1549 /* Not valid part of a connection */ 1540 /* Not valid part of a connection */
1550 NF_CT_STAT_INC_ATOMIC(net, invalid); 1541 NF_CT_STAT_INC_ATOMIC(net, invalid);
1551 ret = NF_ACCEPT; 1542 ret = NF_ACCEPT;
1552 goto out; 1543 goto out;
1553 } 1544 }
1554 1545
1555 /* Decide what timeout policy we want to apply to this flow. */ 1546 ret = l4proto->packet(ct, skb, dataoff, ctinfo);
1556 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1557
1558 ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
1559 if (ret <= 0) { 1547 if (ret <= 0) {
1560 /* Invalid: inverse of the return code tells 1548 /* Invalid: inverse of the return code tells
1561 * the netfilter core what to do */ 1549 * the netfilter core what to do */
1562 pr_debug("nf_conntrack_in: Can't track with proto module\n"); 1550 pr_debug("nf_conntrack_in: Can't track with proto module\n");
1563 nf_conntrack_put(&ct->ct_general); 1551 nf_conntrack_put(&ct->ct_general);
1564 skb->_nfct = 0; 1552 skb->_nfct = 0;
1565 NF_CT_STAT_INC_ATOMIC(net, invalid); 1553 NF_CT_STAT_INC_ATOMIC(net, invalid);
1566 if (ret == -NF_DROP) 1554 if (ret == -NF_DROP)
1567 NF_CT_STAT_INC_ATOMIC(net, drop); 1555 NF_CT_STAT_INC_ATOMIC(net, drop);
1568 /* Special case: TCP tracker reports an attempt to reopen a 1556 /* Special case: TCP tracker reports an attempt to reopen a
1569 * closed/aborted connection. We have to go back and create a 1557 * closed/aborted connection. We have to go back and create a
1570 * fresh conntrack. 1558 * fresh conntrack.
1571 */ 1559 */
1572 if (ret == -NF_REPEAT) 1560 if (ret == -NF_REPEAT)
1573 goto repeat; 1561 goto repeat;
1574 ret = -ret; 1562 ret = -ret;
1575 goto out; 1563 goto out;
1576 } 1564 }
1577 1565
1578 if (ctinfo == IP_CT_ESTABLISHED_REPLY && 1566 if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
1579 !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 1567 !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1580 nf_conntrack_event_cache(IPCT_REPLY, ct); 1568 nf_conntrack_event_cache(IPCT_REPLY, ct);
1581 out: 1569 out:
1582 if (tmpl) 1570 if (tmpl)
1583 nf_ct_put(tmpl); 1571 nf_ct_put(tmpl);
1584 1572
1585 return ret; 1573 return ret;
1586 } 1574 }
1587 EXPORT_SYMBOL_GPL(nf_conntrack_in); 1575 EXPORT_SYMBOL_GPL(nf_conntrack_in);
1588 1576
1589 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, 1577 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1590 const struct nf_conntrack_tuple *orig) 1578 const struct nf_conntrack_tuple *orig)
1591 { 1579 {
1592 bool ret; 1580 bool ret;
1593 1581
1594 rcu_read_lock(); 1582 rcu_read_lock();
1595 ret = nf_ct_invert_tuple(inverse, orig, 1583 ret = nf_ct_invert_tuple(inverse, orig,
1596 __nf_ct_l4proto_find(orig->src.l3num, 1584 __nf_ct_l4proto_find(orig->src.l3num,
1597 orig->dst.protonum)); 1585 orig->dst.protonum));
1598 rcu_read_unlock(); 1586 rcu_read_unlock();
1599 return ret; 1587 return ret;
1600 } 1588 }
1601 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); 1589 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
1602 1590
1603 /* Alter reply tuple (maybe alter helper). This is for NAT, and is 1591 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1604 implicitly racy: see __nf_conntrack_confirm */ 1592 implicitly racy: see __nf_conntrack_confirm */
1605 void nf_conntrack_alter_reply(struct nf_conn *ct, 1593 void nf_conntrack_alter_reply(struct nf_conn *ct,
1606 const struct nf_conntrack_tuple *newreply) 1594 const struct nf_conntrack_tuple *newreply)
1607 { 1595 {
1608 struct nf_conn_help *help = nfct_help(ct); 1596 struct nf_conn_help *help = nfct_help(ct);
1609 1597
1610 /* Should be unconfirmed, so not in hash table yet */ 1598 /* Should be unconfirmed, so not in hash table yet */
1611 WARN_ON(nf_ct_is_confirmed(ct)); 1599 WARN_ON(nf_ct_is_confirmed(ct));
1612 1600
1613 pr_debug("Altering reply tuple of %p to ", ct); 1601 pr_debug("Altering reply tuple of %p to ", ct);
1614 nf_ct_dump_tuple(newreply); 1602 nf_ct_dump_tuple(newreply);
1615 1603
1616 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 1604 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1617 if (ct->master || (help && !hlist_empty(&help->expectations))) 1605 if (ct->master || (help && !hlist_empty(&help->expectations)))
1618 return; 1606 return;
1619 1607
1620 rcu_read_lock(); 1608 rcu_read_lock();
1621 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); 1609 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
1622 rcu_read_unlock(); 1610 rcu_read_unlock();
1623 } 1611 }
1624 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); 1612 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
1625 1613
1626 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ 1614 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1627 void __nf_ct_refresh_acct(struct nf_conn *ct, 1615 void __nf_ct_refresh_acct(struct nf_conn *ct,
1628 enum ip_conntrack_info ctinfo, 1616 enum ip_conntrack_info ctinfo,
1629 const struct sk_buff *skb, 1617 const struct sk_buff *skb,
1630 unsigned long extra_jiffies, 1618 unsigned long extra_jiffies,
1631 int do_acct) 1619 int do_acct)
1632 { 1620 {
1633 WARN_ON(!skb); 1621 WARN_ON(!skb);
1634 1622
1635 /* Only update if this is not a fixed timeout */ 1623 /* Only update if this is not a fixed timeout */
1636 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) 1624 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1637 goto acct; 1625 goto acct;
1638 1626
1639 /* If not in hash table, timer will not be active yet */ 1627 /* If not in hash table, timer will not be active yet */
1640 if (nf_ct_is_confirmed(ct)) 1628 if (nf_ct_is_confirmed(ct))
1641 extra_jiffies += nfct_time_stamp; 1629 extra_jiffies += nfct_time_stamp;
1642 1630
1643 ct->timeout = extra_jiffies; 1631 ct->timeout = extra_jiffies;
1644 acct: 1632 acct:
1645 if (do_acct) 1633 if (do_acct)
1646 nf_ct_acct_update(ct, ctinfo, skb->len); 1634 nf_ct_acct_update(ct, ctinfo, skb->len);
1647 } 1635 }
1648 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 1636 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1649 1637
1650 bool nf_ct_kill_acct(struct nf_conn *ct, 1638 bool nf_ct_kill_acct(struct nf_conn *ct,
1651 enum ip_conntrack_info ctinfo, 1639 enum ip_conntrack_info ctinfo,
1652 const struct sk_buff *skb) 1640 const struct sk_buff *skb)
1653 { 1641 {
1654 nf_ct_acct_update(ct, ctinfo, skb->len); 1642 nf_ct_acct_update(ct, ctinfo, skb->len);
1655 1643
1656 return nf_ct_delete(ct, 0, 0); 1644 return nf_ct_delete(ct, 0, 0);
1657 } 1645 }
1658 EXPORT_SYMBOL_GPL(nf_ct_kill_acct); 1646 EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
1659 1647
1660 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1648 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1661 1649
1662 #include <linux/netfilter/nfnetlink.h> 1650 #include <linux/netfilter/nfnetlink.h>
1663 #include <linux/netfilter/nfnetlink_conntrack.h> 1651 #include <linux/netfilter/nfnetlink_conntrack.h>
1664 #include <linux/mutex.h> 1652 #include <linux/mutex.h>
1665 1653
1666 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be 1654 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1667 * in ip_conntrack_core, since we don't want the protocols to autoload 1655 * in ip_conntrack_core, since we don't want the protocols to autoload
1668 * or depend on ctnetlink */ 1656 * or depend on ctnetlink */
1669 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 1657 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
1670 const struct nf_conntrack_tuple *tuple) 1658 const struct nf_conntrack_tuple *tuple)
1671 { 1659 {
1672 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || 1660 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1673 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) 1661 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1674 goto nla_put_failure; 1662 goto nla_put_failure;
1675 return 0; 1663 return 0;
1676 1664
1677 nla_put_failure: 1665 nla_put_failure:
1678 return -1; 1666 return -1;
1679 } 1667 }
1680 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); 1668 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
1681 1669
1682 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { 1670 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1683 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, 1671 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
1684 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, 1672 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
1685 }; 1673 };
1686 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); 1674 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
1687 1675
1688 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], 1676 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1689 struct nf_conntrack_tuple *t) 1677 struct nf_conntrack_tuple *t)
1690 { 1678 {
1691 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 1679 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
1692 return -EINVAL; 1680 return -EINVAL;
1693 1681
1694 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); 1682 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1695 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); 1683 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
1696 1684
1697 return 0; 1685 return 0;
1698 } 1686 }
1699 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 1687 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1700 1688
1701 unsigned int nf_ct_port_nlattr_tuple_size(void) 1689 unsigned int nf_ct_port_nlattr_tuple_size(void)
1702 { 1690 {
1703 static unsigned int size __read_mostly; 1691 static unsigned int size __read_mostly;
1704 1692
1705 if (!size) 1693 if (!size)
1706 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1694 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1707 1695
1708 return size; 1696 return size;
1709 } 1697 }
1710 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); 1698 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1711 #endif 1699 #endif
1712 1700
1713 /* Used by ipt_REJECT and ip6t_REJECT. */ 1701 /* Used by ipt_REJECT and ip6t_REJECT. */
1714 static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) 1702 static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1715 { 1703 {
1716 struct nf_conn *ct; 1704 struct nf_conn *ct;
1717 enum ip_conntrack_info ctinfo; 1705 enum ip_conntrack_info ctinfo;
1718 1706
1719 /* This ICMP is in reverse direction to the packet which caused it */ 1707 /* This ICMP is in reverse direction to the packet which caused it */
1720 ct = nf_ct_get(skb, &ctinfo); 1708 ct = nf_ct_get(skb, &ctinfo);
1721 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 1709 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1722 ctinfo = IP_CT_RELATED_REPLY; 1710 ctinfo = IP_CT_RELATED_REPLY;
1723 else 1711 else
1724 ctinfo = IP_CT_RELATED; 1712 ctinfo = IP_CT_RELATED;
1725 1713
1726 /* Attach to new skbuff, and increment count */ 1714 /* Attach to new skbuff, and increment count */
1727 nf_ct_set(nskb, ct, ctinfo); 1715 nf_ct_set(nskb, ct, ctinfo);
1728 nf_conntrack_get(skb_nfct(nskb)); 1716 nf_conntrack_get(skb_nfct(nskb));
1729 } 1717 }
1730 1718
1731 static int nf_conntrack_update(struct net *net, struct sk_buff *skb) 1719 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
1732 { 1720 {
1733 const struct nf_conntrack_l4proto *l4proto; 1721 const struct nf_conntrack_l4proto *l4proto;
1734 struct nf_conntrack_tuple_hash *h; 1722 struct nf_conntrack_tuple_hash *h;
1735 struct nf_conntrack_tuple tuple; 1723 struct nf_conntrack_tuple tuple;
1736 enum ip_conntrack_info ctinfo; 1724 enum ip_conntrack_info ctinfo;
1737 struct nf_nat_hook *nat_hook; 1725 struct nf_nat_hook *nat_hook;
1738 unsigned int status; 1726 unsigned int status;
1739 struct nf_conn *ct; 1727 struct nf_conn *ct;
1740 int dataoff; 1728 int dataoff;
1741 u16 l3num; 1729 u16 l3num;
1742 u8 l4num; 1730 u8 l4num;
1743 1731
1744 ct = nf_ct_get(skb, &ctinfo); 1732 ct = nf_ct_get(skb, &ctinfo);
1745 if (!ct || nf_ct_is_confirmed(ct)) 1733 if (!ct || nf_ct_is_confirmed(ct))
1746 return 0; 1734 return 0;
1747 1735
1748 l3num = nf_ct_l3num(ct); 1736 l3num = nf_ct_l3num(ct);
1749 1737
1750 dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); 1738 dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
1751 if (dataoff <= 0) 1739 if (dataoff <= 0)
1752 return -1; 1740 return -1;
1753 1741
1754 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 1742 l4proto = nf_ct_l4proto_find_get(l3num, l4num);
1755 1743
1756 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 1744 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
1757 l4num, net, &tuple, l4proto)) 1745 l4num, net, &tuple, l4proto))
1758 return -1; 1746 return -1;
1759 1747
1760 if (ct->status & IPS_SRC_NAT) { 1748 if (ct->status & IPS_SRC_NAT) {
1761 memcpy(tuple.src.u3.all, 1749 memcpy(tuple.src.u3.all,
1762 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all, 1750 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.all,
1763 sizeof(tuple.src.u3.all)); 1751 sizeof(tuple.src.u3.all));
1764 tuple.src.u.all = 1752 tuple.src.u.all =
1765 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all; 1753 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all;
1766 } 1754 }
1767 1755
1768 if (ct->status & IPS_DST_NAT) { 1756 if (ct->status & IPS_DST_NAT) {
1769 memcpy(tuple.dst.u3.all, 1757 memcpy(tuple.dst.u3.all,
1770 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all, 1758 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.all,
1771 sizeof(tuple.dst.u3.all)); 1759 sizeof(tuple.dst.u3.all));
1772 tuple.dst.u.all = 1760 tuple.dst.u.all =
1773 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all; 1761 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.all;
1774 } 1762 }
1775 1763
1776 h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); 1764 h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple);
1777 if (!h) 1765 if (!h)
1778 return 0; 1766 return 0;
1779 1767
1780 /* Store status bits of the conntrack that is clashing to re-do NAT 1768 /* Store status bits of the conntrack that is clashing to re-do NAT
1781 * mangling according to what it has been done already to this packet. 1769 * mangling according to what it has been done already to this packet.
1782 */ 1770 */
1783 status = ct->status; 1771 status = ct->status;
1784 1772
1785 nf_ct_put(ct); 1773 nf_ct_put(ct);
1786 ct = nf_ct_tuplehash_to_ctrack(h); 1774 ct = nf_ct_tuplehash_to_ctrack(h);
1787 nf_ct_set(skb, ct, ctinfo); 1775 nf_ct_set(skb, ct, ctinfo);
1788 1776
1789 nat_hook = rcu_dereference(nf_nat_hook); 1777 nat_hook = rcu_dereference(nf_nat_hook);
1790 if (!nat_hook) 1778 if (!nat_hook)
1791 return 0; 1779 return 0;
1792 1780
1793 if (status & IPS_SRC_NAT && 1781 if (status & IPS_SRC_NAT &&
1794 nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, 1782 nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC,
1795 IP_CT_DIR_ORIGINAL) == NF_DROP) 1783 IP_CT_DIR_ORIGINAL) == NF_DROP)
1796 return -1; 1784 return -1;
1797 1785
1798 if (status & IPS_DST_NAT && 1786 if (status & IPS_DST_NAT &&
1799 nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, 1787 nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST,
1800 IP_CT_DIR_ORIGINAL) == NF_DROP) 1788 IP_CT_DIR_ORIGINAL) == NF_DROP)
1801 return -1; 1789 return -1;
1802 1790
1803 return 0; 1791 return 0;
1804 } 1792 }
1805 1793
1806 static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, 1794 static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
1807 const struct sk_buff *skb) 1795 const struct sk_buff *skb)
1808 { 1796 {
1809 const struct nf_conntrack_tuple *src_tuple; 1797 const struct nf_conntrack_tuple *src_tuple;
1810 const struct nf_conntrack_tuple_hash *hash; 1798 const struct nf_conntrack_tuple_hash *hash;
1811 struct nf_conntrack_tuple srctuple; 1799 struct nf_conntrack_tuple srctuple;
1812 enum ip_conntrack_info ctinfo; 1800 enum ip_conntrack_info ctinfo;
1813 struct nf_conn *ct; 1801 struct nf_conn *ct;
1814 1802
1815 ct = nf_ct_get(skb, &ctinfo); 1803 ct = nf_ct_get(skb, &ctinfo);
1816 if (ct) { 1804 if (ct) {
1817 src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo)); 1805 src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
1818 memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); 1806 memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
1819 return true; 1807 return true;
1820 } 1808 }
1821 1809
1822 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), 1810 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
1823 NFPROTO_IPV4, dev_net(skb->dev), 1811 NFPROTO_IPV4, dev_net(skb->dev),
1824 &srctuple)) 1812 &srctuple))
1825 return false; 1813 return false;
1826 1814
1827 hash = nf_conntrack_find_get(dev_net(skb->dev), 1815 hash = nf_conntrack_find_get(dev_net(skb->dev),
1828 &nf_ct_zone_dflt, 1816 &nf_ct_zone_dflt,
1829 &srctuple); 1817 &srctuple);
1830 if (!hash) 1818 if (!hash)
1831 return false; 1819 return false;
1832 1820
1833 ct = nf_ct_tuplehash_to_ctrack(hash); 1821 ct = nf_ct_tuplehash_to_ctrack(hash);
1834 src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir); 1822 src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
1835 memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); 1823 memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
1836 nf_ct_put(ct); 1824 nf_ct_put(ct);
1837 1825
1838 return true; 1826 return true;
1839 } 1827 }
1840 1828
1841 /* Bring out ya dead! */ 1829 /* Bring out ya dead! */
1842 static struct nf_conn * 1830 static struct nf_conn *
1843 get_next_corpse(int (*iter)(struct nf_conn *i, void *data), 1831 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
1844 void *data, unsigned int *bucket) 1832 void *data, unsigned int *bucket)
1845 { 1833 {
1846 struct nf_conntrack_tuple_hash *h; 1834 struct nf_conntrack_tuple_hash *h;
1847 struct nf_conn *ct; 1835 struct nf_conn *ct;
1848 struct hlist_nulls_node *n; 1836 struct hlist_nulls_node *n;
1849 spinlock_t *lockp; 1837 spinlock_t *lockp;
1850 1838
1851 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { 1839 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1852 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; 1840 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1853 local_bh_disable(); 1841 local_bh_disable();
1854 nf_conntrack_lock(lockp); 1842 nf_conntrack_lock(lockp);
1855 if (*bucket < nf_conntrack_htable_size) { 1843 if (*bucket < nf_conntrack_htable_size) {
1856 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { 1844 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
1857 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1845 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1858 continue; 1846 continue;
1859 ct = nf_ct_tuplehash_to_ctrack(h); 1847 ct = nf_ct_tuplehash_to_ctrack(h);
1860 if (iter(ct, data)) 1848 if (iter(ct, data))
1861 goto found; 1849 goto found;
1862 } 1850 }
1863 } 1851 }
1864 spin_unlock(lockp); 1852 spin_unlock(lockp);
1865 local_bh_enable(); 1853 local_bh_enable();
1866 cond_resched(); 1854 cond_resched();
1867 } 1855 }
1868 1856
1869 return NULL; 1857 return NULL;
1870 found: 1858 found:
1871 atomic_inc(&ct->ct_general.use); 1859 atomic_inc(&ct->ct_general.use);
1872 spin_unlock(lockp); 1860 spin_unlock(lockp);
1873 local_bh_enable(); 1861 local_bh_enable();
1874 return ct; 1862 return ct;
1875 } 1863 }
1876 1864
1877 static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), 1865 static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
1878 void *data, u32 portid, int report) 1866 void *data, u32 portid, int report)
1879 { 1867 {
1880 unsigned int bucket = 0, sequence; 1868 unsigned int bucket = 0, sequence;
1881 struct nf_conn *ct; 1869 struct nf_conn *ct;
1882 1870
1883 might_sleep(); 1871 might_sleep();
1884 1872
1885 for (;;) { 1873 for (;;) {
1886 sequence = read_seqcount_begin(&nf_conntrack_generation); 1874 sequence = read_seqcount_begin(&nf_conntrack_generation);
1887 1875
1888 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { 1876 while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
1889 /* Time to push up daises... */ 1877 /* Time to push up daises... */
1890 1878
1891 nf_ct_delete(ct, portid, report); 1879 nf_ct_delete(ct, portid, report);
1892 nf_ct_put(ct); 1880 nf_ct_put(ct);
1893 cond_resched(); 1881 cond_resched();
1894 } 1882 }
1895 1883
1896 if (!read_seqcount_retry(&nf_conntrack_generation, sequence)) 1884 if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
1897 break; 1885 break;
1898 bucket = 0; 1886 bucket = 0;
1899 } 1887 }
1900 } 1888 }
1901 1889
/* Closure passed through nf_ct_iterate_cleanup() to restrict the
 * user-supplied iterator to a single network namespace.
 */
struct iter_data {
	int (*iter)(struct nf_conn *i, void *data);	/* wrapped callback */
	void *data;					/* its opaque argument */
	struct net *net;				/* namespace filter */
};
1907 1895
1908 static int iter_net_only(struct nf_conn *i, void *data) 1896 static int iter_net_only(struct nf_conn *i, void *data)
1909 { 1897 {
1910 struct iter_data *d = data; 1898 struct iter_data *d = data;
1911 1899
1912 if (!net_eq(d->net, nf_ct_net(i))) 1900 if (!net_eq(d->net, nf_ct_net(i)))
1913 return 0; 1901 return 0;
1914 1902
1915 return d->iter(i, d->data); 1903 return d->iter(i, d->data);
1916 } 1904 }
1917 1905
1918 static void 1906 static void
1919 __nf_ct_unconfirmed_destroy(struct net *net) 1907 __nf_ct_unconfirmed_destroy(struct net *net)
1920 { 1908 {
1921 int cpu; 1909 int cpu;
1922 1910
1923 for_each_possible_cpu(cpu) { 1911 for_each_possible_cpu(cpu) {
1924 struct nf_conntrack_tuple_hash *h; 1912 struct nf_conntrack_tuple_hash *h;
1925 struct hlist_nulls_node *n; 1913 struct hlist_nulls_node *n;
1926 struct ct_pcpu *pcpu; 1914 struct ct_pcpu *pcpu;
1927 1915
1928 pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); 1916 pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1929 1917
1930 spin_lock_bh(&pcpu->lock); 1918 spin_lock_bh(&pcpu->lock);
1931 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { 1919 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
1932 struct nf_conn *ct; 1920 struct nf_conn *ct;
1933 1921
1934 ct = nf_ct_tuplehash_to_ctrack(h); 1922 ct = nf_ct_tuplehash_to_ctrack(h);
1935 1923
1936 /* we cannot call iter() on unconfirmed list, the 1924 /* we cannot call iter() on unconfirmed list, the
1937 * owning cpu can reallocate ct->ext at any time. 1925 * owning cpu can reallocate ct->ext at any time.
1938 */ 1926 */
1939 set_bit(IPS_DYING_BIT, &ct->status); 1927 set_bit(IPS_DYING_BIT, &ct->status);
1940 } 1928 }
1941 spin_unlock_bh(&pcpu->lock); 1929 spin_unlock_bh(&pcpu->lock);
1942 cond_resched(); 1930 cond_resched();
1943 } 1931 }
1944 } 1932 }
1945 1933
1946 void nf_ct_unconfirmed_destroy(struct net *net) 1934 void nf_ct_unconfirmed_destroy(struct net *net)
1947 { 1935 {
1948 might_sleep(); 1936 might_sleep();
1949 1937
1950 if (atomic_read(&net->ct.count) > 0) { 1938 if (atomic_read(&net->ct.count) > 0) {
1951 __nf_ct_unconfirmed_destroy(net); 1939 __nf_ct_unconfirmed_destroy(net);
1952 nf_queue_nf_hook_drop(net); 1940 nf_queue_nf_hook_drop(net);
1953 synchronize_net(); 1941 synchronize_net();
1954 } 1942 }
1955 } 1943 }
1956 EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); 1944 EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
1957 1945
1958 void nf_ct_iterate_cleanup_net(struct net *net, 1946 void nf_ct_iterate_cleanup_net(struct net *net,
1959 int (*iter)(struct nf_conn *i, void *data), 1947 int (*iter)(struct nf_conn *i, void *data),
1960 void *data, u32 portid, int report) 1948 void *data, u32 portid, int report)
1961 { 1949 {
1962 struct iter_data d; 1950 struct iter_data d;
1963 1951
1964 might_sleep(); 1952 might_sleep();
1965 1953
1966 if (atomic_read(&net->ct.count) == 0) 1954 if (atomic_read(&net->ct.count) == 0)
1967 return; 1955 return;
1968 1956
1969 d.iter = iter; 1957 d.iter = iter;
1970 d.data = data; 1958 d.data = data;
1971 d.net = net; 1959 d.net = net;
1972 1960
1973 nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); 1961 nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
1974 } 1962 }
1975 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); 1963 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
1976 1964
1977 /** 1965 /**
1978 * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table 1966 * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
1979 * @iter: callback to invoke for each conntrack 1967 * @iter: callback to invoke for each conntrack
1980 * @data: data to pass to @iter 1968 * @data: data to pass to @iter
1981 * 1969 *
1982 * Like nf_ct_iterate_cleanup, but first marks conntracks on the 1970 * Like nf_ct_iterate_cleanup, but first marks conntracks on the
1983 * unconfirmed list as dying (so they will not be inserted into 1971 * unconfirmed list as dying (so they will not be inserted into
1984 * main table). 1972 * main table).
1985 * 1973 *
1986 * Can only be called in module exit path. 1974 * Can only be called in module exit path.
1987 */ 1975 */
1988 void 1976 void
1989 nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) 1977 nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
1990 { 1978 {
1991 struct net *net; 1979 struct net *net;
1992 1980
1993 down_read(&net_rwsem); 1981 down_read(&net_rwsem);
1994 for_each_net(net) { 1982 for_each_net(net) {
1995 if (atomic_read(&net->ct.count) == 0) 1983 if (atomic_read(&net->ct.count) == 0)
1996 continue; 1984 continue;
1997 __nf_ct_unconfirmed_destroy(net); 1985 __nf_ct_unconfirmed_destroy(net);
1998 nf_queue_nf_hook_drop(net); 1986 nf_queue_nf_hook_drop(net);
1999 } 1987 }
2000 up_read(&net_rwsem); 1988 up_read(&net_rwsem);
2001 1989
2002 /* Need to wait for netns cleanup worker to finish, if its 1990 /* Need to wait for netns cleanup worker to finish, if its
2003 * running -- it might have deleted a net namespace from 1991 * running -- it might have deleted a net namespace from
2004 * the global list, so our __nf_ct_unconfirmed_destroy() might 1992 * the global list, so our __nf_ct_unconfirmed_destroy() might
2005 * not have affected all namespaces. 1993 * not have affected all namespaces.
2006 */ 1994 */
2007 net_ns_barrier(); 1995 net_ns_barrier();
2008 1996
2009 /* a conntrack could have been unlinked from unconfirmed list 1997 /* a conntrack could have been unlinked from unconfirmed list
2010 * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy(). 1998 * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
2011 * This makes sure its inserted into conntrack table. 1999 * This makes sure its inserted into conntrack table.
2012 */ 2000 */
2013 synchronize_net(); 2001 synchronize_net();
2014 2002
2015 nf_ct_iterate_cleanup(iter, data, 0, 0); 2003 nf_ct_iterate_cleanup(iter, data, 0, 0);
2016 } 2004 }
2017 EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy); 2005 EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
2018 2006
/* Iterator: select every conntrack that belongs to the given netns. */
static int kill_all(struct nf_conn *i, void *data)
{
	return net_eq(nf_ct_net(i), data);
}
2023 2011
2024 void nf_ct_free_hashtable(void *hash, unsigned int size) 2012 void nf_ct_free_hashtable(void *hash, unsigned int size)
2025 { 2013 {
2026 if (is_vmalloc_addr(hash)) 2014 if (is_vmalloc_addr(hash))
2027 vfree(hash); 2015 vfree(hash);
2028 else 2016 else
2029 free_pages((unsigned long)hash, 2017 free_pages((unsigned long)hash,
2030 get_order(sizeof(struct hlist_head) * size)); 2018 get_order(sizeof(struct hlist_head) * size));
2031 } 2019 }
2032 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 2020 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
2033 2021
2034 void nf_conntrack_cleanup_start(void) 2022 void nf_conntrack_cleanup_start(void)
2035 { 2023 {
2036 conntrack_gc_work.exiting = true; 2024 conntrack_gc_work.exiting = true;
2037 RCU_INIT_POINTER(ip_ct_attach, NULL); 2025 RCU_INIT_POINTER(ip_ct_attach, NULL);
2038 } 2026 }
2039 2027
2040 void nf_conntrack_cleanup_end(void) 2028 void nf_conntrack_cleanup_end(void)
2041 { 2029 {
2042 RCU_INIT_POINTER(nf_ct_hook, NULL); 2030 RCU_INIT_POINTER(nf_ct_hook, NULL);
2043 cancel_delayed_work_sync(&conntrack_gc_work.dwork); 2031 cancel_delayed_work_sync(&conntrack_gc_work.dwork);
2044 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 2032 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
2045 2033
2046 nf_conntrack_proto_fini(); 2034 nf_conntrack_proto_fini();
2047 nf_conntrack_seqadj_fini(); 2035 nf_conntrack_seqadj_fini();
2048 nf_conntrack_labels_fini(); 2036 nf_conntrack_labels_fini();
2049 nf_conntrack_helper_fini(); 2037 nf_conntrack_helper_fini();
2050 nf_conntrack_timeout_fini(); 2038 nf_conntrack_timeout_fini();
2051 nf_conntrack_ecache_fini(); 2039 nf_conntrack_ecache_fini();
2052 nf_conntrack_tstamp_fini(); 2040 nf_conntrack_tstamp_fini();
2053 nf_conntrack_acct_fini(); 2041 nf_conntrack_acct_fini();
2054 nf_conntrack_expect_fini(); 2042 nf_conntrack_expect_fini();
2055 2043
2056 kmem_cache_destroy(nf_conntrack_cachep); 2044 kmem_cache_destroy(nf_conntrack_cachep);
2057 } 2045 }
2058 2046
2059 /* 2047 /*
2060 * Mishearing the voices in his head, our hero wonders how he's 2048 * Mishearing the voices in his head, our hero wonders how he's
2061 * supposed to kill the mall. 2049 * supposed to kill the mall.
2062 */ 2050 */
2063 void nf_conntrack_cleanup_net(struct net *net) 2051 void nf_conntrack_cleanup_net(struct net *net)
2064 { 2052 {
2065 LIST_HEAD(single); 2053 LIST_HEAD(single);
2066 2054
2067 list_add(&net->exit_list, &single); 2055 list_add(&net->exit_list, &single);
2068 nf_conntrack_cleanup_net_list(&single); 2056 nf_conntrack_cleanup_net_list(&single);
2069 } 2057 }
2070 2058
2071 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) 2059 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
2072 { 2060 {
2073 int busy; 2061 int busy;
2074 struct net *net; 2062 struct net *net;
2075 2063
2076 /* 2064 /*
2077 * This makes sure all current packets have passed through 2065 * This makes sure all current packets have passed through
2078 * netfilter framework. Roll on, two-stage module 2066 * netfilter framework. Roll on, two-stage module
2079 * delete... 2067 * delete...
2080 */ 2068 */
2081 synchronize_net(); 2069 synchronize_net();
2082 i_see_dead_people: 2070 i_see_dead_people:
2083 busy = 0; 2071 busy = 0;
2084 list_for_each_entry(net, net_exit_list, exit_list) { 2072 list_for_each_entry(net, net_exit_list, exit_list) {
2085 nf_ct_iterate_cleanup(kill_all, net, 0, 0); 2073 nf_ct_iterate_cleanup(kill_all, net, 0, 0);
2086 if (atomic_read(&net->ct.count) != 0) 2074 if (atomic_read(&net->ct.count) != 0)
2087 busy = 1; 2075 busy = 1;
2088 } 2076 }
2089 if (busy) { 2077 if (busy) {
2090 schedule(); 2078 schedule();
2091 goto i_see_dead_people; 2079 goto i_see_dead_people;
2092 } 2080 }
2093 2081
2094 list_for_each_entry(net, net_exit_list, exit_list) { 2082 list_for_each_entry(net, net_exit_list, exit_list) {
2095 nf_conntrack_proto_pernet_fini(net); 2083 nf_conntrack_proto_pernet_fini(net);
2096 nf_conntrack_helper_pernet_fini(net); 2084 nf_conntrack_helper_pernet_fini(net);
2097 nf_conntrack_ecache_pernet_fini(net); 2085 nf_conntrack_ecache_pernet_fini(net);
2098 nf_conntrack_tstamp_pernet_fini(net); 2086 nf_conntrack_tstamp_pernet_fini(net);
2099 nf_conntrack_acct_pernet_fini(net); 2087 nf_conntrack_acct_pernet_fini(net);
2100 nf_conntrack_expect_pernet_fini(net); 2088 nf_conntrack_expect_pernet_fini(net);
2101 free_percpu(net->ct.stat); 2089 free_percpu(net->ct.stat);
2102 free_percpu(net->ct.pcpu_lists); 2090 free_percpu(net->ct.pcpu_lists);
2103 } 2091 }
2104 } 2092 }
2105 2093
2106 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) 2094 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
2107 { 2095 {
2108 struct hlist_nulls_head *hash; 2096 struct hlist_nulls_head *hash;
2109 unsigned int nr_slots, i; 2097 unsigned int nr_slots, i;
2110 size_t sz; 2098 size_t sz;
2111 2099
2112 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) 2100 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
2113 return NULL; 2101 return NULL;
2114 2102
2115 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 2103 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
2116 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 2104 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
2117 2105
2118 if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head))) 2106 if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
2119 return NULL; 2107 return NULL;
2120 2108
2121 sz = nr_slots * sizeof(struct hlist_nulls_head); 2109 sz = nr_slots * sizeof(struct hlist_nulls_head);
2122 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2110 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
2123 get_order(sz)); 2111 get_order(sz));
2124 if (!hash) 2112 if (!hash)
2125 hash = vzalloc(sz); 2113 hash = vzalloc(sz);
2126 2114
2127 if (hash && nulls) 2115 if (hash && nulls)
2128 for (i = 0; i < nr_slots; i++) 2116 for (i = 0; i < nr_slots; i++)
2129 INIT_HLIST_NULLS_HEAD(&hash[i], i); 2117 INIT_HLIST_NULLS_HEAD(&hash[i], i);
2130 2118
2131 return hash; 2119 return hash;
2132 } 2120 }
2133 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); 2121 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
2134 2122
2135 int nf_conntrack_hash_resize(unsigned int hashsize) 2123 int nf_conntrack_hash_resize(unsigned int hashsize)
2136 { 2124 {
2137 int i, bucket; 2125 int i, bucket;
2138 unsigned int old_size; 2126 unsigned int old_size;
2139 struct hlist_nulls_head *hash, *old_hash; 2127 struct hlist_nulls_head *hash, *old_hash;
2140 struct nf_conntrack_tuple_hash *h; 2128 struct nf_conntrack_tuple_hash *h;
2141 struct nf_conn *ct; 2129 struct nf_conn *ct;
2142 2130
2143 if (!hashsize) 2131 if (!hashsize)
2144 return -EINVAL; 2132 return -EINVAL;
2145 2133
2146 hash = nf_ct_alloc_hashtable(&hashsize, 1); 2134 hash = nf_ct_alloc_hashtable(&hashsize, 1);
2147 if (!hash) 2135 if (!hash)
2148 return -ENOMEM; 2136 return -ENOMEM;
2149 2137
2150 old_size = nf_conntrack_htable_size; 2138 old_size = nf_conntrack_htable_size;
2151 if (old_size == hashsize) { 2139 if (old_size == hashsize) {
2152 nf_ct_free_hashtable(hash, hashsize); 2140 nf_ct_free_hashtable(hash, hashsize);
2153 return 0; 2141 return 0;
2154 } 2142 }
2155 2143
2156 local_bh_disable(); 2144 local_bh_disable();
2157 nf_conntrack_all_lock(); 2145 nf_conntrack_all_lock();
2158 write_seqcount_begin(&nf_conntrack_generation); 2146 write_seqcount_begin(&nf_conntrack_generation);
2159 2147
2160 /* Lookups in the old hash might happen in parallel, which means we 2148 /* Lookups in the old hash might happen in parallel, which means we
2161 * might get false negatives during connection lookup. New connections 2149 * might get false negatives during connection lookup. New connections
2162 * created because of a false negative won't make it into the hash 2150 * created because of a false negative won't make it into the hash
2163 * though since that required taking the locks. 2151 * though since that required taking the locks.
2164 */ 2152 */
2165 2153
2166 for (i = 0; i < nf_conntrack_htable_size; i++) { 2154 for (i = 0; i < nf_conntrack_htable_size; i++) {
2167 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { 2155 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
2168 h = hlist_nulls_entry(nf_conntrack_hash[i].first, 2156 h = hlist_nulls_entry(nf_conntrack_hash[i].first,
2169 struct nf_conntrack_tuple_hash, hnnode); 2157 struct nf_conntrack_tuple_hash, hnnode);
2170 ct = nf_ct_tuplehash_to_ctrack(h); 2158 ct = nf_ct_tuplehash_to_ctrack(h);
2171 hlist_nulls_del_rcu(&h->hnnode); 2159 hlist_nulls_del_rcu(&h->hnnode);
2172 bucket = __hash_conntrack(nf_ct_net(ct), 2160 bucket = __hash_conntrack(nf_ct_net(ct),
2173 &h->tuple, hashsize); 2161 &h->tuple, hashsize);
2174 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 2162 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
2175 } 2163 }
2176 } 2164 }
2177 old_size = nf_conntrack_htable_size; 2165 old_size = nf_conntrack_htable_size;
2178 old_hash = nf_conntrack_hash; 2166 old_hash = nf_conntrack_hash;
2179 2167
2180 nf_conntrack_hash = hash; 2168 nf_conntrack_hash = hash;
2181 nf_conntrack_htable_size = hashsize; 2169 nf_conntrack_htable_size = hashsize;
2182 2170
2183 write_seqcount_end(&nf_conntrack_generation); 2171 write_seqcount_end(&nf_conntrack_generation);
2184 nf_conntrack_all_unlock(); 2172 nf_conntrack_all_unlock();
2185 local_bh_enable(); 2173 local_bh_enable();
2186 2174
2187 synchronize_net(); 2175 synchronize_net();
2188 nf_ct_free_hashtable(old_hash, old_size); 2176 nf_ct_free_hashtable(old_hash, old_size);
2189 return 0; 2177 return 0;
2190 } 2178 }
2191 2179
2192 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) 2180 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
2193 { 2181 {
2194 unsigned int hashsize; 2182 unsigned int hashsize;
2195 int rc; 2183 int rc;
2196 2184
2197 if (current->nsproxy->net_ns != &init_net) 2185 if (current->nsproxy->net_ns != &init_net)
2198 return -EOPNOTSUPP; 2186 return -EOPNOTSUPP;
2199 2187
2200 /* On boot, we can set this without any fancy locking. */ 2188 /* On boot, we can set this without any fancy locking. */
2201 if (!nf_conntrack_htable_size) 2189 if (!nf_conntrack_htable_size)
2202 return param_set_uint(val, kp); 2190 return param_set_uint(val, kp);
2203 2191
2204 rc = kstrtouint(val, 0, &hashsize); 2192 rc = kstrtouint(val, 0, &hashsize);
2205 if (rc) 2193 if (rc)
2206 return rc; 2194 return rc;
2207 2195
2208 return nf_conntrack_hash_resize(hashsize); 2196 return nf_conntrack_hash_resize(hashsize);
2209 } 2197 }
2210 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 2198 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
2211 2199
2212 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 2200 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
2213 &nf_conntrack_htable_size, 0600); 2201 &nf_conntrack_htable_size, 0600);
2214 2202
2215 static __always_inline unsigned int total_extension_size(void) 2203 static __always_inline unsigned int total_extension_size(void)
2216 { 2204 {
2217 /* remember to add new extensions below */ 2205 /* remember to add new extensions below */
2218 BUILD_BUG_ON(NF_CT_EXT_NUM > 9); 2206 BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
2219 2207
2220 return sizeof(struct nf_ct_ext) + 2208 return sizeof(struct nf_ct_ext) +
2221 sizeof(struct nf_conn_help) 2209 sizeof(struct nf_conn_help)
2222 #if IS_ENABLED(CONFIG_NF_NAT) 2210 #if IS_ENABLED(CONFIG_NF_NAT)
2223 + sizeof(struct nf_conn_nat) 2211 + sizeof(struct nf_conn_nat)
2224 #endif 2212 #endif
2225 + sizeof(struct nf_conn_seqadj) 2213 + sizeof(struct nf_conn_seqadj)
2226 + sizeof(struct nf_conn_acct) 2214 + sizeof(struct nf_conn_acct)
2227 #ifdef CONFIG_NF_CONNTRACK_EVENTS 2215 #ifdef CONFIG_NF_CONNTRACK_EVENTS
2228 + sizeof(struct nf_conntrack_ecache) 2216 + sizeof(struct nf_conntrack_ecache)
2229 #endif 2217 #endif
2230 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 2218 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
2231 + sizeof(struct nf_conn_tstamp) 2219 + sizeof(struct nf_conn_tstamp)
2232 #endif 2220 #endif
2233 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 2221 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
2234 + sizeof(struct nf_conn_timeout) 2222 + sizeof(struct nf_conn_timeout)
2235 #endif 2223 #endif
2236 #ifdef CONFIG_NF_CONNTRACK_LABELS 2224 #ifdef CONFIG_NF_CONNTRACK_LABELS
2237 + sizeof(struct nf_conn_labels) 2225 + sizeof(struct nf_conn_labels)
2238 #endif 2226 #endif
2239 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) 2227 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
2240 + sizeof(struct nf_conn_synproxy) 2228 + sizeof(struct nf_conn_synproxy)
2241 #endif 2229 #endif
2242 ; 2230 ;
2243 }; 2231 };
2244 2232
2245 int nf_conntrack_init_start(void) 2233 int nf_conntrack_init_start(void)
2246 { 2234 {
2247 int max_factor = 8; 2235 int max_factor = 8;
2248 int ret = -ENOMEM; 2236 int ret = -ENOMEM;
2249 int i; 2237 int i;
2250 2238
2251 /* struct nf_ct_ext uses u8 to store offsets/size */ 2239 /* struct nf_ct_ext uses u8 to store offsets/size */
2252 BUILD_BUG_ON(total_extension_size() > 255u); 2240 BUILD_BUG_ON(total_extension_size() > 255u);
2253 2241
2254 seqcount_init(&nf_conntrack_generation); 2242 seqcount_init(&nf_conntrack_generation);
2255 2243
2256 for (i = 0; i < CONNTRACK_LOCKS; i++) 2244 for (i = 0; i < CONNTRACK_LOCKS; i++)
2257 spin_lock_init(&nf_conntrack_locks[i]); 2245 spin_lock_init(&nf_conntrack_locks[i]);
2258 2246
2259 if (!nf_conntrack_htable_size) { 2247 if (!nf_conntrack_htable_size) {
2260 /* Idea from tcp.c: use 1/16384 of memory. 2248 /* Idea from tcp.c: use 1/16384 of memory.
2261 * On i386: 32MB machine has 512 buckets. 2249 * On i386: 32MB machine has 512 buckets.
2262 * >= 1GB machines have 16384 buckets. 2250 * >= 1GB machines have 16384 buckets.
2263 * >= 4GB machines have 65536 buckets. 2251 * >= 4GB machines have 65536 buckets.
2264 */ 2252 */
2265 nf_conntrack_htable_size 2253 nf_conntrack_htable_size
2266 = (((totalram_pages << PAGE_SHIFT) / 16384) 2254 = (((totalram_pages << PAGE_SHIFT) / 16384)
2267 / sizeof(struct hlist_head)); 2255 / sizeof(struct hlist_head));
2268 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) 2256 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
2269 nf_conntrack_htable_size = 65536; 2257 nf_conntrack_htable_size = 65536;
2270 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) 2258 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
2271 nf_conntrack_htable_size = 16384; 2259 nf_conntrack_htable_size = 16384;
2272 if (nf_conntrack_htable_size < 32) 2260 if (nf_conntrack_htable_size < 32)
2273 nf_conntrack_htable_size = 32; 2261 nf_conntrack_htable_size = 32;
2274 2262
2275 /* Use a max. factor of four by default to get the same max as 2263 /* Use a max. factor of four by default to get the same max as
2276 * with the old struct list_heads. When a table size is given 2264 * with the old struct list_heads. When a table size is given
2277 * we use the old value of 8 to avoid reducing the max. 2265 * we use the old value of 8 to avoid reducing the max.
2278 * entries. */ 2266 * entries. */
2279 max_factor = 4; 2267 max_factor = 4;
2280 } 2268 }
2281 2269
2282 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); 2270 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
2283 if (!nf_conntrack_hash) 2271 if (!nf_conntrack_hash)
2284 return -ENOMEM; 2272 return -ENOMEM;
2285 2273
2286 nf_conntrack_max = max_factor * nf_conntrack_htable_size; 2274 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
2287 2275
2288 nf_conntrack_cachep = kmem_cache_create("nf_conntrack", 2276 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
2289 sizeof(struct nf_conn), 2277 sizeof(struct nf_conn),
2290 NFCT_INFOMASK + 1, 2278 NFCT_INFOMASK + 1,
2291 SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); 2279 SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
2292 if (!nf_conntrack_cachep) 2280 if (!nf_conntrack_cachep)
2293 goto err_cachep; 2281 goto err_cachep;
2294 2282
2295 ret = nf_conntrack_expect_init(); 2283 ret = nf_conntrack_expect_init();
2296 if (ret < 0) 2284 if (ret < 0)
2297 goto err_expect; 2285 goto err_expect;
2298 2286
2299 ret = nf_conntrack_acct_init(); 2287 ret = nf_conntrack_acct_init();
2300 if (ret < 0) 2288 if (ret < 0)
2301 goto err_acct; 2289 goto err_acct;
2302 2290
2303 ret = nf_conntrack_tstamp_init(); 2291 ret = nf_conntrack_tstamp_init();
2304 if (ret < 0) 2292 if (ret < 0)
2305 goto err_tstamp; 2293 goto err_tstamp;
2306 2294
2307 ret = nf_conntrack_ecache_init(); 2295 ret = nf_conntrack_ecache_init();
2308 if (ret < 0) 2296 if (ret < 0)
2309 goto err_ecache; 2297 goto err_ecache;
2310 2298
2311 ret = nf_conntrack_timeout_init(); 2299 ret = nf_conntrack_timeout_init();
2312 if (ret < 0) 2300 if (ret < 0)
2313 goto err_timeout; 2301 goto err_timeout;
2314 2302
2315 ret = nf_conntrack_helper_init(); 2303 ret = nf_conntrack_helper_init();
2316 if (ret < 0) 2304 if (ret < 0)
2317 goto err_helper; 2305 goto err_helper;
2318 2306
2319 ret = nf_conntrack_labels_init(); 2307 ret = nf_conntrack_labels_init();
2320 if (ret < 0) 2308 if (ret < 0)
2321 goto err_labels; 2309 goto err_labels;
2322 2310
2323 ret = nf_conntrack_seqadj_init(); 2311 ret = nf_conntrack_seqadj_init();
2324 if (ret < 0) 2312 if (ret < 0)
2325 goto err_seqadj; 2313 goto err_seqadj;
2326 2314
2327 ret = nf_conntrack_proto_init(); 2315 ret = nf_conntrack_proto_init();
2328 if (ret < 0) 2316 if (ret < 0)
2329 goto err_proto; 2317 goto err_proto;
2330 2318
2331 conntrack_gc_work_init(&conntrack_gc_work); 2319 conntrack_gc_work_init(&conntrack_gc_work);
2332 queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); 2320 queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
2333 2321
2334 return 0; 2322 return 0;
2335 2323
2336 err_proto: 2324 err_proto:
2337 nf_conntrack_seqadj_fini(); 2325 nf_conntrack_seqadj_fini();
2338 err_seqadj: 2326 err_seqadj:
2339 nf_conntrack_labels_fini(); 2327 nf_conntrack_labels_fini();
2340 err_labels: 2328 err_labels:
2341 nf_conntrack_helper_fini(); 2329 nf_conntrack_helper_fini();
2342 err_helper: 2330 err_helper:
2343 nf_conntrack_timeout_fini(); 2331 nf_conntrack_timeout_fini();
2344 err_timeout: 2332 err_timeout:
2345 nf_conntrack_ecache_fini(); 2333 nf_conntrack_ecache_fini();
2346 err_ecache: 2334 err_ecache:
2347 nf_conntrack_tstamp_fini(); 2335 nf_conntrack_tstamp_fini();
2348 err_tstamp: 2336 err_tstamp:
2349 nf_conntrack_acct_fini(); 2337 nf_conntrack_acct_fini();
2350 err_acct: 2338 err_acct:
2351 nf_conntrack_expect_fini(); 2339 nf_conntrack_expect_fini();
2352 err_expect: 2340 err_expect:
2353 kmem_cache_destroy(nf_conntrack_cachep); 2341 kmem_cache_destroy(nf_conntrack_cachep);
2354 err_cachep: 2342 err_cachep:
2355 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 2343 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
2356 return ret; 2344 return ret;
2357 } 2345 }
2358 2346
2359 static struct nf_ct_hook nf_conntrack_hook = { 2347 static struct nf_ct_hook nf_conntrack_hook = {
2360 .update = nf_conntrack_update, 2348 .update = nf_conntrack_update,
2361 .destroy = destroy_conntrack, 2349 .destroy = destroy_conntrack,
2362 .get_tuple_skb = nf_conntrack_get_tuple_skb, 2350 .get_tuple_skb = nf_conntrack_get_tuple_skb,
2363 }; 2351 };
2364 2352
2365 void nf_conntrack_init_end(void) 2353 void nf_conntrack_init_end(void)
2366 { 2354 {
2367 /* For use by REJECT target */ 2355 /* For use by REJECT target */
2368 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); 2356 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
2369 RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook); 2357 RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
2370 } 2358 }
2371 2359
2372 /* 2360 /*
2373 * We need to use special "null" values, not used in hash table 2361 * We need to use special "null" values, not used in hash table
2374 */ 2362 */
2375 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) 2363 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
2376 #define DYING_NULLS_VAL ((1<<30)+1) 2364 #define DYING_NULLS_VAL ((1<<30)+1)
2377 #define TEMPLATE_NULLS_VAL ((1<<30)+2) 2365 #define TEMPLATE_NULLS_VAL ((1<<30)+2)
2378 2366
2379 int nf_conntrack_init_net(struct net *net) 2367 int nf_conntrack_init_net(struct net *net)
2380 { 2368 {
2381 int ret = -ENOMEM; 2369 int ret = -ENOMEM;
2382 int cpu; 2370 int cpu;
2383 2371
2384 BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); 2372 BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
2385 atomic_set(&net->ct.count, 0); 2373 atomic_set(&net->ct.count, 0);
2386 2374
2387 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); 2375 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
2388 if (!net->ct.pcpu_lists) 2376 if (!net->ct.pcpu_lists)
2389 goto err_stat; 2377 goto err_stat;
2390 2378
2391 for_each_possible_cpu(cpu) { 2379 for_each_possible_cpu(cpu) {
2392 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); 2380 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
2393 2381
2394 spin_lock_init(&pcpu->lock); 2382 spin_lock_init(&pcpu->lock);
2395 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); 2383 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
2396 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); 2384 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
2397 } 2385 }
2398 2386
2399 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 2387 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
2400 if (!net->ct.stat) 2388 if (!net->ct.stat)
2401 goto err_pcpu_lists; 2389 goto err_pcpu_lists;
2402 2390
2403 ret = nf_conntrack_expect_pernet_init(net); 2391 ret = nf_conntrack_expect_pernet_init(net);
2404 if (ret < 0) 2392 if (ret < 0)
2405 goto err_expect; 2393 goto err_expect;
2406 ret = nf_conntrack_acct_pernet_init(net); 2394 ret = nf_conntrack_acct_pernet_init(net);
2407 if (ret < 0) 2395 if (ret < 0)
2408 goto err_acct; 2396 goto err_acct;
2409 ret = nf_conntrack_tstamp_pernet_init(net); 2397 ret = nf_conntrack_tstamp_pernet_init(net);
2410 if (ret < 0) 2398 if (ret < 0)
2411 goto err_tstamp; 2399 goto err_tstamp;
2412 ret = nf_conntrack_ecache_pernet_init(net); 2400 ret = nf_conntrack_ecache_pernet_init(net);
2413 if (ret < 0) 2401 if (ret < 0)
2414 goto err_ecache; 2402 goto err_ecache;
2415 ret = nf_conntrack_helper_pernet_init(net); 2403 ret = nf_conntrack_helper_pernet_init(net);
2416 if (ret < 0) 2404 if (ret < 0)
2417 goto err_helper; 2405 goto err_helper;
2418 ret = nf_conntrack_proto_pernet_init(net); 2406 ret = nf_conntrack_proto_pernet_init(net);
2419 if (ret < 0) 2407 if (ret < 0)
2420 goto err_proto; 2408 goto err_proto;
2421 return 0; 2409 return 0;
2422 2410
2423 err_proto: 2411 err_proto:
2424 nf_conntrack_helper_pernet_fini(net); 2412 nf_conntrack_helper_pernet_fini(net);
2425 err_helper: 2413 err_helper:
2426 nf_conntrack_ecache_pernet_fini(net); 2414 nf_conntrack_ecache_pernet_fini(net);
2427 err_ecache: 2415 err_ecache:
2428 nf_conntrack_tstamp_pernet_fini(net); 2416 nf_conntrack_tstamp_pernet_fini(net);
2429 err_tstamp: 2417 err_tstamp:
2430 nf_conntrack_acct_pernet_fini(net); 2418 nf_conntrack_acct_pernet_fini(net);
2431 err_acct: 2419 err_acct:
2432 nf_conntrack_expect_pernet_fini(net); 2420 nf_conntrack_expect_pernet_fini(net);
2433 err_expect: 2421 err_expect:
2434 free_percpu(net->ct.stat); 2422 free_percpu(net->ct.stat);
2435 err_pcpu_lists: 2423 err_pcpu_lists:
2436 free_percpu(net->ct.pcpu_lists); 2424 free_percpu(net->ct.pcpu_lists);
2437 err_stat: 2425 err_stat:
2438 return ret; 2426 return ret;
2439 } 2427 }
2440 2428
net/netfilter/nf_conntrack_proto_dccp.c
1 /* 1 /*
2 * DCCP connection tracking protocol helper 2 * DCCP connection tracking protocol helper
3 * 3 *
4 * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net> 4 * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 * 9 *
10 */ 10 */
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/init.h> 12 #include <linux/init.h>
13 #include <linux/sysctl.h> 13 #include <linux/sysctl.h>
14 #include <linux/spinlock.h> 14 #include <linux/spinlock.h>
15 #include <linux/skbuff.h> 15 #include <linux/skbuff.h>
16 #include <linux/dccp.h> 16 #include <linux/dccp.h>
17 #include <linux/slab.h> 17 #include <linux/slab.h>
18 18
19 #include <net/net_namespace.h> 19 #include <net/net_namespace.h>
20 #include <net/netns/generic.h> 20 #include <net/netns/generic.h>
21 21
22 #include <linux/netfilter/nfnetlink_conntrack.h> 22 #include <linux/netfilter/nfnetlink_conntrack.h>
23 #include <net/netfilter/nf_conntrack.h> 23 #include <net/netfilter/nf_conntrack.h>
24 #include <net/netfilter/nf_conntrack_l4proto.h> 24 #include <net/netfilter/nf_conntrack_l4proto.h>
25 #include <net/netfilter/nf_conntrack_ecache.h> 25 #include <net/netfilter/nf_conntrack_ecache.h>
26 #include <net/netfilter/nf_conntrack_timeout.h>
26 #include <net/netfilter/nf_log.h> 27 #include <net/netfilter/nf_log.h>
27 28
28 /* Timeouts are based on values from RFC4340: 29 /* Timeouts are based on values from RFC4340:
29 * 30 *
30 * - REQUEST: 31 * - REQUEST:
31 * 32 *
32 * 8.1.2. Client Request 33 * 8.1.2. Client Request
33 * 34 *
34 * A client MAY give up on its DCCP-Requests after some time 35 * A client MAY give up on its DCCP-Requests after some time
35 * (3 minutes, for example). 36 * (3 minutes, for example).
36 * 37 *
37 * - RESPOND: 38 * - RESPOND:
38 * 39 *
39 * 8.1.3. Server Response 40 * 8.1.3. Server Response
40 * 41 *
41 * It MAY also leave the RESPOND state for CLOSED after a timeout of 42 * It MAY also leave the RESPOND state for CLOSED after a timeout of
42 * not less than 4MSL (8 minutes); 43 * not less than 4MSL (8 minutes);
43 * 44 *
44 * - PARTOPEN: 45 * - PARTOPEN:
45 * 46 *
46 * 8.1.5. Handshake Completion 47 * 8.1.5. Handshake Completion
47 * 48 *
48 * If the client remains in PARTOPEN for more than 4MSL (8 minutes), 49 * If the client remains in PARTOPEN for more than 4MSL (8 minutes),
49 * it SHOULD reset the connection with Reset Code 2, "Aborted". 50 * it SHOULD reset the connection with Reset Code 2, "Aborted".
50 * 51 *
51 * - OPEN: 52 * - OPEN:
52 * 53 *
53 * The DCCP timestamp overflows after 11.9 hours. If the connection 54 * The DCCP timestamp overflows after 11.9 hours. If the connection
54 * stays idle this long the sequence number won't be recognized 55 * stays idle this long the sequence number won't be recognized
55 * as valid anymore. 56 * as valid anymore.
56 * 57 *
57 * - CLOSEREQ/CLOSING: 58 * - CLOSEREQ/CLOSING:
58 * 59 *
59 * 8.3. Termination 60 * 8.3. Termination
60 * 61 *
61 * The retransmission timer should initially be set to go off in two 62 * The retransmission timer should initially be set to go off in two
62 * round-trip times and should back off to not less than once every 63 * round-trip times and should back off to not less than once every
63 * 64 seconds ... 64 * 64 seconds ...
64 * 65 *
65 * - TIMEWAIT: 66 * - TIMEWAIT:
66 * 67 *
67 * 4.3. States 68 * 4.3. States
68 * 69 *
69 * A server or client socket remains in this state for 2MSL (4 minutes) 70 * A server or client socket remains in this state for 2MSL (4 minutes)
70 * after the connection has been town down, ... 71 * after the connection has been town down, ...
71 */ 72 */
72 73
73 #define DCCP_MSL (2 * 60 * HZ) 74 #define DCCP_MSL (2 * 60 * HZ)
74 75
75 static const char * const dccp_state_names[] = { 76 static const char * const dccp_state_names[] = {
76 [CT_DCCP_NONE] = "NONE", 77 [CT_DCCP_NONE] = "NONE",
77 [CT_DCCP_REQUEST] = "REQUEST", 78 [CT_DCCP_REQUEST] = "REQUEST",
78 [CT_DCCP_RESPOND] = "RESPOND", 79 [CT_DCCP_RESPOND] = "RESPOND",
79 [CT_DCCP_PARTOPEN] = "PARTOPEN", 80 [CT_DCCP_PARTOPEN] = "PARTOPEN",
80 [CT_DCCP_OPEN] = "OPEN", 81 [CT_DCCP_OPEN] = "OPEN",
81 [CT_DCCP_CLOSEREQ] = "CLOSEREQ", 82 [CT_DCCP_CLOSEREQ] = "CLOSEREQ",
82 [CT_DCCP_CLOSING] = "CLOSING", 83 [CT_DCCP_CLOSING] = "CLOSING",
83 [CT_DCCP_TIMEWAIT] = "TIMEWAIT", 84 [CT_DCCP_TIMEWAIT] = "TIMEWAIT",
84 [CT_DCCP_IGNORE] = "IGNORE", 85 [CT_DCCP_IGNORE] = "IGNORE",
85 [CT_DCCP_INVALID] = "INVALID", 86 [CT_DCCP_INVALID] = "INVALID",
86 }; 87 };
87 88
88 #define sNO CT_DCCP_NONE 89 #define sNO CT_DCCP_NONE
89 #define sRQ CT_DCCP_REQUEST 90 #define sRQ CT_DCCP_REQUEST
90 #define sRS CT_DCCP_RESPOND 91 #define sRS CT_DCCP_RESPOND
91 #define sPO CT_DCCP_PARTOPEN 92 #define sPO CT_DCCP_PARTOPEN
92 #define sOP CT_DCCP_OPEN 93 #define sOP CT_DCCP_OPEN
93 #define sCR CT_DCCP_CLOSEREQ 94 #define sCR CT_DCCP_CLOSEREQ
94 #define sCG CT_DCCP_CLOSING 95 #define sCG CT_DCCP_CLOSING
95 #define sTW CT_DCCP_TIMEWAIT 96 #define sTW CT_DCCP_TIMEWAIT
96 #define sIG CT_DCCP_IGNORE 97 #define sIG CT_DCCP_IGNORE
97 #define sIV CT_DCCP_INVALID 98 #define sIV CT_DCCP_INVALID
98 99
99 /* 100 /*
100 * DCCP state transition table 101 * DCCP state transition table
101 * 102 *
102 * The assumption is the same as for TCP tracking: 103 * The assumption is the same as for TCP tracking:
103 * 104 *
104 * We are the man in the middle. All the packets go through us but might 105 * We are the man in the middle. All the packets go through us but might
105 * get lost in transit to the destination. It is assumed that the destination 106 * get lost in transit to the destination. It is assumed that the destination
106 * can't receive segments we haven't seen. 107 * can't receive segments we haven't seen.
107 * 108 *
108 * The following states exist: 109 * The following states exist:
109 * 110 *
110 * NONE: Initial state, expecting Request 111 * NONE: Initial state, expecting Request
111 * REQUEST: Request seen, waiting for Response from server 112 * REQUEST: Request seen, waiting for Response from server
112 * RESPOND: Response from server seen, waiting for Ack from client 113 * RESPOND: Response from server seen, waiting for Ack from client
113 * PARTOPEN: Ack after Response seen, waiting for packet other than Response, 114 * PARTOPEN: Ack after Response seen, waiting for packet other than Response,
114 * Reset or Sync from server 115 * Reset or Sync from server
115 * OPEN: Packet other than Response, Reset or Sync seen 116 * OPEN: Packet other than Response, Reset or Sync seen
116 * CLOSEREQ: CloseReq from server seen, expecting Close from client 117 * CLOSEREQ: CloseReq from server seen, expecting Close from client
117 * CLOSING: Close seen, expecting Reset 118 * CLOSING: Close seen, expecting Reset
118 * TIMEWAIT: Reset seen 119 * TIMEWAIT: Reset seen
119 * IGNORE: Not determinable whether packet is valid 120 * IGNORE: Not determinable whether packet is valid
120 * 121 *
121 * Some states exist only on one side of the connection: REQUEST, RESPOND, 122 * Some states exist only on one side of the connection: REQUEST, RESPOND,
122 * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to 123 * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to
123 * the one it was in before. 124 * the one it was in before.
124 * 125 *
125 * Packets are marked as ignored (sIG) if we don't know if they're valid 126 * Packets are marked as ignored (sIG) if we don't know if they're valid
126 * (for example a reincarnation of a connection we didn't notice is dead 127 * (for example a reincarnation of a connection we didn't notice is dead
127 * already) and the server may send back a connection closing Reset or a 128 * already) and the server may send back a connection closing Reset or a
128 * Response. They're also used for Sync/SyncAck packets, which we don't 129 * Response. They're also used for Sync/SyncAck packets, which we don't
129 * care about. 130 * care about.
130 */ 131 */
131 static const u_int8_t 132 static const u_int8_t
132 dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = { 133 dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = {
133 [CT_DCCP_ROLE_CLIENT] = { 134 [CT_DCCP_ROLE_CLIENT] = {
134 [DCCP_PKT_REQUEST] = { 135 [DCCP_PKT_REQUEST] = {
135 /* 136 /*
136 * sNO -> sRQ Regular Request 137 * sNO -> sRQ Regular Request
137 * sRQ -> sRQ Retransmitted Request or reincarnation 138 * sRQ -> sRQ Retransmitted Request or reincarnation
138 * sRS -> sRS Retransmitted Request (apparently Response 139 * sRS -> sRS Retransmitted Request (apparently Response
139 * got lost after we saw it) or reincarnation 140 * got lost after we saw it) or reincarnation
140 * sPO -> sIG Ignore, conntrack might be out of sync 141 * sPO -> sIG Ignore, conntrack might be out of sync
141 * sOP -> sIG Ignore, conntrack might be out of sync 142 * sOP -> sIG Ignore, conntrack might be out of sync
142 * sCR -> sIG Ignore, conntrack might be out of sync 143 * sCR -> sIG Ignore, conntrack might be out of sync
143 * sCG -> sIG Ignore, conntrack might be out of sync 144 * sCG -> sIG Ignore, conntrack might be out of sync
144 * sTW -> sRQ Reincarnation 145 * sTW -> sRQ Reincarnation
145 * 146 *
146 * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */ 147 * sNO, sRQ, sRS, sPO. sOP, sCR, sCG, sTW, */
147 sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ, 148 sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ,
148 }, 149 },
149 [DCCP_PKT_RESPONSE] = { 150 [DCCP_PKT_RESPONSE] = {
150 /* 151 /*
151 * sNO -> sIV Invalid 152 * sNO -> sIV Invalid
152 * sRQ -> sIG Ignore, might be response to ignored Request 153 * sRQ -> sIG Ignore, might be response to ignored Request
153 * sRS -> sIG Ignore, might be response to ignored Request 154 * sRS -> sIG Ignore, might be response to ignored Request
154 * sPO -> sIG Ignore, might be response to ignored Request 155 * sPO -> sIG Ignore, might be response to ignored Request
155 * sOP -> sIG Ignore, might be response to ignored Request 156 * sOP -> sIG Ignore, might be response to ignored Request
156 * sCR -> sIG Ignore, might be response to ignored Request 157 * sCR -> sIG Ignore, might be response to ignored Request
157 * sCG -> sIG Ignore, might be response to ignored Request 158 * sCG -> sIG Ignore, might be response to ignored Request
158 * sTW -> sIV Invalid, reincarnation in reverse direction 159 * sTW -> sIV Invalid, reincarnation in reverse direction
159 * goes through sRQ 160 * goes through sRQ
160 * 161 *
161 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 162 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
162 sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV, 163 sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV,
163 }, 164 },
164 [DCCP_PKT_ACK] = { 165 [DCCP_PKT_ACK] = {
165 /* 166 /*
166 * sNO -> sIV No connection 167 * sNO -> sIV No connection
167 * sRQ -> sIV No connection 168 * sRQ -> sIV No connection
168 * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) 169 * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
169 * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN 170 * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN
170 * sOP -> sOP Regular ACK, remain in OPEN 171 * sOP -> sOP Regular ACK, remain in OPEN
171 * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.) 172 * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.)
172 * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) 173 * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
173 * sTW -> sIV 174 * sTW -> sIV
174 * 175 *
175 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 176 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
176 sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV 177 sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
177 }, 178 },
178 [DCCP_PKT_DATA] = { 179 [DCCP_PKT_DATA] = {
179 /* 180 /*
180 * sNO -> sIV No connection 181 * sNO -> sIV No connection
181 * sRQ -> sIV No connection 182 * sRQ -> sIV No connection
182 * sRS -> sIV No connection 183 * sRS -> sIV No connection
183 * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.) 184 * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.)
184 * sOP -> sOP Regular Data packet 185 * sOP -> sOP Regular Data packet
185 * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.) 186 * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.)
186 * sCG -> sCG Data in CLOSING MAY be processed (8.3.) 187 * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
187 * sTW -> sIV 188 * sTW -> sIV
188 * 189 *
189 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 190 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
190 sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV, 191 sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV,
191 }, 192 },
192 [DCCP_PKT_DATAACK] = { 193 [DCCP_PKT_DATAACK] = {
193 /* 194 /*
194 * sNO -> sIV No connection 195 * sNO -> sIV No connection
195 * sRQ -> sIV No connection 196 * sRQ -> sIV No connection
196 * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.) 197 * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
197 * sPO -> sPO Remain in PARTOPEN state 198 * sPO -> sPO Remain in PARTOPEN state
198 * sOP -> sOP Regular DataAck packet in OPEN state 199 * sOP -> sOP Regular DataAck packet in OPEN state
199 * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.) 200 * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.)
200 * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.) 201 * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.)
201 * sTW -> sIV 202 * sTW -> sIV
202 * 203 *
203 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 204 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
204 sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV 205 sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
205 }, 206 },
206 [DCCP_PKT_CLOSEREQ] = { 207 [DCCP_PKT_CLOSEREQ] = {
207 /* 208 /*
208 * CLOSEREQ may only be sent by the server. 209 * CLOSEREQ may only be sent by the server.
209 * 210 *
210 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 211 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
211 sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV 212 sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV
212 }, 213 },
213 [DCCP_PKT_CLOSE] = { 214 [DCCP_PKT_CLOSE] = {
214 /* 215 /*
215 * sNO -> sIV No connection 216 * sNO -> sIV No connection
216 * sRQ -> sIV No connection 217 * sRQ -> sIV No connection
217 * sRS -> sIV No connection 218 * sRS -> sIV No connection
218 * sPO -> sCG Client-initiated close 219 * sPO -> sCG Client-initiated close
219 * sOP -> sCG Client-initiated close 220 * sOP -> sCG Client-initiated close
220 * sCR -> sCG Close in response to CloseReq (8.3.) 221 * sCR -> sCG Close in response to CloseReq (8.3.)
221 * sCG -> sCG Retransmit 222 * sCG -> sCG Retransmit
222 * sTW -> sIV Late retransmit, already in TIME_WAIT 223 * sTW -> sIV Late retransmit, already in TIME_WAIT
223 * 224 *
224 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 225 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
225 sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV 226 sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV
226 }, 227 },
227 [DCCP_PKT_RESET] = { 228 [DCCP_PKT_RESET] = {
228 /* 229 /*
229 * sNO -> sIV No connection 230 * sNO -> sIV No connection
230 * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.) 231 * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.)
231 * sRS -> sTW Response received without Request 232 * sRS -> sTW Response received without Request
232 * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.) 233 * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.)
233 * sOP -> sTW Connection reset 234 * sOP -> sTW Connection reset
234 * sCR -> sTW Connection reset 235 * sCR -> sTW Connection reset
235 * sCG -> sTW Connection reset 236 * sCG -> sTW Connection reset
236 * sTW -> sIG Ignore (don't refresh timer) 237 * sTW -> sIG Ignore (don't refresh timer)
237 * 238 *
238 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 239 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
239 sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG 240 sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG
240 }, 241 },
241 [DCCP_PKT_SYNC] = { 242 [DCCP_PKT_SYNC] = {
242 /* 243 /*
243 * We currently ignore Sync packets 244 * We currently ignore Sync packets
244 * 245 *
245 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 246 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
246 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, 247 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
247 }, 248 },
248 [DCCP_PKT_SYNCACK] = { 249 [DCCP_PKT_SYNCACK] = {
249 /* 250 /*
250 * We currently ignore SyncAck packets 251 * We currently ignore SyncAck packets
251 * 252 *
252 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 253 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
253 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, 254 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
254 }, 255 },
255 }, 256 },
256 [CT_DCCP_ROLE_SERVER] = { 257 [CT_DCCP_ROLE_SERVER] = {
257 [DCCP_PKT_REQUEST] = { 258 [DCCP_PKT_REQUEST] = {
258 /* 259 /*
259 * sNO -> sIV Invalid 260 * sNO -> sIV Invalid
260 * sRQ -> sIG Ignore, conntrack might be out of sync 261 * sRQ -> sIG Ignore, conntrack might be out of sync
261 * sRS -> sIG Ignore, conntrack might be out of sync 262 * sRS -> sIG Ignore, conntrack might be out of sync
262 * sPO -> sIG Ignore, conntrack might be out of sync 263 * sPO -> sIG Ignore, conntrack might be out of sync
263 * sOP -> sIG Ignore, conntrack might be out of sync 264 * sOP -> sIG Ignore, conntrack might be out of sync
264 * sCR -> sIG Ignore, conntrack might be out of sync 265 * sCR -> sIG Ignore, conntrack might be out of sync
265 * sCG -> sIG Ignore, conntrack might be out of sync 266 * sCG -> sIG Ignore, conntrack might be out of sync
266 * sTW -> sRQ Reincarnation, must reverse roles 267 * sTW -> sRQ Reincarnation, must reverse roles
267 * 268 *
268 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 269 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
269 sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ 270 sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ
270 }, 271 },
271 [DCCP_PKT_RESPONSE] = { 272 [DCCP_PKT_RESPONSE] = {
272 /* 273 /*
273 * sNO -> sIV Response without Request 274 * sNO -> sIV Response without Request
274 * sRQ -> sRS Response to clients Request 275 * sRQ -> sRS Response to clients Request
275 * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT) 276 * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT)
276 * sPO -> sIG Response to an ignored Request or late retransmit 277 * sPO -> sIG Response to an ignored Request or late retransmit
277 * sOP -> sIG Ignore, might be response to ignored Request 278 * sOP -> sIG Ignore, might be response to ignored Request
278 * sCR -> sIG Ignore, might be response to ignored Request 279 * sCR -> sIG Ignore, might be response to ignored Request
279 * sCG -> sIG Ignore, might be response to ignored Request 280 * sCG -> sIG Ignore, might be response to ignored Request
280 * sTW -> sIV Invalid, Request from client in sTW moves to sRQ 281 * sTW -> sIV Invalid, Request from client in sTW moves to sRQ
281 * 282 *
282 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 283 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
283 sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV 284 sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV
284 }, 285 },
285 [DCCP_PKT_ACK] = { 286 [DCCP_PKT_ACK] = {
286 /* 287 /*
287 * sNO -> sIV No connection 288 * sNO -> sIV No connection
288 * sRQ -> sIV No connection 289 * sRQ -> sIV No connection
289 * sRS -> sIV No connection 290 * sRS -> sIV No connection
290 * sPO -> sOP Enter OPEN state (8.1.5.) 291 * sPO -> sOP Enter OPEN state (8.1.5.)
291 * sOP -> sOP Regular Ack in OPEN state 292 * sOP -> sOP Regular Ack in OPEN state
292 * sCR -> sIV Waiting for Close from client 293 * sCR -> sIV Waiting for Close from client
293 * sCG -> sCG Ack in CLOSING MAY be processed (8.3.) 294 * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
294 * sTW -> sIV 295 * sTW -> sIV
295 * 296 *
296 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 297 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
297 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV 298 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
298 }, 299 },
299 [DCCP_PKT_DATA] = { 300 [DCCP_PKT_DATA] = {
300 /* 301 /*
301 * sNO -> sIV No connection 302 * sNO -> sIV No connection
302 * sRQ -> sIV No connection 303 * sRQ -> sIV No connection
303 * sRS -> sIV No connection 304 * sRS -> sIV No connection
304 * sPO -> sOP Enter OPEN state (8.1.5.) 305 * sPO -> sOP Enter OPEN state (8.1.5.)
305 * sOP -> sOP Regular Data packet in OPEN state 306 * sOP -> sOP Regular Data packet in OPEN state
306 * sCR -> sIV Waiting for Close from client 307 * sCR -> sIV Waiting for Close from client
307 * sCG -> sCG Data in CLOSING MAY be processed (8.3.) 308 * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
308 * sTW -> sIV 309 * sTW -> sIV
309 * 310 *
310 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 311 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
311 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV 312 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
312 }, 313 },
313 [DCCP_PKT_DATAACK] = { 314 [DCCP_PKT_DATAACK] = {
314 /* 315 /*
315 * sNO -> sIV No connection 316 * sNO -> sIV No connection
316 * sRQ -> sIV No connection 317 * sRQ -> sIV No connection
317 * sRS -> sIV No connection 318 * sRS -> sIV No connection
318 * sPO -> sOP Enter OPEN state (8.1.5.) 319 * sPO -> sOP Enter OPEN state (8.1.5.)
319 * sOP -> sOP Regular DataAck in OPEN state 320 * sOP -> sOP Regular DataAck in OPEN state
320 * sCR -> sIV Waiting for Close from client 321 * sCR -> sIV Waiting for Close from client
321 * sCG -> sCG Data in CLOSING MAY be processed (8.3.) 322 * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
322 * sTW -> sIV 323 * sTW -> sIV
323 * 324 *
324 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 325 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
325 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV 326 sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
326 }, 327 },
327 [DCCP_PKT_CLOSEREQ] = { 328 [DCCP_PKT_CLOSEREQ] = {
328 /* 329 /*
329 * sNO -> sIV No connection 330 * sNO -> sIV No connection
330 * sRQ -> sIV No connection 331 * sRQ -> sIV No connection
331 * sRS -> sIV No connection 332 * sRS -> sIV No connection
332 * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.) 333 * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.)
333 * sOP -> sCR CloseReq in OPEN state 334 * sOP -> sCR CloseReq in OPEN state
334 * sCR -> sCR Retransmit 335 * sCR -> sCR Retransmit
335 * sCG -> sCR Simultaneous close, client sends another Close 336 * sCG -> sCR Simultaneous close, client sends another Close
336 * sTW -> sIV Already closed 337 * sTW -> sIV Already closed
337 * 338 *
338 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 339 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
339 sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV 340 sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV
340 }, 341 },
341 [DCCP_PKT_CLOSE] = { 342 [DCCP_PKT_CLOSE] = {
342 /* 343 /*
343 * sNO -> sIV No connection 344 * sNO -> sIV No connection
344 * sRQ -> sIV No connection 345 * sRQ -> sIV No connection
345 * sRS -> sIV No connection 346 * sRS -> sIV No connection
346 * sPO -> sOP -> sCG Move direcly to CLOSING 347 * sPO -> sOP -> sCG Move direcly to CLOSING
347 * sOP -> sCG Move to CLOSING 348 * sOP -> sCG Move to CLOSING
348 * sCR -> sIV Close after CloseReq is invalid 349 * sCR -> sIV Close after CloseReq is invalid
349 * sCG -> sCG Retransmit 350 * sCG -> sCG Retransmit
350 * sTW -> sIV Already closed 351 * sTW -> sIV Already closed
351 * 352 *
352 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 353 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
353 sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV 354 sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV
354 }, 355 },
355 [DCCP_PKT_RESET] = { 356 [DCCP_PKT_RESET] = {
356 /* 357 /*
357 * sNO -> sIV No connection 358 * sNO -> sIV No connection
358 * sRQ -> sTW Reset in response to Request 359 * sRQ -> sTW Reset in response to Request
359 * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.) 360 * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.)
360 * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.) 361 * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.)
361 * sOP -> sTW 362 * sOP -> sTW
362 * sCR -> sTW 363 * sCR -> sTW
363 * sCG -> sTW 364 * sCG -> sTW
364 * sTW -> sIG Ignore (don't refresh timer) 365 * sTW -> sIG Ignore (don't refresh timer)
365 * 366 *
366 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */ 367 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */
367 sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG 368 sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG
368 }, 369 },
369 [DCCP_PKT_SYNC] = { 370 [DCCP_PKT_SYNC] = {
370 /* 371 /*
371 * We currently ignore Sync packets 372 * We currently ignore Sync packets
372 * 373 *
373 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 374 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
374 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, 375 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
375 }, 376 },
376 [DCCP_PKT_SYNCACK] = { 377 [DCCP_PKT_SYNCACK] = {
377 /* 378 /*
378 * We currently ignore SyncAck packets 379 * We currently ignore SyncAck packets
379 * 380 *
380 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */ 381 * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
381 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG, 382 sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
382 }, 383 },
383 }, 384 },
384 }; 385 };
385 386
/* Return this netns' DCCP tracking state (timeouts, dccp_loose knob). */
static inline struct nf_dccp_net *dccp_pernet(struct net *net)
{
	return &net->ct.nf_ct_proto.dccp;
}
390 391
/* Decide whether the first packet of an untracked flow may create a new
 * conntrack entry.
 *
 * Looks up the state the packet would move an untracked (CT_DCCP_NONE)
 * connection into.  A Request always starts tracking; anything else only
 * starts tracking when the "loose" sysctl permits picking up connections
 * mid-stream.  Returns true and initialises ct->proto.dccp on success,
 * false when no entry must be created.
 */
static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
		     unsigned int dataoff)
{
	struct net *net = nf_ct_net(ct);
	struct nf_dccp_net *dn;
	struct dccp_hdr _dh, *dh;
	const char *msg;
	u_int8_t state;

	/* The caller already validated the header (dccp_error), so a
	 * missing header here would be a programming error. */
	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
	BUG_ON(dh == NULL);

	state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
	switch (state) {
	default:
		dn = dccp_pernet(net);
		if (dn->dccp_loose == 0) {
			msg = "not picking up existing connection ";
			goto out_invalid;
		}
		/* fall through - loose mode picks up mid-stream flows */
	case CT_DCCP_REQUEST:
		break;
	case CT_DCCP_INVALID:
		msg = "invalid state transition ";
		goto out_invalid;
	}

	/* The sender of the first packet is assumed to be the client. */
	ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
	ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
	ct->proto.dccp.state = CT_DCCP_NONE;
	ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
	ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
	ct->proto.dccp.handshake_seq = 0;
	return true;

out_invalid:
	nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
	return false;
}
430 431
431 static u64 dccp_ack_seq(const struct dccp_hdr *dh) 432 static u64 dccp_ack_seq(const struct dccp_hdr *dh)
432 { 433 {
433 const struct dccp_hdr_ack_bits *dhack; 434 const struct dccp_hdr_ack_bits *dhack;
434 435
435 dhack = (void *)dh + __dccp_basic_hdr_len(dh); 436 dhack = (void *)dh + __dccp_basic_hdr_len(dh);
436 return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + 437 return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) +
437 ntohl(dhack->dccph_ack_nr_low); 438 ntohl(dhack->dccph_ack_nr_low);
438 } 439 }
439 440
440 static unsigned int *dccp_get_timeouts(struct net *net)
441 {
442 return dccp_pernet(net)->dccp_timeout;
443 }
444
/* Advance the tracked connection's state machine for one packet.
 *
 * Returns NF_ACCEPT when the packet was processed (possibly flagged as
 * invalid but still accepted), or -NF_ACCEPT when it must be dropped as
 * an invalid state transition.  State is updated under ct->lock.
 */
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
		       unsigned int dataoff, enum ip_conntrack_info ctinfo)
{
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	struct dccp_hdr _dh, *dh;
	u_int8_t type, old_state, new_state;
	enum ct_dccp_roles role;
	unsigned int *timeouts;

	/* Header already validated by dccp_error(). */
	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
	BUG_ON(dh == NULL);
	type = dh->dccph_type;

	if (type == DCCP_PKT_RESET &&
	    !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
		/* Tear down connection immediately if only reply is a RESET */
		nf_ct_kill_acct(ct, ctinfo, skb);
		return NF_ACCEPT;
	}

	spin_lock_bh(&ct->lock);

	role = ct->proto.dccp.role[dir];
	old_state = ct->proto.dccp.state;
	new_state = dccp_state_table[role][type][old_state];

	switch (new_state) {
	case CT_DCCP_REQUEST:
		if (old_state == CT_DCCP_TIMEWAIT &&
		    role == CT_DCCP_ROLE_SERVER) {
			/* Reincarnation in the reverse direction: reopen and
			 * reverse client/server roles. */
			ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT;
			ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER;
		}
		break;
	case CT_DCCP_RESPOND:
		/* Remember the handshake sequence number so the later Ack
		 * completing the handshake can be matched against it. */
		if (old_state == CT_DCCP_REQUEST)
			ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
		break;
	case CT_DCCP_PARTOPEN:
		if (old_state == CT_DCCP_RESPOND &&
		    type == DCCP_PKT_ACK &&
		    dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq)
			set_bit(IPS_ASSURED_BIT, &ct->status);
		break;
	case CT_DCCP_IGNORE:
		/*
		 * Connection tracking might be out of sync, so we ignore
		 * packets that might establish a new connection and resync
		 * if the server responds with a valid Response.
		 */
		if (ct->proto.dccp.last_dir == !dir &&
		    ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST &&
		    type == DCCP_PKT_RESPONSE) {
			ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT;
			ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER;
			ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
			new_state = CT_DCCP_RESPOND;
			break;
		}
		ct->proto.dccp.last_dir = dir;
		ct->proto.dccp.last_pkt = type;

		spin_unlock_bh(&ct->lock);
		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
		return NF_ACCEPT;
	case CT_DCCP_INVALID:
		spin_unlock_bh(&ct->lock);
		nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
		return -NF_ACCEPT;
	}

	ct->proto.dccp.last_dir = dir;
	ct->proto.dccp.last_pkt = type;
	ct->proto.dccp.state = new_state;
	spin_unlock_bh(&ct->lock);

	if (new_state != old_state)
		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);

	/* Prefer a per-conntrack timeout policy (nfnetlink cttimeout) if
	 * one is attached, otherwise use the per-netns defaults. */
	timeouts = nf_ct_timeout_lookup(ct);
	if (!timeouts)
		timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);

	return NF_ACCEPT;
}
530 529
/* Sanity-check a DCCP packet before it is handed to the state machine.
 *
 * Verifies header presence and length, checksum coverage, the transport
 * checksum (on PRE_ROUTING when checksumming is enabled), and that the
 * packet type is not reserved.  Returns NF_ACCEPT when the packet looks
 * valid, -NF_ACCEPT (with a log message) otherwise.
 */
static int dccp_error(struct net *net, struct nf_conn *tmpl,
		      struct sk_buff *skb, unsigned int dataoff,
		      u_int8_t pf, unsigned int hooknum)
{
	struct dccp_hdr _dh, *dh;
	unsigned int dccp_len = skb->len - dataoff;
	unsigned int cscov;
	const char *msg;

	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
	if (dh == NULL) {
		msg = "nf_ct_dccp: short packet ";
		goto out_invalid;
	}

	/* dccph_doff counts 32-bit words of header+options. */
	if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
	    dh->dccph_doff * 4 > dccp_len) {
		msg = "nf_ct_dccp: truncated/malformed packet ";
		goto out_invalid;
	}

	/* Checksum coverage: 0 means the whole packet, otherwise
	 * (cscov - 1) * 4 bytes are covered (RFC 4340, 9.2). */
	cscov = dccp_len;
	if (dh->dccph_cscov) {
		cscov = (dh->dccph_cscov - 1) * 4;
		if (cscov > dccp_len) {
			msg = "nf_ct_dccp: bad checksum coverage ";
			goto out_invalid;
		}
	}

	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP,
				pf)) {
		msg = "nf_ct_dccp: bad checksum ";
		goto out_invalid;
	}

	if (dh->dccph_type >= DCCP_PKT_INVALID) {
		msg = "nf_ct_dccp: reserved packet type ";
		goto out_invalid;
	}

	return NF_ACCEPT;

out_invalid:
	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg);
	return -NF_ACCEPT;
}
579 578
580 static bool dccp_can_early_drop(const struct nf_conn *ct) 579 static bool dccp_can_early_drop(const struct nf_conn *ct)
581 { 580 {
582 switch (ct->proto.dccp.state) { 581 switch (ct->proto.dccp.state) {
583 case CT_DCCP_CLOSEREQ: 582 case CT_DCCP_CLOSEREQ:
584 case CT_DCCP_CLOSING: 583 case CT_DCCP_CLOSING:
585 case CT_DCCP_TIMEWAIT: 584 case CT_DCCP_TIMEWAIT:
586 return true; 585 return true;
587 default: 586 default:
588 break; 587 break;
589 } 588 }
590 589
591 return false; 590 return false;
592 } 591 }
593 592
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Emit the human-readable state name for /proc/net/nf_conntrack. */
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
	seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
#endif
600 599
601 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 600 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
/* Dump DCCP protocol state into a nested CTA_PROTOINFO_DCCP attribute.
 *
 * Reads the state under ct->lock so the three values are consistent.
 * Returns 0 on success, -1 when the skb ran out of room.
 */
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
			  struct nf_conn *ct)
{
	struct nlattr *nest_parms;

	spin_lock_bh(&ct->lock);
	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
	if (!nest_parms)
		goto nla_put_failure;
	if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state) ||
	    nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE,
		       ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) ||
	    nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
			 cpu_to_be64(ct->proto.dccp.handshake_seq),
			 CTA_PROTOINFO_DCCP_PAD))
		goto nla_put_failure;
	nla_nest_end(skb, nest_parms);
	spin_unlock_bh(&ct->lock);
	return 0;

nla_put_failure:
	spin_unlock_bh(&ct->lock);
	return -1;
}
626 625
/* Validation policy for the nested CTA_PROTOINFO_DCCP attributes. */
static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
	[CTA_PROTOINFO_DCCP_STATE]	= { .type = NLA_U8 },
	[CTA_PROTOINFO_DCCP_ROLE]	= { .type = NLA_U8 },
	[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
	[CTA_PROTOINFO_DCCP_PAD]	= { .type = NLA_UNSPEC },
};
633 632
/* Worst-case netlink payload for dccp_to_nlattr():
 * state (u8) + role (u8) + handshake_seq (u64) + zero-length pad.
 */
#define DCCP_NLATTR_SIZE ( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
	NLA_ALIGN(NLA_HDRLEN + 0))
639 638
/* Restore DCCP protocol state from a CTA_PROTOINFO_DCCP attribute
 * (conntrack entry created/updated via ctnetlink).
 *
 * A missing attribute is not an error (returns 0 and leaves the entry
 * untouched); malformed or out-of-range values yield -EINVAL.
 */
static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
{
	struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
	struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1];
	int err;

	if (!attr)
		return 0;

	err = nla_parse_nested(tb, CTA_PROTOINFO_DCCP_MAX, attr,
			       dccp_nla_policy, NULL);
	if (err < 0)
		return err;

	/* State and role are mandatory; reject internal-only states
	 * (>= CT_DCCP_IGNORE) and unknown roles. */
	if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
	    !tb[CTA_PROTOINFO_DCCP_ROLE] ||
	    nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
	    nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
		return -EINVAL;
	}

	spin_lock_bh(&ct->lock);
	ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
	/* Only the original direction's role is transferred; the reply
	 * direction is implied as the opposite role. */
	if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
		ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
	} else {
		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
		ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
	}
	if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
		ct->proto.dccp.handshake_seq =
		be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
	}
	spin_unlock_bh(&ct->lock);
	return 0;
}
677 #endif 676 #endif
678 677
679 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 678 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
680 679
681 #include <linux/netfilter/nfnetlink.h> 680 #include <linux/netfilter/nfnetlink.h>
682 #include <linux/netfilter/nfnetlink_cttimeout.h> 681 #include <linux/netfilter/nfnetlink_cttimeout.h>
683 682
684 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], 683 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
685 struct net *net, void *data) 684 struct net *net, void *data)
686 { 685 {
687 struct nf_dccp_net *dn = dccp_pernet(net); 686 struct nf_dccp_net *dn = dccp_pernet(net);
688 unsigned int *timeouts = data; 687 unsigned int *timeouts = data;
689 int i; 688 int i;
690 689
691 /* set default DCCP timeouts. */ 690 /* set default DCCP timeouts. */
692 for (i=0; i<CT_DCCP_MAX; i++) 691 for (i=0; i<CT_DCCP_MAX; i++)
693 timeouts[i] = dn->dccp_timeout[i]; 692 timeouts[i] = dn->dccp_timeout[i];
694 693
695 /* there's a 1:1 mapping between attributes and protocol states. */ 694 /* there's a 1:1 mapping between attributes and protocol states. */
696 for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { 695 for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
697 if (tb[i]) { 696 if (tb[i]) {
698 timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; 697 timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
699 } 698 }
700 } 699 }
701 return 0; 700 return 0;
702 } 701 }
703 702
704 static int 703 static int
705 dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 704 dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
706 { 705 {
707 const unsigned int *timeouts = data; 706 const unsigned int *timeouts = data;
708 int i; 707 int i;
709 708
710 for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) { 709 for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
711 if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) 710 if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
712 goto nla_put_failure; 711 goto nla_put_failure;
713 } 712 }
714 return 0; 713 return 0;
715 714
716 nla_put_failure: 715 nla_put_failure:
717 return -ENOSPC; 716 return -ENOSPC;
718 } 717 }
719 718
/* Validation policy for per-state timeout attributes (seconds, u32). */
static const struct nla_policy
dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
	[CTA_TIMEOUT_DCCP_REQUEST]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_RESPOND]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_PARTOPEN]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_OPEN]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_CLOSEREQ]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_CLOSING]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_DCCP_TIMEWAIT]	= { .type = NLA_U32 },
};
730 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 729 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
731 730
732 #ifdef CONFIG_SYSCTL 731 #ifdef CONFIG_SYSCTL
/* template, data assigned later (see dccp_kmemdup_sysctl_table());
 * entry order must match the .data assignments made there. */
static struct ctl_table dccp_sysctl_table[] = {
	{
		.procname	= "nf_conntrack_dccp_timeout_request",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_respond",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_partopen",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_open",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_closereq",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_closing",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_timeout_timewait",
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "nf_conntrack_dccp_loose",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
785 #endif /* CONFIG_SYSCTL */ 784 #endif /* CONFIG_SYSCTL */
786 785
/* Duplicate the sysctl template for this netns and point each entry's
 * .data at the corresponding per-netns field.  Idempotent: returns 0
 * immediately if the table was already duplicated.
 */
static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
				     struct nf_dccp_net *dn)
{
#ifdef CONFIG_SYSCTL
	if (pn->ctl_table)
		return 0;

	pn->ctl_table = kmemdup(dccp_sysctl_table,
				sizeof(dccp_sysctl_table),
				GFP_KERNEL);
	if (!pn->ctl_table)
		return -ENOMEM;

	/* Indices must stay in sync with dccp_sysctl_table[] order. */
	pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
	pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
	pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
	pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
	pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
	pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
	pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
	pn->ctl_table[7].data = &dn->dccp_loose;

	/* Don't export sysctls to unprivileged users: a NULL procname on
	 * the first entry terminates registration, hiding the table. */
	if (net->user_ns != &init_user_ns)
		pn->ctl_table[0].procname = NULL;
#endif
	return 0;
}
815 814
816 static int dccp_init_net(struct net *net, u_int16_t proto) 815 static int dccp_init_net(struct net *net, u_int16_t proto)
817 { 816 {
818 struct nf_dccp_net *dn = dccp_pernet(net); 817 struct nf_dccp_net *dn = dccp_pernet(net);
819 struct nf_proto_net *pn = &dn->pn; 818 struct nf_proto_net *pn = &dn->pn;
820 819
821 if (!pn->users) { 820 if (!pn->users) {
822 /* default values */ 821 /* default values */
823 dn->dccp_loose = 1; 822 dn->dccp_loose = 1;
824 dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; 823 dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
825 dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; 824 dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
826 dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; 825 dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
827 dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; 826 dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
828 dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; 827 dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
829 dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; 828 dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
830 dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; 829 dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
831 } 830 }
832 831
833 return dccp_kmemdup_sysctl_table(net, pn, dn); 832 return dccp_kmemdup_sysctl_table(net, pn, dn);
834 } 833 }
835 834
836 static struct nf_proto_net *dccp_get_net_proto(struct net *net) 835 static struct nf_proto_net *dccp_get_net_proto(struct net *net)
837 { 836 {
838 return &net->ct.nf_ct_proto.dccp.pn; 837 return &net->ct.nf_ct_proto.dccp.pn;
839 } 838 }
840 839
841 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { 840 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
842 .l3proto = AF_INET, 841 .l3proto = AF_INET,
843 .l4proto = IPPROTO_DCCP, 842 .l4proto = IPPROTO_DCCP,
844 .new = dccp_new, 843 .new = dccp_new,
845 .packet = dccp_packet, 844 .packet = dccp_packet,
846 .get_timeouts = dccp_get_timeouts,
847 .error = dccp_error, 845 .error = dccp_error,
848 .can_early_drop = dccp_can_early_drop, 846 .can_early_drop = dccp_can_early_drop,
849 #ifdef CONFIG_NF_CONNTRACK_PROCFS 847 #ifdef CONFIG_NF_CONNTRACK_PROCFS
850 .print_conntrack = dccp_print_conntrack, 848 .print_conntrack = dccp_print_conntrack,
851 #endif 849 #endif
852 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 850 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
853 .nlattr_size = DCCP_NLATTR_SIZE, 851 .nlattr_size = DCCP_NLATTR_SIZE,
854 .to_nlattr = dccp_to_nlattr, 852 .to_nlattr = dccp_to_nlattr,
855 .from_nlattr = nlattr_to_dccp, 853 .from_nlattr = nlattr_to_dccp,
856 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 854 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
857 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 855 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
858 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 856 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
859 .nla_policy = nf_ct_port_nla_policy, 857 .nla_policy = nf_ct_port_nla_policy,
860 #endif 858 #endif
861 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 859 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
862 .ctnl_timeout = { 860 .ctnl_timeout = {
863 .nlattr_to_obj = dccp_timeout_nlattr_to_obj, 861 .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
864 .obj_to_nlattr = dccp_timeout_obj_to_nlattr, 862 .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
865 .nlattr_max = CTA_TIMEOUT_DCCP_MAX, 863 .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
866 .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, 864 .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
867 .nla_policy = dccp_timeout_nla_policy, 865 .nla_policy = dccp_timeout_nla_policy,
868 }, 866 },
869 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 867 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
870 .init_net = dccp_init_net, 868 .init_net = dccp_init_net,
871 .get_net_proto = dccp_get_net_proto, 869 .get_net_proto = dccp_get_net_proto,
872 }; 870 };
873 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); 871 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
874 872
875 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { 873 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
876 .l3proto = AF_INET6, 874 .l3proto = AF_INET6,
877 .l4proto = IPPROTO_DCCP, 875 .l4proto = IPPROTO_DCCP,
878 .new = dccp_new, 876 .new = dccp_new,
879 .packet = dccp_packet, 877 .packet = dccp_packet,
880 .get_timeouts = dccp_get_timeouts,
881 .error = dccp_error, 878 .error = dccp_error,
882 .can_early_drop = dccp_can_early_drop, 879 .can_early_drop = dccp_can_early_drop,
883 #ifdef CONFIG_NF_CONNTRACK_PROCFS 880 #ifdef CONFIG_NF_CONNTRACK_PROCFS
884 .print_conntrack = dccp_print_conntrack, 881 .print_conntrack = dccp_print_conntrack,
885 #endif 882 #endif
886 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 883 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
887 .nlattr_size = DCCP_NLATTR_SIZE, 884 .nlattr_size = DCCP_NLATTR_SIZE,
888 .to_nlattr = dccp_to_nlattr, 885 .to_nlattr = dccp_to_nlattr,
889 .from_nlattr = nlattr_to_dccp, 886 .from_nlattr = nlattr_to_dccp,
890 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 887 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
891 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 888 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
892 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 889 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
893 .nla_policy = nf_ct_port_nla_policy, 890 .nla_policy = nf_ct_port_nla_policy,
894 #endif 891 #endif
895 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 892 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
896 .ctnl_timeout = { 893 .ctnl_timeout = {
897 .nlattr_to_obj = dccp_timeout_nlattr_to_obj, 894 .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
898 .obj_to_nlattr = dccp_timeout_obj_to_nlattr, 895 .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
899 .nlattr_max = CTA_TIMEOUT_DCCP_MAX, 896 .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
900 .obj_size = sizeof(unsigned int) * CT_DCCP_MAX, 897 .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
901 .nla_policy = dccp_timeout_nla_policy, 898 .nla_policy = dccp_timeout_nla_policy,
902 }, 899 },
903 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 900 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
net/netfilter/nf_conntrack_proto_generic.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8 8
9 #include <linux/types.h> 9 #include <linux/types.h>
10 #include <linux/jiffies.h> 10 #include <linux/jiffies.h>
11 #include <linux/timer.h> 11 #include <linux/timer.h>
12 #include <linux/netfilter.h> 12 #include <linux/netfilter.h>
13 #include <net/netfilter/nf_conntrack_l4proto.h> 13 #include <net/netfilter/nf_conntrack_l4proto.h>
14 #include <net/netfilter/nf_conntrack_timeout.h>
14 15
15 static const unsigned int nf_ct_generic_timeout = 600*HZ; 16 static const unsigned int nf_ct_generic_timeout = 600*HZ;
16 17
17 static bool nf_generic_should_process(u8 proto) 18 static bool nf_generic_should_process(u8 proto)
18 { 19 {
19 switch (proto) { 20 switch (proto) {
20 #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE 21 #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
21 case IPPROTO_GRE: 22 case IPPROTO_GRE:
22 return false; 23 return false;
23 #endif 24 #endif
24 default: 25 default:
25 return true; 26 return true;
26 } 27 }
27 } 28 }
28 29
29 static inline struct nf_generic_net *generic_pernet(struct net *net) 30 static inline struct nf_generic_net *generic_pernet(struct net *net)
30 { 31 {
31 return &net->ct.nf_ct_proto.generic; 32 return &net->ct.nf_ct_proto.generic;
32 } 33 }
33 34
34 static bool generic_pkt_to_tuple(const struct sk_buff *skb, 35 static bool generic_pkt_to_tuple(const struct sk_buff *skb,
35 unsigned int dataoff, 36 unsigned int dataoff,
36 struct net *net, struct nf_conntrack_tuple *tuple) 37 struct net *net, struct nf_conntrack_tuple *tuple)
37 { 38 {
38 tuple->src.u.all = 0; 39 tuple->src.u.all = 0;
39 tuple->dst.u.all = 0; 40 tuple->dst.u.all = 0;
40 41
41 return true; 42 return true;
42 } 43 }
43 44
44 static unsigned int *generic_get_timeouts(struct net *net)
45 {
46 return &(generic_pernet(net)->timeout);
47 }
48
49 /* Returns verdict for packet, or -1 for invalid. */ 45 /* Returns verdict for packet, or -1 for invalid. */
50 static int generic_packet(struct nf_conn *ct, 46 static int generic_packet(struct nf_conn *ct,
51 const struct sk_buff *skb, 47 const struct sk_buff *skb,
52 unsigned int dataoff, 48 unsigned int dataoff,
53 enum ip_conntrack_info ctinfo, 49 enum ip_conntrack_info ctinfo)
54 unsigned int *timeout)
55 { 50 {
51 const unsigned int *timeout = nf_ct_timeout_lookup(ct);
52
53 if (!timeout)
54 timeout = &generic_pernet(nf_ct_net(ct))->timeout;
55
56 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 56 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
57 return NF_ACCEPT; 57 return NF_ACCEPT;
58 } 58 }
59 59
60 /* Called when a new connection for this protocol found. */ 60 /* Called when a new connection for this protocol found. */
61 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, 61 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
62 unsigned int dataoff, unsigned int *timeouts) 62 unsigned int dataoff)
63 { 63 {
64 bool ret; 64 bool ret;
65 65
66 ret = nf_generic_should_process(nf_ct_protonum(ct)); 66 ret = nf_generic_should_process(nf_ct_protonum(ct));
67 if (!ret) 67 if (!ret)
68 pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", 68 pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
69 nf_ct_protonum(ct)); 69 nf_ct_protonum(ct));
70 return ret; 70 return ret;
71 } 71 }
72 72
73 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 73 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
74 74
75 #include <linux/netfilter/nfnetlink.h> 75 #include <linux/netfilter/nfnetlink.h>
76 #include <linux/netfilter/nfnetlink_cttimeout.h> 76 #include <linux/netfilter/nfnetlink_cttimeout.h>
77 77
78 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], 78 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
79 struct net *net, void *data) 79 struct net *net, void *data)
80 { 80 {
81 unsigned int *timeout = data;
82 struct nf_generic_net *gn = generic_pernet(net); 81 struct nf_generic_net *gn = generic_pernet(net);
82 unsigned int *timeout = data;
83 83
84 if (!timeout)
85 timeout = &gn->timeout;
86
84 if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) 87 if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
85 *timeout = 88 *timeout =
86 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; 89 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ;
87 else { 90 else {
88 /* Set default generic timeout. */ 91 /* Set default generic timeout. */
89 *timeout = gn->timeout; 92 *timeout = gn->timeout;
90 } 93 }
91 94
92 return 0; 95 return 0;
93 } 96 }
94 97
95 static int 98 static int
96 generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 99 generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
97 { 100 {
98 const unsigned int *timeout = data; 101 const unsigned int *timeout = data;
99 102
100 if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ))) 103 if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)))
101 goto nla_put_failure; 104 goto nla_put_failure;
102 105
103 return 0; 106 return 0;
104 107
105 nla_put_failure: 108 nla_put_failure:
106 return -ENOSPC; 109 return -ENOSPC;
107 } 110 }
108 111
109 static const struct nla_policy 112 static const struct nla_policy
110 generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { 113 generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
111 [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, 114 [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 },
112 }; 115 };
113 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 116 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
114 117
115 #ifdef CONFIG_SYSCTL 118 #ifdef CONFIG_SYSCTL
116 static struct ctl_table generic_sysctl_table[] = { 119 static struct ctl_table generic_sysctl_table[] = {
117 { 120 {
118 .procname = "nf_conntrack_generic_timeout", 121 .procname = "nf_conntrack_generic_timeout",
119 .maxlen = sizeof(unsigned int), 122 .maxlen = sizeof(unsigned int),
120 .mode = 0644, 123 .mode = 0644,
121 .proc_handler = proc_dointvec_jiffies, 124 .proc_handler = proc_dointvec_jiffies,
122 }, 125 },
123 { } 126 { }
124 }; 127 };
125 #endif /* CONFIG_SYSCTL */ 128 #endif /* CONFIG_SYSCTL */
126 129
127 static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, 130 static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
128 struct nf_generic_net *gn) 131 struct nf_generic_net *gn)
129 { 132 {
130 #ifdef CONFIG_SYSCTL 133 #ifdef CONFIG_SYSCTL
131 pn->ctl_table = kmemdup(generic_sysctl_table, 134 pn->ctl_table = kmemdup(generic_sysctl_table,
132 sizeof(generic_sysctl_table), 135 sizeof(generic_sysctl_table),
133 GFP_KERNEL); 136 GFP_KERNEL);
134 if (!pn->ctl_table) 137 if (!pn->ctl_table)
135 return -ENOMEM; 138 return -ENOMEM;
136 139
137 pn->ctl_table[0].data = &gn->timeout; 140 pn->ctl_table[0].data = &gn->timeout;
138 #endif 141 #endif
139 return 0; 142 return 0;
140 } 143 }
141 144
142 static int generic_init_net(struct net *net, u_int16_t proto) 145 static int generic_init_net(struct net *net, u_int16_t proto)
143 { 146 {
144 struct nf_generic_net *gn = generic_pernet(net); 147 struct nf_generic_net *gn = generic_pernet(net);
145 struct nf_proto_net *pn = &gn->pn; 148 struct nf_proto_net *pn = &gn->pn;
146 149
147 gn->timeout = nf_ct_generic_timeout; 150 gn->timeout = nf_ct_generic_timeout;
148 151
149 return generic_kmemdup_sysctl_table(pn, gn); 152 return generic_kmemdup_sysctl_table(pn, gn);
150 } 153 }
151 154
152 static struct nf_proto_net *generic_get_net_proto(struct net *net) 155 static struct nf_proto_net *generic_get_net_proto(struct net *net)
153 { 156 {
154 return &net->ct.nf_ct_proto.generic.pn; 157 return &net->ct.nf_ct_proto.generic.pn;
155 } 158 }
156 159
157 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = 160 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
158 { 161 {
159 .l3proto = PF_UNSPEC, 162 .l3proto = PF_UNSPEC,
160 .l4proto = 255, 163 .l4proto = 255,
161 .pkt_to_tuple = generic_pkt_to_tuple, 164 .pkt_to_tuple = generic_pkt_to_tuple,
162 .packet = generic_packet, 165 .packet = generic_packet,
163 .get_timeouts = generic_get_timeouts,
164 .new = generic_new, 166 .new = generic_new,
165 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 167 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
166 .ctnl_timeout = { 168 .ctnl_timeout = {
167 .nlattr_to_obj = generic_timeout_nlattr_to_obj, 169 .nlattr_to_obj = generic_timeout_nlattr_to_obj,
168 .obj_to_nlattr = generic_timeout_obj_to_nlattr, 170 .obj_to_nlattr = generic_timeout_obj_to_nlattr,
169 .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, 171 .nlattr_max = CTA_TIMEOUT_GENERIC_MAX,
net/netfilter/nf_conntrack_proto_gre.c
1 /* 1 /*
2 * ip_conntrack_proto_gre.c - Version 3.0 2 * ip_conntrack_proto_gre.c - Version 3.0
3 * 3 *
4 * Connection tracking protocol helper module for GRE. 4 * Connection tracking protocol helper module for GRE.
5 * 5 *
6 * GRE is a generic encapsulation protocol, which is generally not very 6 * GRE is a generic encapsulation protocol, which is generally not very
7 * suited for NAT, as it has no protocol-specific part as port numbers. 7 * suited for NAT, as it has no protocol-specific part as port numbers.
8 * 8 *
9 * It has an optional key field, which may help us distinguishing two 9 * It has an optional key field, which may help us distinguishing two
10 * connections between the same two hosts. 10 * connections between the same two hosts.
11 * 11 *
12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 12 * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
13 * 13 *
14 * PPTP is built on top of a modified version of GRE, and has a mandatory 14 * PPTP is built on top of a modified version of GRE, and has a mandatory
15 * field called "CallID", which serves us for the same purpose as the key 15 * field called "CallID", which serves us for the same purpose as the key
16 * field in plain GRE. 16 * field in plain GRE.
17 * 17 *
18 * Documentation about PPTP can be found in RFC 2637 18 * Documentation about PPTP can be found in RFC 2637
19 * 19 *
20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> 20 * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
21 * 21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/) 22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 * 23 *
24 * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 24 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
25 */ 25 */
26 26
27 #include <linux/module.h> 27 #include <linux/module.h>
28 #include <linux/types.h> 28 #include <linux/types.h>
29 #include <linux/timer.h> 29 #include <linux/timer.h>
30 #include <linux/list.h> 30 #include <linux/list.h>
31 #include <linux/seq_file.h> 31 #include <linux/seq_file.h>
32 #include <linux/in.h> 32 #include <linux/in.h>
33 #include <linux/netdevice.h> 33 #include <linux/netdevice.h>
34 #include <linux/skbuff.h> 34 #include <linux/skbuff.h>
35 #include <linux/slab.h> 35 #include <linux/slab.h>
36 #include <net/dst.h> 36 #include <net/dst.h>
37 #include <net/net_namespace.h> 37 #include <net/net_namespace.h>
38 #include <net/netns/generic.h> 38 #include <net/netns/generic.h>
39 #include <net/netfilter/nf_conntrack_l4proto.h> 39 #include <net/netfilter/nf_conntrack_l4proto.h>
40 #include <net/netfilter/nf_conntrack_helper.h> 40 #include <net/netfilter/nf_conntrack_helper.h>
41 #include <net/netfilter/nf_conntrack_core.h> 41 #include <net/netfilter/nf_conntrack_core.h>
42 #include <net/netfilter/nf_conntrack_timeout.h>
42 #include <linux/netfilter/nf_conntrack_proto_gre.h> 43 #include <linux/netfilter/nf_conntrack_proto_gre.h>
43 #include <linux/netfilter/nf_conntrack_pptp.h> 44 #include <linux/netfilter/nf_conntrack_pptp.h>
44 45
45 enum grep_conntrack { 46 enum grep_conntrack {
46 GRE_CT_UNREPLIED, 47 GRE_CT_UNREPLIED,
47 GRE_CT_REPLIED, 48 GRE_CT_REPLIED,
48 GRE_CT_MAX 49 GRE_CT_MAX
49 }; 50 };
50 51
51 static const unsigned int gre_timeouts[GRE_CT_MAX] = { 52 static const unsigned int gre_timeouts[GRE_CT_MAX] = {
52 [GRE_CT_UNREPLIED] = 30*HZ, 53 [GRE_CT_UNREPLIED] = 30*HZ,
53 [GRE_CT_REPLIED] = 180*HZ, 54 [GRE_CT_REPLIED] = 180*HZ,
54 }; 55 };
55 56
56 static unsigned int proto_gre_net_id __read_mostly; 57 static unsigned int proto_gre_net_id __read_mostly;
57 struct netns_proto_gre { 58 struct netns_proto_gre {
58 struct nf_proto_net nf; 59 struct nf_proto_net nf;
59 rwlock_t keymap_lock; 60 rwlock_t keymap_lock;
60 struct list_head keymap_list; 61 struct list_head keymap_list;
61 unsigned int gre_timeouts[GRE_CT_MAX]; 62 unsigned int gre_timeouts[GRE_CT_MAX];
62 }; 63 };
63 64
64 static inline struct netns_proto_gre *gre_pernet(struct net *net) 65 static inline struct netns_proto_gre *gre_pernet(struct net *net)
65 { 66 {
66 return net_generic(net, proto_gre_net_id); 67 return net_generic(net, proto_gre_net_id);
67 } 68 }
68 69
69 static void nf_ct_gre_keymap_flush(struct net *net) 70 static void nf_ct_gre_keymap_flush(struct net *net)
70 { 71 {
71 struct netns_proto_gre *net_gre = gre_pernet(net); 72 struct netns_proto_gre *net_gre = gre_pernet(net);
72 struct nf_ct_gre_keymap *km, *tmp; 73 struct nf_ct_gre_keymap *km, *tmp;
73 74
74 write_lock_bh(&net_gre->keymap_lock); 75 write_lock_bh(&net_gre->keymap_lock);
75 list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { 76 list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
76 list_del(&km->list); 77 list_del(&km->list);
77 kfree(km); 78 kfree(km);
78 } 79 }
79 write_unlock_bh(&net_gre->keymap_lock); 80 write_unlock_bh(&net_gre->keymap_lock);
80 } 81 }
81 82
82 static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, 83 static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
83 const struct nf_conntrack_tuple *t) 84 const struct nf_conntrack_tuple *t)
84 { 85 {
85 return km->tuple.src.l3num == t->src.l3num && 86 return km->tuple.src.l3num == t->src.l3num &&
86 !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) && 87 !memcmp(&km->tuple.src.u3, &t->src.u3, sizeof(t->src.u3)) &&
87 !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) && 88 !memcmp(&km->tuple.dst.u3, &t->dst.u3, sizeof(t->dst.u3)) &&
88 km->tuple.dst.protonum == t->dst.protonum && 89 km->tuple.dst.protonum == t->dst.protonum &&
89 km->tuple.dst.u.all == t->dst.u.all; 90 km->tuple.dst.u.all == t->dst.u.all;
90 } 91 }
91 92
92 /* look up the source key for a given tuple */ 93 /* look up the source key for a given tuple */
93 static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) 94 static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
94 { 95 {
95 struct netns_proto_gre *net_gre = gre_pernet(net); 96 struct netns_proto_gre *net_gre = gre_pernet(net);
96 struct nf_ct_gre_keymap *km; 97 struct nf_ct_gre_keymap *km;
97 __be16 key = 0; 98 __be16 key = 0;
98 99
99 read_lock_bh(&net_gre->keymap_lock); 100 read_lock_bh(&net_gre->keymap_lock);
100 list_for_each_entry(km, &net_gre->keymap_list, list) { 101 list_for_each_entry(km, &net_gre->keymap_list, list) {
101 if (gre_key_cmpfn(km, t)) { 102 if (gre_key_cmpfn(km, t)) {
102 key = km->tuple.src.u.gre.key; 103 key = km->tuple.src.u.gre.key;
103 break; 104 break;
104 } 105 }
105 } 106 }
106 read_unlock_bh(&net_gre->keymap_lock); 107 read_unlock_bh(&net_gre->keymap_lock);
107 108
108 pr_debug("lookup src key 0x%x for ", key); 109 pr_debug("lookup src key 0x%x for ", key);
109 nf_ct_dump_tuple(t); 110 nf_ct_dump_tuple(t);
110 111
111 return key; 112 return key;
112 } 113 }
113 114
114 /* add a single keymap entry, associate with specified master ct */ 115 /* add a single keymap entry, associate with specified master ct */
115 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, 116 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
116 struct nf_conntrack_tuple *t) 117 struct nf_conntrack_tuple *t)
117 { 118 {
118 struct net *net = nf_ct_net(ct); 119 struct net *net = nf_ct_net(ct);
119 struct netns_proto_gre *net_gre = gre_pernet(net); 120 struct netns_proto_gre *net_gre = gre_pernet(net);
120 struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); 121 struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
121 struct nf_ct_gre_keymap **kmp, *km; 122 struct nf_ct_gre_keymap **kmp, *km;
122 123
123 kmp = &ct_pptp_info->keymap[dir]; 124 kmp = &ct_pptp_info->keymap[dir];
124 if (*kmp) { 125 if (*kmp) {
125 /* check whether it's a retransmission */ 126 /* check whether it's a retransmission */
126 read_lock_bh(&net_gre->keymap_lock); 127 read_lock_bh(&net_gre->keymap_lock);
127 list_for_each_entry(km, &net_gre->keymap_list, list) { 128 list_for_each_entry(km, &net_gre->keymap_list, list) {
128 if (gre_key_cmpfn(km, t) && km == *kmp) { 129 if (gre_key_cmpfn(km, t) && km == *kmp) {
129 read_unlock_bh(&net_gre->keymap_lock); 130 read_unlock_bh(&net_gre->keymap_lock);
130 return 0; 131 return 0;
131 } 132 }
132 } 133 }
133 read_unlock_bh(&net_gre->keymap_lock); 134 read_unlock_bh(&net_gre->keymap_lock);
134 pr_debug("trying to override keymap_%s for ct %p\n", 135 pr_debug("trying to override keymap_%s for ct %p\n",
135 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); 136 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
136 return -EEXIST; 137 return -EEXIST;
137 } 138 }
138 139
139 km = kmalloc(sizeof(*km), GFP_ATOMIC); 140 km = kmalloc(sizeof(*km), GFP_ATOMIC);
140 if (!km) 141 if (!km)
141 return -ENOMEM; 142 return -ENOMEM;
142 memcpy(&km->tuple, t, sizeof(*t)); 143 memcpy(&km->tuple, t, sizeof(*t));
143 *kmp = km; 144 *kmp = km;
144 145
145 pr_debug("adding new entry %p: ", km); 146 pr_debug("adding new entry %p: ", km);
146 nf_ct_dump_tuple(&km->tuple); 147 nf_ct_dump_tuple(&km->tuple);
147 148
148 write_lock_bh(&net_gre->keymap_lock); 149 write_lock_bh(&net_gre->keymap_lock);
149 list_add_tail(&km->list, &net_gre->keymap_list); 150 list_add_tail(&km->list, &net_gre->keymap_list);
150 write_unlock_bh(&net_gre->keymap_lock); 151 write_unlock_bh(&net_gre->keymap_lock);
151 152
152 return 0; 153 return 0;
153 } 154 }
154 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); 155 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
155 156
156 /* destroy the keymap entries associated with specified master ct */ 157 /* destroy the keymap entries associated with specified master ct */
157 void nf_ct_gre_keymap_destroy(struct nf_conn *ct) 158 void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
158 { 159 {
159 struct net *net = nf_ct_net(ct); 160 struct net *net = nf_ct_net(ct);
160 struct netns_proto_gre *net_gre = gre_pernet(net); 161 struct netns_proto_gre *net_gre = gre_pernet(net);
161 struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); 162 struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
162 enum ip_conntrack_dir dir; 163 enum ip_conntrack_dir dir;
163 164
164 pr_debug("entering for ct %p\n", ct); 165 pr_debug("entering for ct %p\n", ct);
165 166
166 write_lock_bh(&net_gre->keymap_lock); 167 write_lock_bh(&net_gre->keymap_lock);
167 for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { 168 for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
168 if (ct_pptp_info->keymap[dir]) { 169 if (ct_pptp_info->keymap[dir]) {
169 pr_debug("removing %p from list\n", 170 pr_debug("removing %p from list\n",
170 ct_pptp_info->keymap[dir]); 171 ct_pptp_info->keymap[dir]);
171 list_del(&ct_pptp_info->keymap[dir]->list); 172 list_del(&ct_pptp_info->keymap[dir]->list);
172 kfree(ct_pptp_info->keymap[dir]); 173 kfree(ct_pptp_info->keymap[dir]);
173 ct_pptp_info->keymap[dir] = NULL; 174 ct_pptp_info->keymap[dir] = NULL;
174 } 175 }
175 } 176 }
176 write_unlock_bh(&net_gre->keymap_lock); 177 write_unlock_bh(&net_gre->keymap_lock);
177 } 178 }
178 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); 179 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
179 180
180 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ 181 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
181 182
182 /* gre hdr info to tuple */ 183 /* gre hdr info to tuple */
183 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 184 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
184 struct net *net, struct nf_conntrack_tuple *tuple) 185 struct net *net, struct nf_conntrack_tuple *tuple)
185 { 186 {
186 const struct pptp_gre_header *pgrehdr; 187 const struct pptp_gre_header *pgrehdr;
187 struct pptp_gre_header _pgrehdr; 188 struct pptp_gre_header _pgrehdr;
188 __be16 srckey; 189 __be16 srckey;
189 const struct gre_base_hdr *grehdr; 190 const struct gre_base_hdr *grehdr;
190 struct gre_base_hdr _grehdr; 191 struct gre_base_hdr _grehdr;
191 192
192 /* first only delinearize old RFC1701 GRE header */ 193 /* first only delinearize old RFC1701 GRE header */
193 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); 194 grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
194 if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { 195 if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
195 /* try to behave like "nf_conntrack_proto_generic" */ 196 /* try to behave like "nf_conntrack_proto_generic" */
196 tuple->src.u.all = 0; 197 tuple->src.u.all = 0;
197 tuple->dst.u.all = 0; 198 tuple->dst.u.all = 0;
198 return true; 199 return true;
199 } 200 }
200 201
201 /* PPTP header is variable length, only need up to the call_id field */ 202 /* PPTP header is variable length, only need up to the call_id field */
202 pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); 203 pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
203 if (!pgrehdr) 204 if (!pgrehdr)
204 return true; 205 return true;
205 206
206 if (grehdr->protocol != GRE_PROTO_PPP) { 207 if (grehdr->protocol != GRE_PROTO_PPP) {
207 pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); 208 pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol));
208 return false; 209 return false;
209 } 210 }
210 211
211 tuple->dst.u.gre.key = pgrehdr->call_id; 212 tuple->dst.u.gre.key = pgrehdr->call_id;
212 srckey = gre_keymap_lookup(net, tuple); 213 srckey = gre_keymap_lookup(net, tuple);
213 tuple->src.u.gre.key = srckey; 214 tuple->src.u.gre.key = srckey;
214 215
215 return true; 216 return true;
216 } 217 }
217 218
218 #ifdef CONFIG_NF_CONNTRACK_PROCFS 219 #ifdef CONFIG_NF_CONNTRACK_PROCFS
219 /* print private data for conntrack */ 220 /* print private data for conntrack */
220 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) 221 static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
221 { 222 {
222 seq_printf(s, "timeout=%u, stream_timeout=%u ", 223 seq_printf(s, "timeout=%u, stream_timeout=%u ",
223 (ct->proto.gre.timeout / HZ), 224 (ct->proto.gre.timeout / HZ),
224 (ct->proto.gre.stream_timeout / HZ)); 225 (ct->proto.gre.stream_timeout / HZ));
225 } 226 }
226 #endif 227 #endif
227 228
228 static unsigned int *gre_get_timeouts(struct net *net) 229 static unsigned int *gre_get_timeouts(struct net *net)
229 { 230 {
230 return gre_pernet(net)->gre_timeouts; 231 return gre_pernet(net)->gre_timeouts;
231 } 232 }
232 233
233 /* Returns verdict for packet, and may modify conntrack */ 234 /* Returns verdict for packet, and may modify conntrack */
234 static int gre_packet(struct nf_conn *ct, 235 static int gre_packet(struct nf_conn *ct,
235 const struct sk_buff *skb, 236 const struct sk_buff *skb,
236 unsigned int dataoff, 237 unsigned int dataoff,
237 enum ip_conntrack_info ctinfo, 238 enum ip_conntrack_info ctinfo)
238 unsigned int *timeouts)
239 { 239 {
240 /* If we've seen traffic both ways, this is a GRE connection. 240 /* If we've seen traffic both ways, this is a GRE connection.
241 * Extend timeout. */ 241 * Extend timeout. */
242 if (ct->status & IPS_SEEN_REPLY) { 242 if (ct->status & IPS_SEEN_REPLY) {
243 nf_ct_refresh_acct(ct, ctinfo, skb, 243 nf_ct_refresh_acct(ct, ctinfo, skb,
244 ct->proto.gre.stream_timeout); 244 ct->proto.gre.stream_timeout);
245 /* Also, more likely to be important, and not a probe. */ 245 /* Also, more likely to be important, and not a probe. */
246 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) 246 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
247 nf_conntrack_event_cache(IPCT_ASSURED, ct); 247 nf_conntrack_event_cache(IPCT_ASSURED, ct);
248 } else 248 } else
249 nf_ct_refresh_acct(ct, ctinfo, skb, 249 nf_ct_refresh_acct(ct, ctinfo, skb,
250 ct->proto.gre.timeout); 250 ct->proto.gre.timeout);
251 251
252 return NF_ACCEPT; 252 return NF_ACCEPT;
253 } 253 }
254 254
255 /* Called when a new connection for this protocol found. */ 255 /* Called when a new connection for this protocol found. */
256 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, 256 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
257 unsigned int dataoff, unsigned int *timeouts) 257 unsigned int dataoff)
258 { 258 {
259 unsigned int *timeouts = nf_ct_timeout_lookup(ct);
260
261 if (!timeouts)
262 timeouts = gre_get_timeouts(nf_ct_net(ct));
263
259 pr_debug(": "); 264 pr_debug(": ");
260 nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 265 nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
261 266
262 /* initialize to sane value. Ideally a conntrack helper 267 /* initialize to sane value. Ideally a conntrack helper
263 * (e.g. in case of pptp) is increasing them */ 268 * (e.g. in case of pptp) is increasing them */
264 ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; 269 ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED];
265 ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; 270 ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
266 271
267 return true; 272 return true;
268 } 273 }
269 274
270 /* Called when a conntrack entry has already been removed from the hashes 275 /* Called when a conntrack entry has already been removed from the hashes
271 * and is about to be deleted from memory */ 276 * and is about to be deleted from memory */
272 static void gre_destroy(struct nf_conn *ct) 277 static void gre_destroy(struct nf_conn *ct)
273 { 278 {
274 struct nf_conn *master = ct->master; 279 struct nf_conn *master = ct->master;
275 pr_debug(" entering\n"); 280 pr_debug(" entering\n");
276 281
277 if (!master) 282 if (!master)
278 pr_debug("no master !?!\n"); 283 pr_debug("no master !?!\n");
279 else 284 else
280 nf_ct_gre_keymap_destroy(master); 285 nf_ct_gre_keymap_destroy(master);
281 } 286 }
282 287
283 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 288 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
284 289
285 #include <linux/netfilter/nfnetlink.h> 290 #include <linux/netfilter/nfnetlink.h>
286 #include <linux/netfilter/nfnetlink_cttimeout.h> 291 #include <linux/netfilter/nfnetlink_cttimeout.h>
287 292
288 static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], 293 static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
289 struct net *net, void *data) 294 struct net *net, void *data)
290 { 295 {
291 unsigned int *timeouts = data; 296 unsigned int *timeouts = data;
292 struct netns_proto_gre *net_gre = gre_pernet(net); 297 struct netns_proto_gre *net_gre = gre_pernet(net);
293 298
299 if (!timeouts)
300 timeouts = gre_get_timeouts(net);
294 /* set default timeouts for GRE. */ 301 /* set default timeouts for GRE. */
295 timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; 302 timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
296 timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; 303 timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
297 304
298 if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { 305 if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) {
299 timeouts[GRE_CT_UNREPLIED] = 306 timeouts[GRE_CT_UNREPLIED] =
300 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; 307 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ;
301 } 308 }
302 if (tb[CTA_TIMEOUT_GRE_REPLIED]) { 309 if (tb[CTA_TIMEOUT_GRE_REPLIED]) {
303 timeouts[GRE_CT_REPLIED] = 310 timeouts[GRE_CT_REPLIED] =
304 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; 311 ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ;
305 } 312 }
306 return 0; 313 return 0;
307 } 314 }
308 315
309 static int 316 static int
310 gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 317 gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
311 { 318 {
312 const unsigned int *timeouts = data; 319 const unsigned int *timeouts = data;
313 320
314 if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED, 321 if (nla_put_be32(skb, CTA_TIMEOUT_GRE_UNREPLIED,
315 htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) || 322 htonl(timeouts[GRE_CT_UNREPLIED] / HZ)) ||
316 nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED, 323 nla_put_be32(skb, CTA_TIMEOUT_GRE_REPLIED,
317 htonl(timeouts[GRE_CT_REPLIED] / HZ))) 324 htonl(timeouts[GRE_CT_REPLIED] / HZ)))
318 goto nla_put_failure; 325 goto nla_put_failure;
319 return 0; 326 return 0;
320 327
321 nla_put_failure: 328 nla_put_failure:
322 return -ENOSPC; 329 return -ENOSPC;
323 } 330 }
324 331
325 static const struct nla_policy 332 static const struct nla_policy
326 gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { 333 gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
327 [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, 334 [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 },
328 [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, 335 [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 },
329 }; 336 };
330 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 337 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
331 338
332 static int gre_init_net(struct net *net, u_int16_t proto) 339 static int gre_init_net(struct net *net, u_int16_t proto)
333 { 340 {
334 struct netns_proto_gre *net_gre = gre_pernet(net); 341 struct netns_proto_gre *net_gre = gre_pernet(net);
335 int i; 342 int i;
336 343
337 rwlock_init(&net_gre->keymap_lock); 344 rwlock_init(&net_gre->keymap_lock);
338 INIT_LIST_HEAD(&net_gre->keymap_list); 345 INIT_LIST_HEAD(&net_gre->keymap_list);
339 for (i = 0; i < GRE_CT_MAX; i++) 346 for (i = 0; i < GRE_CT_MAX; i++)
340 net_gre->gre_timeouts[i] = gre_timeouts[i]; 347 net_gre->gre_timeouts[i] = gre_timeouts[i];
341 348
342 return 0; 349 return 0;
343 } 350 }
344 351
345 /* protocol helper struct */ 352 /* protocol helper struct */
346 static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { 353 static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
347 .l3proto = AF_INET, 354 .l3proto = AF_INET,
348 .l4proto = IPPROTO_GRE, 355 .l4proto = IPPROTO_GRE,
349 .pkt_to_tuple = gre_pkt_to_tuple, 356 .pkt_to_tuple = gre_pkt_to_tuple,
350 #ifdef CONFIG_NF_CONNTRACK_PROCFS 357 #ifdef CONFIG_NF_CONNTRACK_PROCFS
351 .print_conntrack = gre_print_conntrack, 358 .print_conntrack = gre_print_conntrack,
352 #endif 359 #endif
353 .get_timeouts = gre_get_timeouts,
354 .packet = gre_packet, 360 .packet = gre_packet,
355 .new = gre_new, 361 .new = gre_new,
356 .destroy = gre_destroy, 362 .destroy = gre_destroy,
357 .me = THIS_MODULE, 363 .me = THIS_MODULE,
358 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 364 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
359 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 365 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
360 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 366 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
361 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 367 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
362 .nla_policy = nf_ct_port_nla_policy, 368 .nla_policy = nf_ct_port_nla_policy,
363 #endif 369 #endif
364 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 370 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
365 .ctnl_timeout = { 371 .ctnl_timeout = {
366 .nlattr_to_obj = gre_timeout_nlattr_to_obj, 372 .nlattr_to_obj = gre_timeout_nlattr_to_obj,
367 .obj_to_nlattr = gre_timeout_obj_to_nlattr, 373 .obj_to_nlattr = gre_timeout_obj_to_nlattr,
368 .nlattr_max = CTA_TIMEOUT_GRE_MAX, 374 .nlattr_max = CTA_TIMEOUT_GRE_MAX,
369 .obj_size = sizeof(unsigned int) * GRE_CT_MAX, 375 .obj_size = sizeof(unsigned int) * GRE_CT_MAX,
370 .nla_policy = gre_timeout_nla_policy, 376 .nla_policy = gre_timeout_nla_policy,
371 }, 377 },
372 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 378 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
373 .net_id = &proto_gre_net_id, 379 .net_id = &proto_gre_net_id,
374 .init_net = gre_init_net, 380 .init_net = gre_init_net,
375 }; 381 };
376 382
377 static int proto_gre_net_init(struct net *net) 383 static int proto_gre_net_init(struct net *net)
378 { 384 {
379 int ret = 0; 385 int ret = 0;
380 386
381 ret = nf_ct_l4proto_pernet_register_one(net, 387 ret = nf_ct_l4proto_pernet_register_one(net,
382 &nf_conntrack_l4proto_gre4); 388 &nf_conntrack_l4proto_gre4);
383 if (ret < 0) 389 if (ret < 0)
384 pr_err("nf_conntrack_gre4: pernet registration failed.\n"); 390 pr_err("nf_conntrack_gre4: pernet registration failed.\n");
385 return ret; 391 return ret;
386 } 392 }
387 393
388 static void proto_gre_net_exit(struct net *net) 394 static void proto_gre_net_exit(struct net *net)
389 { 395 {
390 nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); 396 nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
391 nf_ct_gre_keymap_flush(net); 397 nf_ct_gre_keymap_flush(net);
392 } 398 }
393 399
394 static struct pernet_operations proto_gre_net_ops = { 400 static struct pernet_operations proto_gre_net_ops = {
395 .init = proto_gre_net_init, 401 .init = proto_gre_net_init,
396 .exit = proto_gre_net_exit, 402 .exit = proto_gre_net_exit,
397 .id = &proto_gre_net_id, 403 .id = &proto_gre_net_id,
398 .size = sizeof(struct netns_proto_gre), 404 .size = sizeof(struct netns_proto_gre),
399 }; 405 };
400 406
401 static int __init nf_ct_proto_gre_init(void) 407 static int __init nf_ct_proto_gre_init(void)
402 { 408 {
403 int ret; 409 int ret;
404 410
405 ret = register_pernet_subsys(&proto_gre_net_ops); 411 ret = register_pernet_subsys(&proto_gre_net_ops);
406 if (ret < 0) 412 if (ret < 0)
407 goto out_pernet; 413 goto out_pernet;
408 ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); 414 ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
409 if (ret < 0) 415 if (ret < 0)
410 goto out_gre4; 416 goto out_gre4;
411 417
412 return 0; 418 return 0;
413 out_gre4: 419 out_gre4:
414 unregister_pernet_subsys(&proto_gre_net_ops); 420 unregister_pernet_subsys(&proto_gre_net_ops);
415 out_pernet: 421 out_pernet:
416 return ret; 422 return ret;
417 } 423 }
418 424
419 static void __exit nf_ct_proto_gre_fini(void) 425 static void __exit nf_ct_proto_gre_fini(void)
420 { 426 {
421 nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); 427 nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
422 unregister_pernet_subsys(&proto_gre_net_ops); 428 unregister_pernet_subsys(&proto_gre_net_ops);
423 } 429 }
424 430
425 module_init(nf_ct_proto_gre_init); 431 module_init(nf_ct_proto_gre_init);
426 module_exit(nf_ct_proto_gre_fini); 432 module_exit(nf_ct_proto_gre_fini);
427 433
net/netfilter/nf_conntrack_proto_sctp.c
1 /* 1 /*
2 * Connection tracking protocol helper module for SCTP. 2 * Connection tracking protocol helper module for SCTP.
3 * 3 *
4 * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> 4 * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com>
5 * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> 5 * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net>
6 * 6 *
7 * SCTP is defined in RFC 2960. References to various sections in this code 7 * SCTP is defined in RFC 2960. References to various sections in this code
8 * are to this RFC. 8 * are to this RFC.
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as 11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15 #include <linux/types.h> 15 #include <linux/types.h>
16 #include <linux/timer.h> 16 #include <linux/timer.h>
17 #include <linux/netfilter.h> 17 #include <linux/netfilter.h>
18 #include <linux/in.h> 18 #include <linux/in.h>
19 #include <linux/ip.h> 19 #include <linux/ip.h>
20 #include <linux/sctp.h> 20 #include <linux/sctp.h>
21 #include <linux/string.h> 21 #include <linux/string.h>
22 #include <linux/seq_file.h> 22 #include <linux/seq_file.h>
23 #include <linux/spinlock.h> 23 #include <linux/spinlock.h>
24 #include <linux/interrupt.h> 24 #include <linux/interrupt.h>
25 #include <net/sctp/checksum.h> 25 #include <net/sctp/checksum.h>
26 26
27 #include <net/netfilter/nf_log.h> 27 #include <net/netfilter/nf_log.h>
28 #include <net/netfilter/nf_conntrack.h> 28 #include <net/netfilter/nf_conntrack.h>
29 #include <net/netfilter/nf_conntrack_l4proto.h> 29 #include <net/netfilter/nf_conntrack_l4proto.h>
30 #include <net/netfilter/nf_conntrack_ecache.h> 30 #include <net/netfilter/nf_conntrack_ecache.h>
31 #include <net/netfilter/nf_conntrack_timeout.h>
31 32
32 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 33 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
33 closely. They're more complex. --RR 34 closely. They're more complex. --RR
34 35
35 And so for me for SCTP :D -Kiran */ 36 And so for me for SCTP :D -Kiran */
36 37
37 static const char *const sctp_conntrack_names[] = { 38 static const char *const sctp_conntrack_names[] = {
38 "NONE", 39 "NONE",
39 "CLOSED", 40 "CLOSED",
40 "COOKIE_WAIT", 41 "COOKIE_WAIT",
41 "COOKIE_ECHOED", 42 "COOKIE_ECHOED",
42 "ESTABLISHED", 43 "ESTABLISHED",
43 "SHUTDOWN_SENT", 44 "SHUTDOWN_SENT",
44 "SHUTDOWN_RECD", 45 "SHUTDOWN_RECD",
45 "SHUTDOWN_ACK_SENT", 46 "SHUTDOWN_ACK_SENT",
46 "HEARTBEAT_SENT", 47 "HEARTBEAT_SENT",
47 "HEARTBEAT_ACKED", 48 "HEARTBEAT_ACKED",
48 }; 49 };
49 50
50 #define SECS * HZ 51 #define SECS * HZ
51 #define MINS * 60 SECS 52 #define MINS * 60 SECS
52 #define HOURS * 60 MINS 53 #define HOURS * 60 MINS
53 #define DAYS * 24 HOURS 54 #define DAYS * 24 HOURS
54 55
55 static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { 56 static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
56 [SCTP_CONNTRACK_CLOSED] = 10 SECS, 57 [SCTP_CONNTRACK_CLOSED] = 10 SECS,
57 [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, 58 [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
58 [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, 59 [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
59 [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, 60 [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
60 [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, 61 [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
61 [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, 62 [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
62 [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, 63 [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
63 [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, 64 [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
64 [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, 65 [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
65 }; 66 };
66 67
67 #define sNO SCTP_CONNTRACK_NONE 68 #define sNO SCTP_CONNTRACK_NONE
68 #define sCL SCTP_CONNTRACK_CLOSED 69 #define sCL SCTP_CONNTRACK_CLOSED
69 #define sCW SCTP_CONNTRACK_COOKIE_WAIT 70 #define sCW SCTP_CONNTRACK_COOKIE_WAIT
70 #define sCE SCTP_CONNTRACK_COOKIE_ECHOED 71 #define sCE SCTP_CONNTRACK_COOKIE_ECHOED
71 #define sES SCTP_CONNTRACK_ESTABLISHED 72 #define sES SCTP_CONNTRACK_ESTABLISHED
72 #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT 73 #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
73 #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD 74 #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
74 #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT 75 #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
75 #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT 76 #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
76 #define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED 77 #define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
77 #define sIV SCTP_CONNTRACK_MAX 78 #define sIV SCTP_CONNTRACK_MAX
78 79
79 /* 80 /*
80 These are the descriptions of the states: 81 These are the descriptions of the states:
81 82
82 NOTE: These state names are tantalizingly similar to the states of an 83 NOTE: These state names are tantalizingly similar to the states of an
83 SCTP endpoint. But the interpretation of the states is a little different, 84 SCTP endpoint. But the interpretation of the states is a little different,
84 considering that these are the states of the connection and not of an end 85 considering that these are the states of the connection and not of an end
85 point. Please note the subtleties. -Kiran 86 point. Please note the subtleties. -Kiran
86 87
87 NONE - Nothing so far. 88 NONE - Nothing so far.
88 COOKIE WAIT - We have seen an INIT chunk in the original direction, or also 89 COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
89 an INIT_ACK chunk in the reply direction. 90 an INIT_ACK chunk in the reply direction.
90 COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. 91 COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
91 ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. 92 ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
92 SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. 93 SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
93 SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. 94 SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin.
94 SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite 95 SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
95 to that of the SHUTDOWN chunk. 96 to that of the SHUTDOWN chunk.
96 CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of 97 CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
97 the SHUTDOWN chunk. Connection is closed. 98 the SHUTDOWN chunk. Connection is closed.
98 HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. 99 HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
99 HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to 100 HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
100 that of the HEARTBEAT chunk. Secondary connection is 101 that of the HEARTBEAT chunk. Secondary connection is
101 established. 102 established.
102 */ 103 */
103 104
104 /* TODO 105 /* TODO
105 - I have assumed that the first INIT is in the original direction. 106 - I have assumed that the first INIT is in the original direction.
106 This messes things when an INIT comes in the reply direction in CLOSED 107 This messes things when an INIT comes in the reply direction in CLOSED
107 state. 108 state.
108 - Check the error type in the reply dir before transitioning from 109 - Check the error type in the reply dir before transitioning from
109 cookie echoed to closed. 110 cookie echoed to closed.
110 - Sec 5.2.4 of RFC 2960 111 - Sec 5.2.4 of RFC 2960
111 - Full Multi Homing support. 112 - Full Multi Homing support.
112 */ 113 */
113 114
114 /* SCTP conntrack state transitions */ 115 /* SCTP conntrack state transitions */
115 static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { 116 static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
116 { 117 {
117 /* ORIGINAL */ 118 /* ORIGINAL */
118 /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ 119 /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
119 /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, 120 /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
120 /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, 121 /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
121 /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, 122 /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
122 /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, 123 /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
123 /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, 124 /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
124 /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ 125 /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
125 /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ 126 /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
126 /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ 127 /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
127 /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, 128 /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
128 /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, 129 /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
129 /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} 130 /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
130 }, 131 },
131 { 132 {
132 /* REPLY */ 133 /* REPLY */
133 /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ 134 /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
134 /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ 135 /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
135 /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, 136 /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
136 /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, 137 /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
137 /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, 138 /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
138 /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, 139 /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
139 /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, 140 /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
140 /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ 141 /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
141 /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, 142 /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
142 /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, 143 /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
143 /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, 144 /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
144 /* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} 145 /* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
145 } 146 }
146 }; 147 };
147 148
148 static inline struct nf_sctp_net *sctp_pernet(struct net *net) 149 static inline struct nf_sctp_net *sctp_pernet(struct net *net)
149 { 150 {
150 return &net->ct.nf_ct_proto.sctp; 151 return &net->ct.nf_ct_proto.sctp;
151 } 152 }
152 153
153 #ifdef CONFIG_NF_CONNTRACK_PROCFS 154 #ifdef CONFIG_NF_CONNTRACK_PROCFS
154 /* Print out the private part of the conntrack. */ 155 /* Print out the private part of the conntrack. */
155 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) 156 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
156 { 157 {
157 seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); 158 seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
158 } 159 }
159 #endif 160 #endif
160 161
161 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ 162 #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
162 for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ 163 for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
163 (offset) < (skb)->len && \ 164 (offset) < (skb)->len && \
164 ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ 165 ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \
165 (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) 166 (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
166 167
167 /* Some validity checks to make sure the chunks are fine */ 168 /* Some validity checks to make sure the chunks are fine */
168 static int do_basic_checks(struct nf_conn *ct, 169 static int do_basic_checks(struct nf_conn *ct,
169 const struct sk_buff *skb, 170 const struct sk_buff *skb,
170 unsigned int dataoff, 171 unsigned int dataoff,
171 unsigned long *map) 172 unsigned long *map)
172 { 173 {
173 u_int32_t offset, count; 174 u_int32_t offset, count;
174 struct sctp_chunkhdr _sch, *sch; 175 struct sctp_chunkhdr _sch, *sch;
175 int flag; 176 int flag;
176 177
177 flag = 0; 178 flag = 0;
178 179
179 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 180 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
180 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type); 181 pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
181 182
182 if (sch->type == SCTP_CID_INIT || 183 if (sch->type == SCTP_CID_INIT ||
183 sch->type == SCTP_CID_INIT_ACK || 184 sch->type == SCTP_CID_INIT_ACK ||
184 sch->type == SCTP_CID_SHUTDOWN_COMPLETE) 185 sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
185 flag = 1; 186 flag = 1;
186 187
187 /* 188 /*
188 * Cookie Ack/Echo chunks not the first OR 189 * Cookie Ack/Echo chunks not the first OR
189 * Init / Init Ack / Shutdown compl chunks not the only chunks 190 * Init / Init Ack / Shutdown compl chunks not the only chunks
190 * OR zero-length. 191 * OR zero-length.
191 */ 192 */
192 if (((sch->type == SCTP_CID_COOKIE_ACK || 193 if (((sch->type == SCTP_CID_COOKIE_ACK ||
193 sch->type == SCTP_CID_COOKIE_ECHO || 194 sch->type == SCTP_CID_COOKIE_ECHO ||
194 flag) && 195 flag) &&
195 count != 0) || !sch->length) { 196 count != 0) || !sch->length) {
196 pr_debug("Basic checks failed\n"); 197 pr_debug("Basic checks failed\n");
197 return 1; 198 return 1;
198 } 199 }
199 200
200 if (map) 201 if (map)
201 set_bit(sch->type, map); 202 set_bit(sch->type, map);
202 } 203 }
203 204
204 pr_debug("Basic checks passed\n"); 205 pr_debug("Basic checks passed\n");
205 return count == 0; 206 return count == 0;
206 } 207 }
207 208
208 static int sctp_new_state(enum ip_conntrack_dir dir, 209 static int sctp_new_state(enum ip_conntrack_dir dir,
209 enum sctp_conntrack cur_state, 210 enum sctp_conntrack cur_state,
210 int chunk_type) 211 int chunk_type)
211 { 212 {
212 int i; 213 int i;
213 214
214 pr_debug("Chunk type: %d\n", chunk_type); 215 pr_debug("Chunk type: %d\n", chunk_type);
215 216
216 switch (chunk_type) { 217 switch (chunk_type) {
217 case SCTP_CID_INIT: 218 case SCTP_CID_INIT:
218 pr_debug("SCTP_CID_INIT\n"); 219 pr_debug("SCTP_CID_INIT\n");
219 i = 0; 220 i = 0;
220 break; 221 break;
221 case SCTP_CID_INIT_ACK: 222 case SCTP_CID_INIT_ACK:
222 pr_debug("SCTP_CID_INIT_ACK\n"); 223 pr_debug("SCTP_CID_INIT_ACK\n");
223 i = 1; 224 i = 1;
224 break; 225 break;
225 case SCTP_CID_ABORT: 226 case SCTP_CID_ABORT:
226 pr_debug("SCTP_CID_ABORT\n"); 227 pr_debug("SCTP_CID_ABORT\n");
227 i = 2; 228 i = 2;
228 break; 229 break;
229 case SCTP_CID_SHUTDOWN: 230 case SCTP_CID_SHUTDOWN:
230 pr_debug("SCTP_CID_SHUTDOWN\n"); 231 pr_debug("SCTP_CID_SHUTDOWN\n");
231 i = 3; 232 i = 3;
232 break; 233 break;
233 case SCTP_CID_SHUTDOWN_ACK: 234 case SCTP_CID_SHUTDOWN_ACK:
234 pr_debug("SCTP_CID_SHUTDOWN_ACK\n"); 235 pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
235 i = 4; 236 i = 4;
236 break; 237 break;
237 case SCTP_CID_ERROR: 238 case SCTP_CID_ERROR:
238 pr_debug("SCTP_CID_ERROR\n"); 239 pr_debug("SCTP_CID_ERROR\n");
239 i = 5; 240 i = 5;
240 break; 241 break;
241 case SCTP_CID_COOKIE_ECHO: 242 case SCTP_CID_COOKIE_ECHO:
242 pr_debug("SCTP_CID_COOKIE_ECHO\n"); 243 pr_debug("SCTP_CID_COOKIE_ECHO\n");
243 i = 6; 244 i = 6;
244 break; 245 break;
245 case SCTP_CID_COOKIE_ACK: 246 case SCTP_CID_COOKIE_ACK:
246 pr_debug("SCTP_CID_COOKIE_ACK\n"); 247 pr_debug("SCTP_CID_COOKIE_ACK\n");
247 i = 7; 248 i = 7;
248 break; 249 break;
249 case SCTP_CID_SHUTDOWN_COMPLETE: 250 case SCTP_CID_SHUTDOWN_COMPLETE:
250 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); 251 pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
251 i = 8; 252 i = 8;
252 break; 253 break;
253 case SCTP_CID_HEARTBEAT: 254 case SCTP_CID_HEARTBEAT:
254 pr_debug("SCTP_CID_HEARTBEAT"); 255 pr_debug("SCTP_CID_HEARTBEAT");
255 i = 9; 256 i = 9;
256 break; 257 break;
257 case SCTP_CID_HEARTBEAT_ACK: 258 case SCTP_CID_HEARTBEAT_ACK:
258 pr_debug("SCTP_CID_HEARTBEAT_ACK"); 259 pr_debug("SCTP_CID_HEARTBEAT_ACK");
259 i = 10; 260 i = 10;
260 break; 261 break;
261 default: 262 default:
262 /* Other chunks like DATA or SACK do not change the state */ 263 /* Other chunks like DATA or SACK do not change the state */
263 pr_debug("Unknown chunk type, Will stay in %s\n", 264 pr_debug("Unknown chunk type, Will stay in %s\n",
264 sctp_conntrack_names[cur_state]); 265 sctp_conntrack_names[cur_state]);
265 return cur_state; 266 return cur_state;
266 } 267 }
267 268
268 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", 269 pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
269 dir, sctp_conntrack_names[cur_state], chunk_type, 270 dir, sctp_conntrack_names[cur_state], chunk_type,
270 sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); 271 sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
271 272
272 return sctp_conntracks[dir][i][cur_state]; 273 return sctp_conntracks[dir][i][cur_state];
273 } 274 }
274 275
275 static unsigned int *sctp_get_timeouts(struct net *net)
276 {
277 return sctp_pernet(net)->timeouts;
278 }
279
280 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ 276 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
281 static int sctp_packet(struct nf_conn *ct, 277 static int sctp_packet(struct nf_conn *ct,
282 const struct sk_buff *skb, 278 const struct sk_buff *skb,
283 unsigned int dataoff, 279 unsigned int dataoff,
284 enum ip_conntrack_info ctinfo, 280 enum ip_conntrack_info ctinfo)
285 unsigned int *timeouts)
286 { 281 {
287 enum sctp_conntrack new_state, old_state; 282 enum sctp_conntrack new_state, old_state;
288 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 283 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
289 const struct sctphdr *sh; 284 const struct sctphdr *sh;
290 struct sctphdr _sctph; 285 struct sctphdr _sctph;
291 const struct sctp_chunkhdr *sch; 286 const struct sctp_chunkhdr *sch;
292 struct sctp_chunkhdr _sch; 287 struct sctp_chunkhdr _sch;
293 u_int32_t offset, count; 288 u_int32_t offset, count;
289 unsigned int *timeouts;
294 unsigned long map[256 / sizeof(unsigned long)] = { 0 }; 290 unsigned long map[256 / sizeof(unsigned long)] = { 0 };
295 291
296 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); 292 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
297 if (sh == NULL) 293 if (sh == NULL)
298 goto out; 294 goto out;
299 295
300 if (do_basic_checks(ct, skb, dataoff, map) != 0) 296 if (do_basic_checks(ct, skb, dataoff, map) != 0)
301 goto out; 297 goto out;
302 298
303 /* Check the verification tag (Sec 8.5) */ 299 /* Check the verification tag (Sec 8.5) */
304 if (!test_bit(SCTP_CID_INIT, map) && 300 if (!test_bit(SCTP_CID_INIT, map) &&
305 !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && 301 !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
306 !test_bit(SCTP_CID_COOKIE_ECHO, map) && 302 !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
307 !test_bit(SCTP_CID_ABORT, map) && 303 !test_bit(SCTP_CID_ABORT, map) &&
308 !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && 304 !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
309 !test_bit(SCTP_CID_HEARTBEAT, map) && 305 !test_bit(SCTP_CID_HEARTBEAT, map) &&
310 !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && 306 !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
311 sh->vtag != ct->proto.sctp.vtag[dir]) { 307 sh->vtag != ct->proto.sctp.vtag[dir]) {
312 pr_debug("Verification tag check failed\n"); 308 pr_debug("Verification tag check failed\n");
313 goto out; 309 goto out;
314 } 310 }
315 311
316 old_state = new_state = SCTP_CONNTRACK_NONE; 312 old_state = new_state = SCTP_CONNTRACK_NONE;
317 spin_lock_bh(&ct->lock); 313 spin_lock_bh(&ct->lock);
318 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 314 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
319 /* Special cases of Verification tag check (Sec 8.5.1) */ 315 /* Special cases of Verification tag check (Sec 8.5.1) */
320 if (sch->type == SCTP_CID_INIT) { 316 if (sch->type == SCTP_CID_INIT) {
321 /* Sec 8.5.1 (A) */ 317 /* Sec 8.5.1 (A) */
322 if (sh->vtag != 0) 318 if (sh->vtag != 0)
323 goto out_unlock; 319 goto out_unlock;
324 } else if (sch->type == SCTP_CID_ABORT) { 320 } else if (sch->type == SCTP_CID_ABORT) {
325 /* Sec 8.5.1 (B) */ 321 /* Sec 8.5.1 (B) */
326 if (sh->vtag != ct->proto.sctp.vtag[dir] && 322 if (sh->vtag != ct->proto.sctp.vtag[dir] &&
327 sh->vtag != ct->proto.sctp.vtag[!dir]) 323 sh->vtag != ct->proto.sctp.vtag[!dir])
328 goto out_unlock; 324 goto out_unlock;
329 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { 325 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
330 /* Sec 8.5.1 (C) */ 326 /* Sec 8.5.1 (C) */
331 if (sh->vtag != ct->proto.sctp.vtag[dir] && 327 if (sh->vtag != ct->proto.sctp.vtag[dir] &&
332 sh->vtag != ct->proto.sctp.vtag[!dir] && 328 sh->vtag != ct->proto.sctp.vtag[!dir] &&
333 sch->flags & SCTP_CHUNK_FLAG_T) 329 sch->flags & SCTP_CHUNK_FLAG_T)
334 goto out_unlock; 330 goto out_unlock;
335 } else if (sch->type == SCTP_CID_COOKIE_ECHO) { 331 } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
336 /* Sec 8.5.1 (D) */ 332 /* Sec 8.5.1 (D) */
337 if (sh->vtag != ct->proto.sctp.vtag[dir]) 333 if (sh->vtag != ct->proto.sctp.vtag[dir])
338 goto out_unlock; 334 goto out_unlock;
339 } else if (sch->type == SCTP_CID_HEARTBEAT || 335 } else if (sch->type == SCTP_CID_HEARTBEAT ||
340 sch->type == SCTP_CID_HEARTBEAT_ACK) { 336 sch->type == SCTP_CID_HEARTBEAT_ACK) {
341 if (ct->proto.sctp.vtag[dir] == 0) { 337 if (ct->proto.sctp.vtag[dir] == 0) {
342 pr_debug("Setting vtag %x for dir %d\n", 338 pr_debug("Setting vtag %x for dir %d\n",
343 sh->vtag, dir); 339 sh->vtag, dir);
344 ct->proto.sctp.vtag[dir] = sh->vtag; 340 ct->proto.sctp.vtag[dir] = sh->vtag;
345 } else if (sh->vtag != ct->proto.sctp.vtag[dir]) { 341 } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
346 pr_debug("Verification tag check failed\n"); 342 pr_debug("Verification tag check failed\n");
347 goto out_unlock; 343 goto out_unlock;
348 } 344 }
349 } 345 }
350 346
351 old_state = ct->proto.sctp.state; 347 old_state = ct->proto.sctp.state;
352 new_state = sctp_new_state(dir, old_state, sch->type); 348 new_state = sctp_new_state(dir, old_state, sch->type);
353 349
354 /* Invalid */ 350 /* Invalid */
355 if (new_state == SCTP_CONNTRACK_MAX) { 351 if (new_state == SCTP_CONNTRACK_MAX) {
356 pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u " 352 pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
357 "conntrack=%u\n", 353 "conntrack=%u\n",
358 dir, sch->type, old_state); 354 dir, sch->type, old_state);
359 goto out_unlock; 355 goto out_unlock;
360 } 356 }
361 357
362 /* If it is an INIT or an INIT ACK note down the vtag */ 358 /* If it is an INIT or an INIT ACK note down the vtag */
363 if (sch->type == SCTP_CID_INIT || 359 if (sch->type == SCTP_CID_INIT ||
364 sch->type == SCTP_CID_INIT_ACK) { 360 sch->type == SCTP_CID_INIT_ACK) {
365 struct sctp_inithdr _inithdr, *ih; 361 struct sctp_inithdr _inithdr, *ih;
366 362
367 ih = skb_header_pointer(skb, offset + sizeof(_sch), 363 ih = skb_header_pointer(skb, offset + sizeof(_sch),
368 sizeof(_inithdr), &_inithdr); 364 sizeof(_inithdr), &_inithdr);
369 if (ih == NULL) 365 if (ih == NULL)
370 goto out_unlock; 366 goto out_unlock;
371 pr_debug("Setting vtag %x for dir %d\n", 367 pr_debug("Setting vtag %x for dir %d\n",
372 ih->init_tag, !dir); 368 ih->init_tag, !dir);
373 ct->proto.sctp.vtag[!dir] = ih->init_tag; 369 ct->proto.sctp.vtag[!dir] = ih->init_tag;
374 } 370 }
375 371
376 ct->proto.sctp.state = new_state; 372 ct->proto.sctp.state = new_state;
377 if (old_state != new_state) 373 if (old_state != new_state)
378 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 374 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
379 } 375 }
380 spin_unlock_bh(&ct->lock); 376 spin_unlock_bh(&ct->lock);
381 377
378 timeouts = nf_ct_timeout_lookup(ct);
379 if (!timeouts)
380 timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
381
382 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); 382 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
383 383
384 if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && 384 if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
385 dir == IP_CT_DIR_REPLY && 385 dir == IP_CT_DIR_REPLY &&
386 new_state == SCTP_CONNTRACK_ESTABLISHED) { 386 new_state == SCTP_CONNTRACK_ESTABLISHED) {
387 pr_debug("Setting assured bit\n"); 387 pr_debug("Setting assured bit\n");
388 set_bit(IPS_ASSURED_BIT, &ct->status); 388 set_bit(IPS_ASSURED_BIT, &ct->status);
389 nf_conntrack_event_cache(IPCT_ASSURED, ct); 389 nf_conntrack_event_cache(IPCT_ASSURED, ct);
390 } 390 }
391 391
392 return NF_ACCEPT; 392 return NF_ACCEPT;
393 393
394 out_unlock: 394 out_unlock:
395 spin_unlock_bh(&ct->lock); 395 spin_unlock_bh(&ct->lock);
396 out: 396 out:
397 return -NF_ACCEPT; 397 return -NF_ACCEPT;
398 } 398 }
399 399
400 /* Called when a new connection for this protocol found. */ 400 /* Called when a new connection for this protocol found. */
401 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, 401 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
402 unsigned int dataoff, unsigned int *timeouts) 402 unsigned int dataoff)
403 { 403 {
404 enum sctp_conntrack new_state; 404 enum sctp_conntrack new_state;
405 const struct sctphdr *sh; 405 const struct sctphdr *sh;
406 struct sctphdr _sctph; 406 struct sctphdr _sctph;
407 const struct sctp_chunkhdr *sch; 407 const struct sctp_chunkhdr *sch;
408 struct sctp_chunkhdr _sch; 408 struct sctp_chunkhdr _sch;
409 u_int32_t offset, count; 409 u_int32_t offset, count;
410 unsigned long map[256 / sizeof(unsigned long)] = { 0 }; 410 unsigned long map[256 / sizeof(unsigned long)] = { 0 };
411 411
412 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); 412 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
413 if (sh == NULL) 413 if (sh == NULL)
414 return false; 414 return false;
415 415
416 if (do_basic_checks(ct, skb, dataoff, map) != 0) 416 if (do_basic_checks(ct, skb, dataoff, map) != 0)
417 return false; 417 return false;
418 418
419 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ 419 /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
420 if (test_bit(SCTP_CID_ABORT, map) || 420 if (test_bit(SCTP_CID_ABORT, map) ||
421 test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) || 421 test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) ||
422 test_bit(SCTP_CID_COOKIE_ACK, map)) 422 test_bit(SCTP_CID_COOKIE_ACK, map))
423 return false; 423 return false;
424 424
425 memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); 425 memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
426 new_state = SCTP_CONNTRACK_MAX; 426 new_state = SCTP_CONNTRACK_MAX;
427 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 427 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
428 /* Don't need lock here: this conntrack not in circulation yet */ 428 /* Don't need lock here: this conntrack not in circulation yet */
429 new_state = sctp_new_state(IP_CT_DIR_ORIGINAL, 429 new_state = sctp_new_state(IP_CT_DIR_ORIGINAL,
430 SCTP_CONNTRACK_NONE, sch->type); 430 SCTP_CONNTRACK_NONE, sch->type);
431 431
432 /* Invalid: delete conntrack */ 432 /* Invalid: delete conntrack */
433 if (new_state == SCTP_CONNTRACK_NONE || 433 if (new_state == SCTP_CONNTRACK_NONE ||
434 new_state == SCTP_CONNTRACK_MAX) { 434 new_state == SCTP_CONNTRACK_MAX) {
435 pr_debug("nf_conntrack_sctp: invalid new deleting.\n"); 435 pr_debug("nf_conntrack_sctp: invalid new deleting.\n");
436 return false; 436 return false;
437 } 437 }
438 438
439 /* Copy the vtag into the state info */ 439 /* Copy the vtag into the state info */
440 if (sch->type == SCTP_CID_INIT) { 440 if (sch->type == SCTP_CID_INIT) {
441 struct sctp_inithdr _inithdr, *ih; 441 struct sctp_inithdr _inithdr, *ih;
442 /* Sec 8.5.1 (A) */ 442 /* Sec 8.5.1 (A) */
443 if (sh->vtag) 443 if (sh->vtag)
444 return false; 444 return false;
445 445
446 ih = skb_header_pointer(skb, offset + sizeof(_sch), 446 ih = skb_header_pointer(skb, offset + sizeof(_sch),
447 sizeof(_inithdr), &_inithdr); 447 sizeof(_inithdr), &_inithdr);
448 if (!ih) 448 if (!ih)
449 return false; 449 return false;
450 450
451 pr_debug("Setting vtag %x for new conn\n", 451 pr_debug("Setting vtag %x for new conn\n",
452 ih->init_tag); 452 ih->init_tag);
453 453
454 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; 454 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
455 } else if (sch->type == SCTP_CID_HEARTBEAT) { 455 } else if (sch->type == SCTP_CID_HEARTBEAT) {
456 pr_debug("Setting vtag %x for secondary conntrack\n", 456 pr_debug("Setting vtag %x for secondary conntrack\n",
457 sh->vtag); 457 sh->vtag);
458 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; 458 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
459 } 459 }
460 /* If it is a shutdown ack OOTB packet, we expect a return 460 /* If it is a shutdown ack OOTB packet, we expect a return
461 shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ 461 shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
462 else { 462 else {
463 pr_debug("Setting vtag %x for new conn OOTB\n", 463 pr_debug("Setting vtag %x for new conn OOTB\n",
464 sh->vtag); 464 sh->vtag);
465 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; 465 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
466 } 466 }
467 467
468 ct->proto.sctp.state = new_state; 468 ct->proto.sctp.state = new_state;
469 } 469 }
470 470
471 return true; 471 return true;
472 } 472 }
473 473
474 static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, 474 static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb,
475 unsigned int dataoff, 475 unsigned int dataoff,
476 u8 pf, unsigned int hooknum) 476 u8 pf, unsigned int hooknum)
477 { 477 {
478 const struct sctphdr *sh; 478 const struct sctphdr *sh;
479 const char *logmsg; 479 const char *logmsg;
480 480
481 if (skb->len < dataoff + sizeof(struct sctphdr)) { 481 if (skb->len < dataoff + sizeof(struct sctphdr)) {
482 logmsg = "nf_ct_sctp: short packet "; 482 logmsg = "nf_ct_sctp: short packet ";
483 goto out_invalid; 483 goto out_invalid;
484 } 484 }
485 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 485 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
486 skb->ip_summed == CHECKSUM_NONE) { 486 skb->ip_summed == CHECKSUM_NONE) {
487 if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) { 487 if (!skb_make_writable(skb, dataoff + sizeof(struct sctphdr))) {
488 logmsg = "nf_ct_sctp: failed to read header "; 488 logmsg = "nf_ct_sctp: failed to read header ";
489 goto out_invalid; 489 goto out_invalid;
490 } 490 }
491 sh = (const struct sctphdr *)(skb->data + dataoff); 491 sh = (const struct sctphdr *)(skb->data + dataoff);
492 if (sh->checksum != sctp_compute_cksum(skb, dataoff)) { 492 if (sh->checksum != sctp_compute_cksum(skb, dataoff)) {
493 logmsg = "nf_ct_sctp: bad CRC "; 493 logmsg = "nf_ct_sctp: bad CRC ";
494 goto out_invalid; 494 goto out_invalid;
495 } 495 }
496 skb->ip_summed = CHECKSUM_UNNECESSARY; 496 skb->ip_summed = CHECKSUM_UNNECESSARY;
497 } 497 }
498 return NF_ACCEPT; 498 return NF_ACCEPT;
499 out_invalid: 499 out_invalid:
500 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); 500 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg);
501 return -NF_ACCEPT; 501 return -NF_ACCEPT;
502 } 502 }
503 503
504 static bool sctp_can_early_drop(const struct nf_conn *ct) 504 static bool sctp_can_early_drop(const struct nf_conn *ct)
505 { 505 {
506 switch (ct->proto.sctp.state) { 506 switch (ct->proto.sctp.state) {
507 case SCTP_CONNTRACK_SHUTDOWN_SENT: 507 case SCTP_CONNTRACK_SHUTDOWN_SENT:
508 case SCTP_CONNTRACK_SHUTDOWN_RECD: 508 case SCTP_CONNTRACK_SHUTDOWN_RECD:
509 case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: 509 case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT:
510 return true; 510 return true;
511 default: 511 default:
512 break; 512 break;
513 } 513 }
514 514
515 return false; 515 return false;
516 } 516 }
517 517
518 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 518 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
519 519
520 #include <linux/netfilter/nfnetlink.h> 520 #include <linux/netfilter/nfnetlink.h>
521 #include <linux/netfilter/nfnetlink_conntrack.h> 521 #include <linux/netfilter/nfnetlink_conntrack.h>
522 522
523 static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 523 static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
524 struct nf_conn *ct) 524 struct nf_conn *ct)
525 { 525 {
526 struct nlattr *nest_parms; 526 struct nlattr *nest_parms;
527 527
528 spin_lock_bh(&ct->lock); 528 spin_lock_bh(&ct->lock);
529 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); 529 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
530 if (!nest_parms) 530 if (!nest_parms)
531 goto nla_put_failure; 531 goto nla_put_failure;
532 532
533 if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state) || 533 if (nla_put_u8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state) ||
534 nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, 534 nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_ORIGINAL,
535 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) || 535 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL]) ||
536 nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY, 536 nla_put_be32(skb, CTA_PROTOINFO_SCTP_VTAG_REPLY,
537 ct->proto.sctp.vtag[IP_CT_DIR_REPLY])) 537 ct->proto.sctp.vtag[IP_CT_DIR_REPLY]))
538 goto nla_put_failure; 538 goto nla_put_failure;
539 539
540 spin_unlock_bh(&ct->lock); 540 spin_unlock_bh(&ct->lock);
541 541
542 nla_nest_end(skb, nest_parms); 542 nla_nest_end(skb, nest_parms);
543 543
544 return 0; 544 return 0;
545 545
546 nla_put_failure: 546 nla_put_failure:
547 spin_unlock_bh(&ct->lock); 547 spin_unlock_bh(&ct->lock);
548 return -1; 548 return -1;
549 } 549 }
550 550
551 static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { 551 static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = {
552 [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, 552 [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 },
553 [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, 553 [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 },
554 [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, 554 [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 },
555 }; 555 };
556 556
557 #define SCTP_NLATTR_SIZE ( \ 557 #define SCTP_NLATTR_SIZE ( \
558 NLA_ALIGN(NLA_HDRLEN + 1) + \ 558 NLA_ALIGN(NLA_HDRLEN + 1) + \
559 NLA_ALIGN(NLA_HDRLEN + 4) + \ 559 NLA_ALIGN(NLA_HDRLEN + 4) + \
560 NLA_ALIGN(NLA_HDRLEN + 4)) 560 NLA_ALIGN(NLA_HDRLEN + 4))
561 561
562 static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) 562 static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
563 { 563 {
564 struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; 564 struct nlattr *attr = cda[CTA_PROTOINFO_SCTP];
565 struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; 565 struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1];
566 int err; 566 int err;
567 567
568 /* updates may not contain the internal protocol info, skip parsing */ 568 /* updates may not contain the internal protocol info, skip parsing */
569 if (!attr) 569 if (!attr)
570 return 0; 570 return 0;
571 571
572 err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr, 572 err = nla_parse_nested(tb, CTA_PROTOINFO_SCTP_MAX, attr,
573 sctp_nla_policy, NULL); 573 sctp_nla_policy, NULL);
574 if (err < 0) 574 if (err < 0)
575 return err; 575 return err;
576 576
577 if (!tb[CTA_PROTOINFO_SCTP_STATE] || 577 if (!tb[CTA_PROTOINFO_SCTP_STATE] ||
578 !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || 578 !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] ||
579 !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) 579 !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
580 return -EINVAL; 580 return -EINVAL;
581 581
582 spin_lock_bh(&ct->lock); 582 spin_lock_bh(&ct->lock);
583 ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); 583 ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
584 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = 584 ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
585 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); 585 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
586 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = 586 ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
587 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); 587 nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
588 spin_unlock_bh(&ct->lock); 588 spin_unlock_bh(&ct->lock);
589 589
590 return 0; 590 return 0;
591 } 591 }
592 #endif 592 #endif
593 593
594 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 594 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
595 595
596 #include <linux/netfilter/nfnetlink.h> 596 #include <linux/netfilter/nfnetlink.h>
597 #include <linux/netfilter/nfnetlink_cttimeout.h> 597 #include <linux/netfilter/nfnetlink_cttimeout.h>
598 598
599 static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], 599 static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
600 struct net *net, void *data) 600 struct net *net, void *data)
601 { 601 {
602 unsigned int *timeouts = data; 602 unsigned int *timeouts = data;
603 struct nf_sctp_net *sn = sctp_pernet(net); 603 struct nf_sctp_net *sn = sctp_pernet(net);
604 int i; 604 int i;
605 605
606 /* set default SCTP timeouts. */ 606 /* set default SCTP timeouts. */
607 for (i=0; i<SCTP_CONNTRACK_MAX; i++) 607 for (i=0; i<SCTP_CONNTRACK_MAX; i++)
608 timeouts[i] = sn->timeouts[i]; 608 timeouts[i] = sn->timeouts[i];
609 609
610 /* there's a 1:1 mapping between attributes and protocol states. */ 610 /* there's a 1:1 mapping between attributes and protocol states. */
611 for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { 611 for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) {
612 if (tb[i]) { 612 if (tb[i]) {
613 timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ; 613 timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
614 } 614 }
615 } 615 }
616 return 0; 616 return 0;
617 } 617 }
618 618
619 static int 619 static int
620 sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 620 sctp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
621 { 621 {
622 const unsigned int *timeouts = data; 622 const unsigned int *timeouts = data;
623 int i; 623 int i;
624 624
625 for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) { 625 for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i<CTA_TIMEOUT_SCTP_MAX+1; i++) {
626 if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ))) 626 if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
627 goto nla_put_failure; 627 goto nla_put_failure;
628 } 628 }
629 return 0; 629 return 0;
630 630
631 nla_put_failure: 631 nla_put_failure:
632 return -ENOSPC; 632 return -ENOSPC;
633 } 633 }
634 634
635 static const struct nla_policy 635 static const struct nla_policy
636 sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { 636 sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
637 [CTA_TIMEOUT_SCTP_CLOSED] = { .type = NLA_U32 }, 637 [CTA_TIMEOUT_SCTP_CLOSED] = { .type = NLA_U32 },
638 [CTA_TIMEOUT_SCTP_COOKIE_WAIT] = { .type = NLA_U32 }, 638 [CTA_TIMEOUT_SCTP_COOKIE_WAIT] = { .type = NLA_U32 },
639 [CTA_TIMEOUT_SCTP_COOKIE_ECHOED] = { .type = NLA_U32 }, 639 [CTA_TIMEOUT_SCTP_COOKIE_ECHOED] = { .type = NLA_U32 },
640 [CTA_TIMEOUT_SCTP_ESTABLISHED] = { .type = NLA_U32 }, 640 [CTA_TIMEOUT_SCTP_ESTABLISHED] = { .type = NLA_U32 },
641 [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 }, 641 [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
642 [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 }, 642 [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
643 [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, 643 [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
644 [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, 644 [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
645 [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, 645 [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
646 }; 646 };
647 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 647 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
648 648
649 649
650 #ifdef CONFIG_SYSCTL 650 #ifdef CONFIG_SYSCTL
651 static struct ctl_table sctp_sysctl_table[] = { 651 static struct ctl_table sctp_sysctl_table[] = {
652 { 652 {
653 .procname = "nf_conntrack_sctp_timeout_closed", 653 .procname = "nf_conntrack_sctp_timeout_closed",
654 .maxlen = sizeof(unsigned int), 654 .maxlen = sizeof(unsigned int),
655 .mode = 0644, 655 .mode = 0644,
656 .proc_handler = proc_dointvec_jiffies, 656 .proc_handler = proc_dointvec_jiffies,
657 }, 657 },
658 { 658 {
659 .procname = "nf_conntrack_sctp_timeout_cookie_wait", 659 .procname = "nf_conntrack_sctp_timeout_cookie_wait",
660 .maxlen = sizeof(unsigned int), 660 .maxlen = sizeof(unsigned int),
661 .mode = 0644, 661 .mode = 0644,
662 .proc_handler = proc_dointvec_jiffies, 662 .proc_handler = proc_dointvec_jiffies,
663 }, 663 },
664 { 664 {
665 .procname = "nf_conntrack_sctp_timeout_cookie_echoed", 665 .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
666 .maxlen = sizeof(unsigned int), 666 .maxlen = sizeof(unsigned int),
667 .mode = 0644, 667 .mode = 0644,
668 .proc_handler = proc_dointvec_jiffies, 668 .proc_handler = proc_dointvec_jiffies,
669 }, 669 },
670 { 670 {
671 .procname = "nf_conntrack_sctp_timeout_established", 671 .procname = "nf_conntrack_sctp_timeout_established",
672 .maxlen = sizeof(unsigned int), 672 .maxlen = sizeof(unsigned int),
673 .mode = 0644, 673 .mode = 0644,
674 .proc_handler = proc_dointvec_jiffies, 674 .proc_handler = proc_dointvec_jiffies,
675 }, 675 },
676 { 676 {
677 .procname = "nf_conntrack_sctp_timeout_shutdown_sent", 677 .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
678 .maxlen = sizeof(unsigned int), 678 .maxlen = sizeof(unsigned int),
679 .mode = 0644, 679 .mode = 0644,
680 .proc_handler = proc_dointvec_jiffies, 680 .proc_handler = proc_dointvec_jiffies,
681 }, 681 },
682 { 682 {
683 .procname = "nf_conntrack_sctp_timeout_shutdown_recd", 683 .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
684 .maxlen = sizeof(unsigned int), 684 .maxlen = sizeof(unsigned int),
685 .mode = 0644, 685 .mode = 0644,
686 .proc_handler = proc_dointvec_jiffies, 686 .proc_handler = proc_dointvec_jiffies,
687 }, 687 },
688 { 688 {
689 .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", 689 .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
690 .maxlen = sizeof(unsigned int), 690 .maxlen = sizeof(unsigned int),
691 .mode = 0644, 691 .mode = 0644,
692 .proc_handler = proc_dointvec_jiffies, 692 .proc_handler = proc_dointvec_jiffies,
693 }, 693 },
694 { 694 {
695 .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", 695 .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
696 .maxlen = sizeof(unsigned int), 696 .maxlen = sizeof(unsigned int),
697 .mode = 0644, 697 .mode = 0644,
698 .proc_handler = proc_dointvec_jiffies, 698 .proc_handler = proc_dointvec_jiffies,
699 }, 699 },
700 { 700 {
701 .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", 701 .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
702 .maxlen = sizeof(unsigned int), 702 .maxlen = sizeof(unsigned int),
703 .mode = 0644, 703 .mode = 0644,
704 .proc_handler = proc_dointvec_jiffies, 704 .proc_handler = proc_dointvec_jiffies,
705 }, 705 },
706 { } 706 { }
707 }; 707 };
708 #endif 708 #endif
709 709
710 static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, 710 static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
711 struct nf_sctp_net *sn) 711 struct nf_sctp_net *sn)
712 { 712 {
713 #ifdef CONFIG_SYSCTL 713 #ifdef CONFIG_SYSCTL
714 if (pn->ctl_table) 714 if (pn->ctl_table)
715 return 0; 715 return 0;
716 716
717 pn->ctl_table = kmemdup(sctp_sysctl_table, 717 pn->ctl_table = kmemdup(sctp_sysctl_table,
718 sizeof(sctp_sysctl_table), 718 sizeof(sctp_sysctl_table),
719 GFP_KERNEL); 719 GFP_KERNEL);
720 if (!pn->ctl_table) 720 if (!pn->ctl_table)
721 return -ENOMEM; 721 return -ENOMEM;
722 722
723 pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; 723 pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
724 pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; 724 pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
725 pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; 725 pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
726 pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; 726 pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
727 pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; 727 pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
728 pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; 728 pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
729 pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; 729 pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
730 pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; 730 pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
731 pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; 731 pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
732 #endif 732 #endif
733 return 0; 733 return 0;
734 } 734 }
735 735
736 static int sctp_init_net(struct net *net, u_int16_t proto) 736 static int sctp_init_net(struct net *net, u_int16_t proto)
737 { 737 {
738 struct nf_sctp_net *sn = sctp_pernet(net); 738 struct nf_sctp_net *sn = sctp_pernet(net);
739 struct nf_proto_net *pn = &sn->pn; 739 struct nf_proto_net *pn = &sn->pn;
740 740
741 if (!pn->users) { 741 if (!pn->users) {
742 int i; 742 int i;
743 743
744 for (i = 0; i < SCTP_CONNTRACK_MAX; i++) 744 for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
745 sn->timeouts[i] = sctp_timeouts[i]; 745 sn->timeouts[i] = sctp_timeouts[i];
746 } 746 }
747 747
748 return sctp_kmemdup_sysctl_table(pn, sn); 748 return sctp_kmemdup_sysctl_table(pn, sn);
749 } 749 }
750 750
751 static struct nf_proto_net *sctp_get_net_proto(struct net *net) 751 static struct nf_proto_net *sctp_get_net_proto(struct net *net)
752 { 752 {
753 return &net->ct.nf_ct_proto.sctp.pn; 753 return &net->ct.nf_ct_proto.sctp.pn;
754 } 754 }
755 755
756 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { 756 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
757 .l3proto = PF_INET, 757 .l3proto = PF_INET,
758 .l4proto = IPPROTO_SCTP, 758 .l4proto = IPPROTO_SCTP,
759 #ifdef CONFIG_NF_CONNTRACK_PROCFS 759 #ifdef CONFIG_NF_CONNTRACK_PROCFS
760 .print_conntrack = sctp_print_conntrack, 760 .print_conntrack = sctp_print_conntrack,
761 #endif 761 #endif
762 .packet = sctp_packet, 762 .packet = sctp_packet,
763 .get_timeouts = sctp_get_timeouts,
764 .new = sctp_new, 763 .new = sctp_new,
765 .error = sctp_error, 764 .error = sctp_error,
766 .can_early_drop = sctp_can_early_drop, 765 .can_early_drop = sctp_can_early_drop,
767 .me = THIS_MODULE, 766 .me = THIS_MODULE,
768 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 767 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
769 .nlattr_size = SCTP_NLATTR_SIZE, 768 .nlattr_size = SCTP_NLATTR_SIZE,
770 .to_nlattr = sctp_to_nlattr, 769 .to_nlattr = sctp_to_nlattr,
771 .from_nlattr = nlattr_to_sctp, 770 .from_nlattr = nlattr_to_sctp,
772 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 771 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
773 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 772 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
774 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 773 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
775 .nla_policy = nf_ct_port_nla_policy, 774 .nla_policy = nf_ct_port_nla_policy,
776 #endif 775 #endif
777 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 776 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
778 .ctnl_timeout = { 777 .ctnl_timeout = {
779 .nlattr_to_obj = sctp_timeout_nlattr_to_obj, 778 .nlattr_to_obj = sctp_timeout_nlattr_to_obj,
780 .obj_to_nlattr = sctp_timeout_obj_to_nlattr, 779 .obj_to_nlattr = sctp_timeout_obj_to_nlattr,
781 .nlattr_max = CTA_TIMEOUT_SCTP_MAX, 780 .nlattr_max = CTA_TIMEOUT_SCTP_MAX,
782 .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, 781 .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
783 .nla_policy = sctp_timeout_nla_policy, 782 .nla_policy = sctp_timeout_nla_policy,
784 }, 783 },
785 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 784 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
786 .init_net = sctp_init_net, 785 .init_net = sctp_init_net,
787 .get_net_proto = sctp_get_net_proto, 786 .get_net_proto = sctp_get_net_proto,
788 }; 787 };
789 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); 788 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
790 789
791 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { 790 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
792 .l3proto = PF_INET6, 791 .l3proto = PF_INET6,
793 .l4proto = IPPROTO_SCTP, 792 .l4proto = IPPROTO_SCTP,
794 #ifdef CONFIG_NF_CONNTRACK_PROCFS 793 #ifdef CONFIG_NF_CONNTRACK_PROCFS
795 .print_conntrack = sctp_print_conntrack, 794 .print_conntrack = sctp_print_conntrack,
796 #endif 795 #endif
797 .packet = sctp_packet, 796 .packet = sctp_packet,
798 .get_timeouts = sctp_get_timeouts,
799 .new = sctp_new, 797 .new = sctp_new,
800 .error = sctp_error, 798 .error = sctp_error,
801 .can_early_drop = sctp_can_early_drop, 799 .can_early_drop = sctp_can_early_drop,
802 .me = THIS_MODULE, 800 .me = THIS_MODULE,
803 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 801 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
804 .nlattr_size = SCTP_NLATTR_SIZE, 802 .nlattr_size = SCTP_NLATTR_SIZE,
805 .to_nlattr = sctp_to_nlattr, 803 .to_nlattr = sctp_to_nlattr,
806 .from_nlattr = nlattr_to_sctp, 804 .from_nlattr = nlattr_to_sctp,
807 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 805 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
808 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 806 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
809 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 807 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
810 .nla_policy = nf_ct_port_nla_policy, 808 .nla_policy = nf_ct_port_nla_policy,
811 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 809 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
812 .ctnl_timeout = { 810 .ctnl_timeout = {
813 .nlattr_to_obj = sctp_timeout_nlattr_to_obj, 811 .nlattr_to_obj = sctp_timeout_nlattr_to_obj,
814 .obj_to_nlattr = sctp_timeout_obj_to_nlattr, 812 .obj_to_nlattr = sctp_timeout_obj_to_nlattr,
815 .nlattr_max = CTA_TIMEOUT_SCTP_MAX, 813 .nlattr_max = CTA_TIMEOUT_SCTP_MAX,
816 .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX, 814 .obj_size = sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
817 .nla_policy = sctp_timeout_nla_policy, 815 .nla_policy = sctp_timeout_nla_policy,
818 }, 816 },
819 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 817 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
net/netfilter/nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10
11 #include <linux/types.h> 11 #include <linux/types.h>
12 #include <linux/timer.h> 12 #include <linux/timer.h>
13 #include <linux/module.h> 13 #include <linux/module.h>
14 #include <linux/in.h> 14 #include <linux/in.h>
15 #include <linux/tcp.h> 15 #include <linux/tcp.h>
16 #include <linux/spinlock.h> 16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h> 17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h> 18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h> 19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h> 20 #include <asm/unaligned.h>
21 21
22 #include <net/tcp.h> 22 #include <net/tcp.h>
23 23
24 #include <linux/netfilter.h> 24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h> 25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h> 26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h> 27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h> 28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h> 29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h> 30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h> 31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
32 #include <net/netfilter/nf_log.h> 33 #include <net/netfilter/nf_log.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35 36
36 /* "Be conservative in what you do, 37 /* "Be conservative in what you do,
37 be liberal in what you accept from others." 38 be liberal in what you accept from others."
38 If it's non-zero, we mark only out of window RST segments as INVALID. */ 39 If it's non-zero, we mark only out of window RST segments as INVALID. */
39 static int nf_ct_tcp_be_liberal __read_mostly = 0; 40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
40 41
41 /* If it is set to zero, we disable picking up already established 42 /* If it is set to zero, we disable picking up already established
42 connections. */ 43 connections. */
43 static int nf_ct_tcp_loose __read_mostly = 1; 44 static int nf_ct_tcp_loose __read_mostly = 1;
44 45
45 /* Max number of the retransmitted packets without receiving an (acceptable) 46 /* Max number of the retransmitted packets without receiving an (acceptable)
46 ACK from the destination. If this number is reached, a shorter timer 47 ACK from the destination. If this number is reached, a shorter timer
47 will be started. */ 48 will be started. */
48 static int nf_ct_tcp_max_retrans __read_mostly = 3; 49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
49 50
50 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 51 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51 closely. They're more complex. --RR */ 52 closely. They're more complex. --RR */
52 53
53 static const char *const tcp_conntrack_names[] = { 54 static const char *const tcp_conntrack_names[] = {
54 "NONE", 55 "NONE",
55 "SYN_SENT", 56 "SYN_SENT",
56 "SYN_RECV", 57 "SYN_RECV",
57 "ESTABLISHED", 58 "ESTABLISHED",
58 "FIN_WAIT", 59 "FIN_WAIT",
59 "CLOSE_WAIT", 60 "CLOSE_WAIT",
60 "LAST_ACK", 61 "LAST_ACK",
61 "TIME_WAIT", 62 "TIME_WAIT",
62 "CLOSE", 63 "CLOSE",
63 "SYN_SENT2", 64 "SYN_SENT2",
64 }; 65 };
65 66
66 #define SECS * HZ 67 #define SECS * HZ
67 #define MINS * 60 SECS 68 #define MINS * 60 SECS
68 #define HOURS * 60 MINS 69 #define HOURS * 60 MINS
69 #define DAYS * 24 HOURS 70 #define DAYS * 24 HOURS
70 71
71 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = { 72 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
72 [TCP_CONNTRACK_SYN_SENT] = 2 MINS, 73 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
73 [TCP_CONNTRACK_SYN_RECV] = 60 SECS, 74 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
74 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS, 75 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
75 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS, 76 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
76 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS, 77 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
77 [TCP_CONNTRACK_LAST_ACK] = 30 SECS, 78 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
78 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, 79 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
79 [TCP_CONNTRACK_CLOSE] = 10 SECS, 80 [TCP_CONNTRACK_CLOSE] = 10 SECS,
80 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, 81 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
81 /* RFC1122 says the R2 limit should be at least 100 seconds. 82 /* RFC1122 says the R2 limit should be at least 100 seconds.
82 Linux uses 15 packets as limit, which corresponds 83 Linux uses 15 packets as limit, which corresponds
83 to ~13-30min depending on RTO. */ 84 to ~13-30min depending on RTO. */
84 [TCP_CONNTRACK_RETRANS] = 5 MINS, 85 [TCP_CONNTRACK_RETRANS] = 5 MINS,
85 [TCP_CONNTRACK_UNACK] = 5 MINS, 86 [TCP_CONNTRACK_UNACK] = 5 MINS,
86 }; 87 };
87 88
88 #define sNO TCP_CONNTRACK_NONE 89 #define sNO TCP_CONNTRACK_NONE
89 #define sSS TCP_CONNTRACK_SYN_SENT 90 #define sSS TCP_CONNTRACK_SYN_SENT
90 #define sSR TCP_CONNTRACK_SYN_RECV 91 #define sSR TCP_CONNTRACK_SYN_RECV
91 #define sES TCP_CONNTRACK_ESTABLISHED 92 #define sES TCP_CONNTRACK_ESTABLISHED
92 #define sFW TCP_CONNTRACK_FIN_WAIT 93 #define sFW TCP_CONNTRACK_FIN_WAIT
93 #define sCW TCP_CONNTRACK_CLOSE_WAIT 94 #define sCW TCP_CONNTRACK_CLOSE_WAIT
94 #define sLA TCP_CONNTRACK_LAST_ACK 95 #define sLA TCP_CONNTRACK_LAST_ACK
95 #define sTW TCP_CONNTRACK_TIME_WAIT 96 #define sTW TCP_CONNTRACK_TIME_WAIT
96 #define sCL TCP_CONNTRACK_CLOSE 97 #define sCL TCP_CONNTRACK_CLOSE
97 #define sS2 TCP_CONNTRACK_SYN_SENT2 98 #define sS2 TCP_CONNTRACK_SYN_SENT2
98 #define sIV TCP_CONNTRACK_MAX 99 #define sIV TCP_CONNTRACK_MAX
99 #define sIG TCP_CONNTRACK_IGNORE 100 #define sIG TCP_CONNTRACK_IGNORE
100 101
101 /* What TCP flags are set from RST/SYN/FIN/ACK. */ 102 /* What TCP flags are set from RST/SYN/FIN/ACK. */
102 enum tcp_bit_set { 103 enum tcp_bit_set {
103 TCP_SYN_SET, 104 TCP_SYN_SET,
104 TCP_SYNACK_SET, 105 TCP_SYNACK_SET,
105 TCP_FIN_SET, 106 TCP_FIN_SET,
106 TCP_ACK_SET, 107 TCP_ACK_SET,
107 TCP_RST_SET, 108 TCP_RST_SET,
108 TCP_NONE_SET, 109 TCP_NONE_SET,
109 }; 110 };
110 111
111 /* 112 /*
112 * The TCP state transition table needs a few words... 113 * The TCP state transition table needs a few words...
113 * 114 *
114 * We are the man in the middle. All the packets go through us 115 * We are the man in the middle. All the packets go through us
115 * but might get lost in transit to the destination. 116 * but might get lost in transit to the destination.
116 * It is assumed that the destinations can't receive segments 117 * It is assumed that the destinations can't receive segments
117 * we haven't seen. 118 * we haven't seen.
118 * 119 *
119 * The checked segment is in window, but our windows are *not* 120 * The checked segment is in window, but our windows are *not*
120 * equivalent with the ones of the sender/receiver. We always 121 * equivalent with the ones of the sender/receiver. We always
121 * try to guess the state of the current sender. 122 * try to guess the state of the current sender.
122 * 123 *
123 * The meaning of the states are: 124 * The meaning of the states are:
124 * 125 *
125 * NONE: initial state 126 * NONE: initial state
126 * SYN_SENT: SYN-only packet seen 127 * SYN_SENT: SYN-only packet seen
127 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open 128 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
128 * SYN_RECV: SYN-ACK packet seen 129 * SYN_RECV: SYN-ACK packet seen
129 * ESTABLISHED: ACK packet seen 130 * ESTABLISHED: ACK packet seen
130 * FIN_WAIT: FIN packet seen 131 * FIN_WAIT: FIN packet seen
131 * CLOSE_WAIT: ACK seen (after FIN) 132 * CLOSE_WAIT: ACK seen (after FIN)
132 * LAST_ACK: FIN seen (after FIN) 133 * LAST_ACK: FIN seen (after FIN)
133 * TIME_WAIT: last ACK seen 134 * TIME_WAIT: last ACK seen
134 * CLOSE: closed connection (RST) 135 * CLOSE: closed connection (RST)
135 * 136 *
136 * Packets marked as IGNORED (sIG): 137 * Packets marked as IGNORED (sIG):
137 * if they may be either invalid or valid 138 * if they may be either invalid or valid
138 * and the receiver may send back a connection 139 * and the receiver may send back a connection
139 * closing RST or a SYN/ACK. 140 * closing RST or a SYN/ACK.
140 * 141 *
141 * Packets marked as INVALID (sIV): 142 * Packets marked as INVALID (sIV):
142 * if we regard them as truly invalid packets 143 * if we regard them as truly invalid packets
143 */ 144 */
144 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { 145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
145 { 146 {
146 /* ORIGINAL */ 147 /* ORIGINAL */
147 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 148 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
148 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, 149 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
149 /* 150 /*
150 * sNO -> sSS Initialize a new connection 151 * sNO -> sSS Initialize a new connection
151 * sSS -> sSS Retransmitted SYN 152 * sSS -> sSS Retransmitted SYN
152 * sS2 -> sS2 Late retransmitted SYN 153 * sS2 -> sS2 Late retransmitted SYN
153 * sSR -> sIG 154 * sSR -> sIG
154 * sES -> sIG Error: SYNs in window outside the SYN_SENT state 155 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
155 * are errors. Receiver will reply with RST 156 * are errors. Receiver will reply with RST
156 * and close the connection. 157 * and close the connection.
157 * Or we are not in sync and hold a dead connection. 158 * Or we are not in sync and hold a dead connection.
158 * sFW -> sIG 159 * sFW -> sIG
159 * sCW -> sIG 160 * sCW -> sIG
160 * sLA -> sIG 161 * sLA -> sIG
161 * sTW -> sSS Reopened connection (RFC 1122). 162 * sTW -> sSS Reopened connection (RFC 1122).
162 * sCL -> sSS 163 * sCL -> sSS
163 */ 164 */
164 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 165 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
165 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, 166 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
166 /* 167 /*
167 * sNO -> sIV Too late and no reason to do anything 168 * sNO -> sIV Too late and no reason to do anything
168 * sSS -> sIV Client can't send SYN and then SYN/ACK 169 * sSS -> sIV Client can't send SYN and then SYN/ACK
169 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open 170 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
170 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open 171 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
171 * sES -> sIV Invalid SYN/ACK packets sent by the client 172 * sES -> sIV Invalid SYN/ACK packets sent by the client
172 * sFW -> sIV 173 * sFW -> sIV
173 * sCW -> sIV 174 * sCW -> sIV
174 * sLA -> sIV 175 * sLA -> sIV
175 * sTW -> sIV 176 * sTW -> sIV
176 * sCL -> sIV 177 * sCL -> sIV
177 */ 178 */
178 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 179 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
179 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 180 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
180 /* 181 /*
181 * sNO -> sIV Too late and no reason to do anything... 182 * sNO -> sIV Too late and no reason to do anything...
182 * sSS -> sIV Client might not send FIN in this state: 183 * sSS -> sIV Client might not send FIN in this state:
183 * we enforce waiting for a SYN/ACK reply first. 184 * we enforce waiting for a SYN/ACK reply first.
184 * sS2 -> sIV 185 * sS2 -> sIV
185 * sSR -> sFW Close started. 186 * sSR -> sFW Close started.
186 * sES -> sFW 187 * sES -> sFW
187 * sFW -> sLA FIN seen in both directions, waiting for 188 * sFW -> sLA FIN seen in both directions, waiting for
188 * the last ACK. 189 * the last ACK.
189 * Might be a retransmitted FIN as well... 190 * Might be a retransmitted FIN as well...
190 * sCW -> sLA 191 * sCW -> sLA
191 * sLA -> sLA Retransmitted FIN. Remain in the same state. 192 * sLA -> sLA Retransmitted FIN. Remain in the same state.
192 * sTW -> sTW 193 * sTW -> sTW
193 * sCL -> sCL 194 * sCL -> sCL
194 */ 195 */
195 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 196 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
196 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, 197 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
197 /* 198 /*
198 * sNO -> sES Assumed. 199 * sNO -> sES Assumed.
199 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. 200 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
200 * sS2 -> sIV 201 * sS2 -> sIV
201 * sSR -> sES Established state is reached. 202 * sSR -> sES Established state is reached.
202 * sES -> sES :-) 203 * sES -> sES :-)
203 * sFW -> sCW Normal close request answered by ACK. 204 * sFW -> sCW Normal close request answered by ACK.
204 * sCW -> sCW 205 * sCW -> sCW
205 * sLA -> sTW Last ACK detected (RFC5961 challenged) 206 * sLA -> sTW Last ACK detected (RFC5961 challenged)
206 * sTW -> sTW Retransmitted last ACK. Remain in the same state. 207 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
207 * sCL -> sCL 208 * sCL -> sCL
208 */ 209 */
209 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 210 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
210 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, 211 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
211 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 212 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
212 }, 213 },
213 { 214 {
214 /* REPLY */ 215 /* REPLY */
215 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 216 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
216 /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, 217 /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
217 /* 218 /*
218 * sNO -> sIV Never reached. 219 * sNO -> sIV Never reached.
219 * sSS -> sS2 Simultaneous open 220 * sSS -> sS2 Simultaneous open
220 * sS2 -> sS2 Retransmitted simultaneous SYN 221 * sS2 -> sS2 Retransmitted simultaneous SYN
221 * sSR -> sIV Invalid SYN packets sent by the server 222 * sSR -> sIV Invalid SYN packets sent by the server
222 * sES -> sIV 223 * sES -> sIV
223 * sFW -> sIV 224 * sFW -> sIV
224 * sCW -> sIV 225 * sCW -> sIV
225 * sLA -> sIV 226 * sLA -> sIV
226 * sTW -> sSS Reopened connection, but server may have switched role 227 * sTW -> sSS Reopened connection, but server may have switched role
227 * sCL -> sIV 228 * sCL -> sIV
228 */ 229 */
229 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 230 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
230 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, 231 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231 /* 232 /*
232 * sSS -> sSR Standard open. 233 * sSS -> sSR Standard open.
233 * sS2 -> sSR Simultaneous open 234 * sS2 -> sSR Simultaneous open
234 * sSR -> sIG Retransmitted SYN/ACK, ignore it. 235 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
235 * sES -> sIG Late retransmitted SYN/ACK? 236 * sES -> sIG Late retransmitted SYN/ACK?
236 * sFW -> sIG Might be SYN/ACK answering ignored SYN 237 * sFW -> sIG Might be SYN/ACK answering ignored SYN
237 * sCW -> sIG 238 * sCW -> sIG
238 * sLA -> sIG 239 * sLA -> sIG
239 * sTW -> sIG 240 * sTW -> sIG
240 * sCL -> sIG 241 * sCL -> sIG
241 */ 242 */
242 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 243 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
243 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, 244 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
244 /* 245 /*
245 * sSS -> sIV Server might not send FIN in this state. 246 * sSS -> sIV Server might not send FIN in this state.
246 * sS2 -> sIV 247 * sS2 -> sIV
247 * sSR -> sFW Close started. 248 * sSR -> sFW Close started.
248 * sES -> sFW 249 * sES -> sFW
249 * sFW -> sLA FIN seen in both directions. 250 * sFW -> sLA FIN seen in both directions.
250 * sCW -> sLA 251 * sCW -> sLA
251 * sLA -> sLA Retransmitted FIN. 252 * sLA -> sLA Retransmitted FIN.
252 * sTW -> sTW 253 * sTW -> sTW
253 * sCL -> sCL 254 * sCL -> sCL
254 */ 255 */
255 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 256 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
256 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, 257 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
257 /* 258 /*
258 * sSS -> sIG Might be a half-open connection. 259 * sSS -> sIG Might be a half-open connection.
259 * sS2 -> sIG 260 * sS2 -> sIG
260 * sSR -> sSR Might answer late resent SYN. 261 * sSR -> sSR Might answer late resent SYN.
261 * sES -> sES :-) 262 * sES -> sES :-)
262 * sFW -> sCW Normal close request answered by ACK. 263 * sFW -> sCW Normal close request answered by ACK.
263 * sCW -> sCW 264 * sCW -> sCW
264 * sLA -> sTW Last ACK detected (RFC5961 challenged) 265 * sLA -> sTW Last ACK detected (RFC5961 challenged)
265 * sTW -> sTW Retransmitted last ACK. 266 * sTW -> sTW Retransmitted last ACK.
266 * sCL -> sCL 267 * sCL -> sCL
267 */ 268 */
268 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ 269 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
269 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, 270 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
270 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } 271 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
271 } 272 }
272 }; 273 };
273 274
274 static inline struct nf_tcp_net *tcp_pernet(struct net *net) 275 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275 { 276 {
276 return &net->ct.nf_ct_proto.tcp; 277 return &net->ct.nf_ct_proto.tcp;
277 } 278 }
278 279
279 #ifdef CONFIG_NF_CONNTRACK_PROCFS 280 #ifdef CONFIG_NF_CONNTRACK_PROCFS
280 /* Print out the private part of the conntrack. */ 281 /* Print out the private part of the conntrack. */
281 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) 282 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
282 { 283 {
283 if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) 284 if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
284 return; 285 return;
285 286
286 seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); 287 seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
287 } 288 }
288 #endif 289 #endif
289 290
290 static unsigned int get_conntrack_index(const struct tcphdr *tcph) 291 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
291 { 292 {
292 if (tcph->rst) return TCP_RST_SET; 293 if (tcph->rst) return TCP_RST_SET;
293 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); 294 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
294 else if (tcph->fin) return TCP_FIN_SET; 295 else if (tcph->fin) return TCP_FIN_SET;
295 else if (tcph->ack) return TCP_ACK_SET; 296 else if (tcph->ack) return TCP_ACK_SET;
296 else return TCP_NONE_SET; 297 else return TCP_NONE_SET;
297 } 298 }
298 299
299 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering 300 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
300 in IP Filter' by Guido van Rooij. 301 in IP Filter' by Guido van Rooij.
301 302
302 http://www.sane.nl/events/sane2000/papers.html 303 http://www.sane.nl/events/sane2000/papers.html
303 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/ 304 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
304 305
305 The boundaries and the conditions are changed according to RFC793: 306 The boundaries and the conditions are changed according to RFC793:
306 the packet must intersect the window (i.e. segments may be 307 the packet must intersect the window (i.e. segments may be
307 after the right or before the left edge) and thus receivers may ACK 308 after the right or before the left edge) and thus receivers may ACK
308 segments after the right edge of the window. 309 segments after the right edge of the window.
309 310
310 td_maxend = max(sack + max(win,1)) seen in reply packets 311 td_maxend = max(sack + max(win,1)) seen in reply packets
311 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets 312 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
312 td_maxwin += seq + len - sender.td_maxend 313 td_maxwin += seq + len - sender.td_maxend
313 if seq + len > sender.td_maxend 314 if seq + len > sender.td_maxend
314 td_end = max(seq + len) seen in sent packets 315 td_end = max(seq + len) seen in sent packets
315 316
316 I. Upper bound for valid data: seq <= sender.td_maxend 317 I. Upper bound for valid data: seq <= sender.td_maxend
317 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin 318 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
318 III. Upper bound for valid (s)ack: sack <= receiver.td_end 319 III. Upper bound for valid (s)ack: sack <= receiver.td_end
319 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW 320 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
320 321
321 where sack is the highest right edge of sack block found in the packet 322 where sack is the highest right edge of sack block found in the packet
322 or ack in the case of packet without SACK option. 323 or ack in the case of packet without SACK option.
323 324
324 The upper bound limit for a valid (s)ack is not ignored - 325 The upper bound limit for a valid (s)ack is not ignored -
325 * we don't have to deal with fragments. 326 * we don't have to deal with fragments.
326 */ 327 */
327 328
328 static inline __u32 segment_seq_plus_len(__u32 seq, 329 static inline __u32 segment_seq_plus_len(__u32 seq,
329 size_t len, 330 size_t len,
330 unsigned int dataoff, 331 unsigned int dataoff,
331 const struct tcphdr *tcph) 332 const struct tcphdr *tcph)
332 { 333 {
333 /* XXX Should I use payload length field in IP/IPv6 header ? 334 /* XXX Should I use payload length field in IP/IPv6 header ?
334 * - YK */ 335 * - YK */
335 return (seq + len - dataoff - tcph->doff*4 336 return (seq + len - dataoff - tcph->doff*4
336 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); 337 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
337 } 338 }
338 339
339 /* Fixme: what about big packets? */ 340 /* Fixme: what about big packets? */
340 #define MAXACKWINCONST 66000 341 #define MAXACKWINCONST 66000
341 #define MAXACKWINDOW(sender) \ 342 #define MAXACKWINDOW(sender) \
342 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ 343 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
343 : MAXACKWINCONST) 344 : MAXACKWINCONST)
344 345
345 /* 346 /*
346 * Simplified tcp_parse_options routine from tcp_input.c 347 * Simplified tcp_parse_options routine from tcp_input.c
347 */ 348 */
348 static void tcp_options(const struct sk_buff *skb, 349 static void tcp_options(const struct sk_buff *skb,
349 unsigned int dataoff, 350 unsigned int dataoff,
350 const struct tcphdr *tcph, 351 const struct tcphdr *tcph,
351 struct ip_ct_tcp_state *state) 352 struct ip_ct_tcp_state *state)
352 { 353 {
353 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 354 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
354 const unsigned char *ptr; 355 const unsigned char *ptr;
355 int length = (tcph->doff*4) - sizeof(struct tcphdr); 356 int length = (tcph->doff*4) - sizeof(struct tcphdr);
356 357
357 if (!length) 358 if (!length)
358 return; 359 return;
359 360
360 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 361 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
361 length, buff); 362 length, buff);
362 BUG_ON(ptr == NULL); 363 BUG_ON(ptr == NULL);
363 364
364 state->td_scale = 365 state->td_scale =
365 state->flags = 0; 366 state->flags = 0;
366 367
367 while (length > 0) { 368 while (length > 0) {
368 int opcode=*ptr++; 369 int opcode=*ptr++;
369 int opsize; 370 int opsize;
370 371
371 switch (opcode) { 372 switch (opcode) {
372 case TCPOPT_EOL: 373 case TCPOPT_EOL:
373 return; 374 return;
374 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 375 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
375 length--; 376 length--;
376 continue; 377 continue;
377 default: 378 default:
378 if (length < 2) 379 if (length < 2)
379 return; 380 return;
380 opsize=*ptr++; 381 opsize=*ptr++;
381 if (opsize < 2) /* "silly options" */ 382 if (opsize < 2) /* "silly options" */
382 return; 383 return;
383 if (opsize > length) 384 if (opsize > length)
384 return; /* don't parse partial options */ 385 return; /* don't parse partial options */
385 386
386 if (opcode == TCPOPT_SACK_PERM 387 if (opcode == TCPOPT_SACK_PERM
387 && opsize == TCPOLEN_SACK_PERM) 388 && opsize == TCPOLEN_SACK_PERM)
388 state->flags |= IP_CT_TCP_FLAG_SACK_PERM; 389 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
389 else if (opcode == TCPOPT_WINDOW 390 else if (opcode == TCPOPT_WINDOW
390 && opsize == TCPOLEN_WINDOW) { 391 && opsize == TCPOLEN_WINDOW) {
391 state->td_scale = *(u_int8_t *)ptr; 392 state->td_scale = *(u_int8_t *)ptr;
392 393
393 if (state->td_scale > TCP_MAX_WSCALE) 394 if (state->td_scale > TCP_MAX_WSCALE)
394 state->td_scale = TCP_MAX_WSCALE; 395 state->td_scale = TCP_MAX_WSCALE;
395 396
396 state->flags |= 397 state->flags |=
397 IP_CT_TCP_FLAG_WINDOW_SCALE; 398 IP_CT_TCP_FLAG_WINDOW_SCALE;
398 } 399 }
399 ptr += opsize - 2; 400 ptr += opsize - 2;
400 length -= opsize; 401 length -= opsize;
401 } 402 }
402 } 403 }
403 } 404 }
404 405
405 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, 406 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
406 const struct tcphdr *tcph, __u32 *sack) 407 const struct tcphdr *tcph, __u32 *sack)
407 { 408 {
408 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; 409 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
409 const unsigned char *ptr; 410 const unsigned char *ptr;
410 int length = (tcph->doff*4) - sizeof(struct tcphdr); 411 int length = (tcph->doff*4) - sizeof(struct tcphdr);
411 __u32 tmp; 412 __u32 tmp;
412 413
413 if (!length) 414 if (!length)
414 return; 415 return;
415 416
416 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), 417 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
417 length, buff); 418 length, buff);
418 BUG_ON(ptr == NULL); 419 BUG_ON(ptr == NULL);
419 420
420 /* Fast path for timestamp-only option */ 421 /* Fast path for timestamp-only option */
421 if (length == TCPOLEN_TSTAMP_ALIGNED 422 if (length == TCPOLEN_TSTAMP_ALIGNED
422 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) 423 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
423 | (TCPOPT_NOP << 16) 424 | (TCPOPT_NOP << 16)
424 | (TCPOPT_TIMESTAMP << 8) 425 | (TCPOPT_TIMESTAMP << 8)
425 | TCPOLEN_TIMESTAMP)) 426 | TCPOLEN_TIMESTAMP))
426 return; 427 return;
427 428
428 while (length > 0) { 429 while (length > 0) {
429 int opcode = *ptr++; 430 int opcode = *ptr++;
430 int opsize, i; 431 int opsize, i;
431 432
432 switch (opcode) { 433 switch (opcode) {
433 case TCPOPT_EOL: 434 case TCPOPT_EOL:
434 return; 435 return;
435 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 436 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
436 length--; 437 length--;
437 continue; 438 continue;
438 default: 439 default:
439 if (length < 2) 440 if (length < 2)
440 return; 441 return;
441 opsize = *ptr++; 442 opsize = *ptr++;
442 if (opsize < 2) /* "silly options" */ 443 if (opsize < 2) /* "silly options" */
443 return; 444 return;
444 if (opsize > length) 445 if (opsize > length)
445 return; /* don't parse partial options */ 446 return; /* don't parse partial options */
446 447
447 if (opcode == TCPOPT_SACK 448 if (opcode == TCPOPT_SACK
448 && opsize >= (TCPOLEN_SACK_BASE 449 && opsize >= (TCPOLEN_SACK_BASE
449 + TCPOLEN_SACK_PERBLOCK) 450 + TCPOLEN_SACK_PERBLOCK)
450 && !((opsize - TCPOLEN_SACK_BASE) 451 && !((opsize - TCPOLEN_SACK_BASE)
451 % TCPOLEN_SACK_PERBLOCK)) { 452 % TCPOLEN_SACK_PERBLOCK)) {
452 for (i = 0; 453 for (i = 0;
453 i < (opsize - TCPOLEN_SACK_BASE); 454 i < (opsize - TCPOLEN_SACK_BASE);
454 i += TCPOLEN_SACK_PERBLOCK) { 455 i += TCPOLEN_SACK_PERBLOCK) {
455 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); 456 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
456 457
457 if (after(tmp, *sack)) 458 if (after(tmp, *sack))
458 *sack = tmp; 459 *sack = tmp;
459 } 460 }
460 return; 461 return;
461 } 462 }
462 ptr += opsize - 2; 463 ptr += opsize - 2;
463 length -= opsize; 464 length -= opsize;
464 } 465 }
465 } 466 }
466 } 467 }
467 468
468 static bool tcp_in_window(const struct nf_conn *ct, 469 static bool tcp_in_window(const struct nf_conn *ct,
469 struct ip_ct_tcp *state, 470 struct ip_ct_tcp *state,
470 enum ip_conntrack_dir dir, 471 enum ip_conntrack_dir dir,
471 unsigned int index, 472 unsigned int index,
472 const struct sk_buff *skb, 473 const struct sk_buff *skb,
473 unsigned int dataoff, 474 unsigned int dataoff,
474 const struct tcphdr *tcph) 475 const struct tcphdr *tcph)
475 { 476 {
476 struct net *net = nf_ct_net(ct); 477 struct net *net = nf_ct_net(ct);
477 struct nf_tcp_net *tn = tcp_pernet(net); 478 struct nf_tcp_net *tn = tcp_pernet(net);
478 struct ip_ct_tcp_state *sender = &state->seen[dir]; 479 struct ip_ct_tcp_state *sender = &state->seen[dir];
479 struct ip_ct_tcp_state *receiver = &state->seen[!dir]; 480 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
480 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; 481 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
481 __u32 seq, ack, sack, end, win, swin; 482 __u32 seq, ack, sack, end, win, swin;
482 s32 receiver_offset; 483 s32 receiver_offset;
483 bool res, in_recv_win; 484 bool res, in_recv_win;
484 485
485 /* 486 /*
486 * Get the required data from the packet. 487 * Get the required data from the packet.
487 */ 488 */
488 seq = ntohl(tcph->seq); 489 seq = ntohl(tcph->seq);
489 ack = sack = ntohl(tcph->ack_seq); 490 ack = sack = ntohl(tcph->ack_seq);
490 win = ntohs(tcph->window); 491 win = ntohs(tcph->window);
491 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); 492 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
492 493
493 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) 494 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
494 tcp_sack(skb, dataoff, tcph, &sack); 495 tcp_sack(skb, dataoff, tcph, &sack);
495 496
496 /* Take into account NAT sequence number mangling */ 497 /* Take into account NAT sequence number mangling */
497 receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1); 498 receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
498 ack -= receiver_offset; 499 ack -= receiver_offset;
499 sack -= receiver_offset; 500 sack -= receiver_offset;
500 501
501 pr_debug("tcp_in_window: START\n"); 502 pr_debug("tcp_in_window: START\n");
502 pr_debug("tcp_in_window: "); 503 pr_debug("tcp_in_window: ");
503 nf_ct_dump_tuple(tuple); 504 nf_ct_dump_tuple(tuple);
504 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", 505 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
505 seq, ack, receiver_offset, sack, receiver_offset, win, end); 506 seq, ack, receiver_offset, sack, receiver_offset, win, end);
506 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 507 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
507 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 508 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
508 sender->td_end, sender->td_maxend, sender->td_maxwin, 509 sender->td_end, sender->td_maxend, sender->td_maxwin,
509 sender->td_scale, 510 sender->td_scale,
510 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 511 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
511 receiver->td_scale); 512 receiver->td_scale);
512 513
513 if (sender->td_maxwin == 0) { 514 if (sender->td_maxwin == 0) {
514 /* 515 /*
515 * Initialize sender data. 516 * Initialize sender data.
516 */ 517 */
517 if (tcph->syn) { 518 if (tcph->syn) {
518 /* 519 /*
519 * SYN-ACK in reply to a SYN 520 * SYN-ACK in reply to a SYN
520 * or SYN from reply direction in simultaneous open. 521 * or SYN from reply direction in simultaneous open.
521 */ 522 */
522 sender->td_end = 523 sender->td_end =
523 sender->td_maxend = end; 524 sender->td_maxend = end;
524 sender->td_maxwin = (win == 0 ? 1 : win); 525 sender->td_maxwin = (win == 0 ? 1 : win);
525 526
526 tcp_options(skb, dataoff, tcph, sender); 527 tcp_options(skb, dataoff, tcph, sender);
527 /* 528 /*
528 * RFC 1323: 529 * RFC 1323:
529 * Both sides must send the Window Scale option 530 * Both sides must send the Window Scale option
530 * to enable window scaling in either direction. 531 * to enable window scaling in either direction.
531 */ 532 */
532 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE 533 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
533 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) 534 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
534 sender->td_scale = 535 sender->td_scale =
535 receiver->td_scale = 0; 536 receiver->td_scale = 0;
536 if (!tcph->ack) 537 if (!tcph->ack)
537 /* Simultaneous open */ 538 /* Simultaneous open */
538 return true; 539 return true;
539 } else { 540 } else {
540 /* 541 /*
541 * We are in the middle of a connection, 542 * We are in the middle of a connection,
542 * its history is lost for us. 543 * its history is lost for us.
543 * Let's try to use the data from the packet. 544 * Let's try to use the data from the packet.
544 */ 545 */
545 sender->td_end = end; 546 sender->td_end = end;
546 swin = win << sender->td_scale; 547 swin = win << sender->td_scale;
547 sender->td_maxwin = (swin == 0 ? 1 : swin); 548 sender->td_maxwin = (swin == 0 ? 1 : swin);
548 sender->td_maxend = end + sender->td_maxwin; 549 sender->td_maxend = end + sender->td_maxwin;
549 /* 550 /*
550 * We haven't seen traffic in the other direction yet 551 * We haven't seen traffic in the other direction yet
551 * but we have to tweak window tracking to pass III 552 * but we have to tweak window tracking to pass III
552 * and IV until that happens. 553 * and IV until that happens.
553 */ 554 */
554 if (receiver->td_maxwin == 0) 555 if (receiver->td_maxwin == 0)
555 receiver->td_end = receiver->td_maxend = sack; 556 receiver->td_end = receiver->td_maxend = sack;
556 } 557 }
557 } else if (((state->state == TCP_CONNTRACK_SYN_SENT 558 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
558 && dir == IP_CT_DIR_ORIGINAL) 559 && dir == IP_CT_DIR_ORIGINAL)
559 || (state->state == TCP_CONNTRACK_SYN_RECV 560 || (state->state == TCP_CONNTRACK_SYN_RECV
560 && dir == IP_CT_DIR_REPLY)) 561 && dir == IP_CT_DIR_REPLY))
561 && after(end, sender->td_end)) { 562 && after(end, sender->td_end)) {
562 /* 563 /*
563 * RFC 793: "if a TCP is reinitialized ... then it need 564 * RFC 793: "if a TCP is reinitialized ... then it need
564 * not wait at all; it must only be sure to use sequence 565 * not wait at all; it must only be sure to use sequence
565 * numbers larger than those recently used." 566 * numbers larger than those recently used."
566 */ 567 */
567 sender->td_end = 568 sender->td_end =
568 sender->td_maxend = end; 569 sender->td_maxend = end;
569 sender->td_maxwin = (win == 0 ? 1 : win); 570 sender->td_maxwin = (win == 0 ? 1 : win);
570 571
571 tcp_options(skb, dataoff, tcph, sender); 572 tcp_options(skb, dataoff, tcph, sender);
572 } 573 }
573 574
574 if (!(tcph->ack)) { 575 if (!(tcph->ack)) {
575 /* 576 /*
576 * If there is no ACK, just pretend it was set and OK. 577 * If there is no ACK, just pretend it was set and OK.
577 */ 578 */
578 ack = sack = receiver->td_end; 579 ack = sack = receiver->td_end;
579 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == 580 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
580 (TCP_FLAG_ACK|TCP_FLAG_RST)) 581 (TCP_FLAG_ACK|TCP_FLAG_RST))
581 && (ack == 0)) { 582 && (ack == 0)) {
582 /* 583 /*
583 * Broken TCP stacks, that set ACK in RST packets as well 584 * Broken TCP stacks, that set ACK in RST packets as well
584 * with zero ack value. 585 * with zero ack value.
585 */ 586 */
586 ack = sack = receiver->td_end; 587 ack = sack = receiver->td_end;
587 } 588 }
588 589
589 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) 590 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
590 /* 591 /*
591 * RST sent answering SYN. 592 * RST sent answering SYN.
592 */ 593 */
593 seq = end = sender->td_end; 594 seq = end = sender->td_end;
594 595
595 pr_debug("tcp_in_window: "); 596 pr_debug("tcp_in_window: ");
596 nf_ct_dump_tuple(tuple); 597 nf_ct_dump_tuple(tuple);
597 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", 598 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
598 seq, ack, receiver_offset, sack, receiver_offset, win, end); 599 seq, ack, receiver_offset, sack, receiver_offset, win, end);
599 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " 600 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
600 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 601 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
601 sender->td_end, sender->td_maxend, sender->td_maxwin, 602 sender->td_end, sender->td_maxend, sender->td_maxwin,
602 sender->td_scale, 603 sender->td_scale,
603 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 604 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
604 receiver->td_scale); 605 receiver->td_scale);
605 606
606 /* Is the ending sequence in the receive window (if available)? */ 607 /* Is the ending sequence in the receive window (if available)? */
607 in_recv_win = !receiver->td_maxwin || 608 in_recv_win = !receiver->td_maxwin ||
608 after(end, sender->td_end - receiver->td_maxwin - 1); 609 after(end, sender->td_end - receiver->td_maxwin - 1);
609 610
610 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", 611 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
611 before(seq, sender->td_maxend + 1), 612 before(seq, sender->td_maxend + 1),
612 (in_recv_win ? 1 : 0), 613 (in_recv_win ? 1 : 0),
613 before(sack, receiver->td_end + 1), 614 before(sack, receiver->td_end + 1),
614 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); 615 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
615 616
616 if (before(seq, sender->td_maxend + 1) && 617 if (before(seq, sender->td_maxend + 1) &&
617 in_recv_win && 618 in_recv_win &&
618 before(sack, receiver->td_end + 1) && 619 before(sack, receiver->td_end + 1) &&
619 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { 620 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
620 /* 621 /*
621 * Take into account window scaling (RFC 1323). 622 * Take into account window scaling (RFC 1323).
622 */ 623 */
623 if (!tcph->syn) 624 if (!tcph->syn)
624 win <<= sender->td_scale; 625 win <<= sender->td_scale;
625 626
626 /* 627 /*
627 * Update sender data. 628 * Update sender data.
628 */ 629 */
629 swin = win + (sack - ack); 630 swin = win + (sack - ack);
630 if (sender->td_maxwin < swin) 631 if (sender->td_maxwin < swin)
631 sender->td_maxwin = swin; 632 sender->td_maxwin = swin;
632 if (after(end, sender->td_end)) { 633 if (after(end, sender->td_end)) {
633 sender->td_end = end; 634 sender->td_end = end;
634 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; 635 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
635 } 636 }
636 if (tcph->ack) { 637 if (tcph->ack) {
637 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { 638 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
638 sender->td_maxack = ack; 639 sender->td_maxack = ack;
639 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; 640 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
640 } else if (after(ack, sender->td_maxack)) 641 } else if (after(ack, sender->td_maxack))
641 sender->td_maxack = ack; 642 sender->td_maxack = ack;
642 } 643 }
643 644
644 /* 645 /*
645 * Update receiver data. 646 * Update receiver data.
646 */ 647 */
647 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) 648 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
648 receiver->td_maxwin += end - sender->td_maxend; 649 receiver->td_maxwin += end - sender->td_maxend;
649 if (after(sack + win, receiver->td_maxend - 1)) { 650 if (after(sack + win, receiver->td_maxend - 1)) {
650 receiver->td_maxend = sack + win; 651 receiver->td_maxend = sack + win;
651 if (win == 0) 652 if (win == 0)
652 receiver->td_maxend++; 653 receiver->td_maxend++;
653 } 654 }
654 if (ack == receiver->td_end) 655 if (ack == receiver->td_end)
655 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; 656 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
656 657
657 /* 658 /*
658 * Check retransmissions. 659 * Check retransmissions.
659 */ 660 */
660 if (index == TCP_ACK_SET) { 661 if (index == TCP_ACK_SET) {
661 if (state->last_dir == dir 662 if (state->last_dir == dir
662 && state->last_seq == seq 663 && state->last_seq == seq
663 && state->last_ack == ack 664 && state->last_ack == ack
664 && state->last_end == end 665 && state->last_end == end
665 && state->last_win == win) 666 && state->last_win == win)
666 state->retrans++; 667 state->retrans++;
667 else { 668 else {
668 state->last_dir = dir; 669 state->last_dir = dir;
669 state->last_seq = seq; 670 state->last_seq = seq;
670 state->last_ack = ack; 671 state->last_ack = ack;
671 state->last_end = end; 672 state->last_end = end;
672 state->last_win = win; 673 state->last_win = win;
673 state->retrans = 0; 674 state->retrans = 0;
674 } 675 }
675 } 676 }
676 res = true; 677 res = true;
677 } else { 678 } else {
678 res = false; 679 res = false;
679 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || 680 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
680 tn->tcp_be_liberal) 681 tn->tcp_be_liberal)
681 res = true; 682 res = true;
682 if (!res) { 683 if (!res) {
683 nf_ct_l4proto_log_invalid(skb, ct, 684 nf_ct_l4proto_log_invalid(skb, ct,
684 "%s", 685 "%s",
685 before(seq, sender->td_maxend + 1) ? 686 before(seq, sender->td_maxend + 1) ?
686 in_recv_win ? 687 in_recv_win ?
687 before(sack, receiver->td_end + 1) ? 688 before(sack, receiver->td_end + 1) ?
688 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" 689 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
689 : "ACK is under the lower bound (possible overly delayed ACK)" 690 : "ACK is under the lower bound (possible overly delayed ACK)"
690 : "ACK is over the upper bound (ACKed data not seen yet)" 691 : "ACK is over the upper bound (ACKed data not seen yet)"
691 : "SEQ is under the lower bound (already ACKed data retransmitted)" 692 : "SEQ is under the lower bound (already ACKed data retransmitted)"
692 : "SEQ is over the upper bound (over the window of the receiver)"); 693 : "SEQ is over the upper bound (over the window of the receiver)");
693 } 694 }
694 } 695 }
695 696
696 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " 697 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
697 "receiver end=%u maxend=%u maxwin=%u\n", 698 "receiver end=%u maxend=%u maxwin=%u\n",
698 res, sender->td_end, sender->td_maxend, sender->td_maxwin, 699 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
699 receiver->td_end, receiver->td_maxend, receiver->td_maxwin); 700 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
700 701
701 return res; 702 return res;
702 } 703 }
703 704
704 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ 705 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
705 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| 706 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
706 TCPHDR_URG) + 1] = 707 TCPHDR_URG) + 1] =
707 { 708 {
708 [TCPHDR_SYN] = 1, 709 [TCPHDR_SYN] = 1,
709 [TCPHDR_SYN|TCPHDR_URG] = 1, 710 [TCPHDR_SYN|TCPHDR_URG] = 1,
710 [TCPHDR_SYN|TCPHDR_ACK] = 1, 711 [TCPHDR_SYN|TCPHDR_ACK] = 1,
711 [TCPHDR_RST] = 1, 712 [TCPHDR_RST] = 1,
712 [TCPHDR_RST|TCPHDR_ACK] = 1, 713 [TCPHDR_RST|TCPHDR_ACK] = 1,
713 [TCPHDR_FIN|TCPHDR_ACK] = 1, 714 [TCPHDR_FIN|TCPHDR_ACK] = 1,
714 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1, 715 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
715 [TCPHDR_ACK] = 1, 716 [TCPHDR_ACK] = 1,
716 [TCPHDR_ACK|TCPHDR_URG] = 1, 717 [TCPHDR_ACK|TCPHDR_URG] = 1,
717 }; 718 };
718 719
719 static void tcp_error_log(const struct sk_buff *skb, struct net *net, 720 static void tcp_error_log(const struct sk_buff *skb, struct net *net,
720 u8 pf, const char *msg) 721 u8 pf, const char *msg)
721 { 722 {
722 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); 723 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
723 } 724 }
724 725
725 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ 726 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
726 static int tcp_error(struct net *net, struct nf_conn *tmpl, 727 static int tcp_error(struct net *net, struct nf_conn *tmpl,
727 struct sk_buff *skb, 728 struct sk_buff *skb,
728 unsigned int dataoff, 729 unsigned int dataoff,
729 u_int8_t pf, 730 u_int8_t pf,
730 unsigned int hooknum) 731 unsigned int hooknum)
731 { 732 {
732 const struct tcphdr *th; 733 const struct tcphdr *th;
733 struct tcphdr _tcph; 734 struct tcphdr _tcph;
734 unsigned int tcplen = skb->len - dataoff; 735 unsigned int tcplen = skb->len - dataoff;
735 u_int8_t tcpflags; 736 u_int8_t tcpflags;
736 737
737 /* Smaller that minimal TCP header? */ 738 /* Smaller that minimal TCP header? */
738 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 739 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
739 if (th == NULL) { 740 if (th == NULL) {
740 tcp_error_log(skb, net, pf, "short packet"); 741 tcp_error_log(skb, net, pf, "short packet");
741 return -NF_ACCEPT; 742 return -NF_ACCEPT;
742 } 743 }
743 744
744 /* Not whole TCP header or malformed packet */ 745 /* Not whole TCP header or malformed packet */
745 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { 746 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
746 tcp_error_log(skb, net, pf, "truncated packet"); 747 tcp_error_log(skb, net, pf, "truncated packet");
747 return -NF_ACCEPT; 748 return -NF_ACCEPT;
748 } 749 }
749 750
750 /* Checksum invalid? Ignore. 751 /* Checksum invalid? Ignore.
751 * We skip checking packets on the outgoing path 752 * We skip checking packets on the outgoing path
752 * because the checksum is assumed to be correct. 753 * because the checksum is assumed to be correct.
753 */ 754 */
754 /* FIXME: Source route IP option packets --RR */ 755 /* FIXME: Source route IP option packets --RR */
755 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 756 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
756 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 757 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
757 tcp_error_log(skb, net, pf, "bad checksum"); 758 tcp_error_log(skb, net, pf, "bad checksum");
758 return -NF_ACCEPT; 759 return -NF_ACCEPT;
759 } 760 }
760 761
761 /* Check TCP flags. */ 762 /* Check TCP flags. */
762 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); 763 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
763 if (!tcp_valid_flags[tcpflags]) { 764 if (!tcp_valid_flags[tcpflags]) {
764 tcp_error_log(skb, net, pf, "invalid tcp flag combination"); 765 tcp_error_log(skb, net, pf, "invalid tcp flag combination");
765 return -NF_ACCEPT; 766 return -NF_ACCEPT;
766 } 767 }
767 768
768 return NF_ACCEPT; 769 return NF_ACCEPT;
769 } 770 }
770 771
771 static unsigned int *tcp_get_timeouts(struct net *net)
772 {
773 return tcp_pernet(net)->timeouts;
774 }
775
776 /* Returns verdict for packet, or -1 for invalid. */ 772 /* Returns verdict for packet, or -1 for invalid. */
777 static int tcp_packet(struct nf_conn *ct, 773 static int tcp_packet(struct nf_conn *ct,
778 const struct sk_buff *skb, 774 const struct sk_buff *skb,
779 unsigned int dataoff, 775 unsigned int dataoff,
780 enum ip_conntrack_info ctinfo, 776 enum ip_conntrack_info ctinfo)
781 unsigned int *timeouts)
782 { 777 {
783 struct net *net = nf_ct_net(ct); 778 struct net *net = nf_ct_net(ct);
784 struct nf_tcp_net *tn = tcp_pernet(net); 779 struct nf_tcp_net *tn = tcp_pernet(net);
785 struct nf_conntrack_tuple *tuple; 780 struct nf_conntrack_tuple *tuple;
786 enum tcp_conntrack new_state, old_state; 781 enum tcp_conntrack new_state, old_state;
782 unsigned int index, *timeouts;
787 enum ip_conntrack_dir dir; 783 enum ip_conntrack_dir dir;
788 const struct tcphdr *th; 784 const struct tcphdr *th;
789 struct tcphdr _tcph; 785 struct tcphdr _tcph;
790 unsigned long timeout; 786 unsigned long timeout;
791 unsigned int index;
792 787
793 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 788 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
794 BUG_ON(th == NULL); 789 BUG_ON(th == NULL);
795 790
796 spin_lock_bh(&ct->lock); 791 spin_lock_bh(&ct->lock);
797 old_state = ct->proto.tcp.state; 792 old_state = ct->proto.tcp.state;
798 dir = CTINFO2DIR(ctinfo); 793 dir = CTINFO2DIR(ctinfo);
799 index = get_conntrack_index(th); 794 index = get_conntrack_index(th);
800 new_state = tcp_conntracks[dir][index][old_state]; 795 new_state = tcp_conntracks[dir][index][old_state];
801 tuple = &ct->tuplehash[dir].tuple; 796 tuple = &ct->tuplehash[dir].tuple;
802 797
803 switch (new_state) { 798 switch (new_state) {
804 case TCP_CONNTRACK_SYN_SENT: 799 case TCP_CONNTRACK_SYN_SENT:
805 if (old_state < TCP_CONNTRACK_TIME_WAIT) 800 if (old_state < TCP_CONNTRACK_TIME_WAIT)
806 break; 801 break;
807 /* RFC 1122: "When a connection is closed actively, 802 /* RFC 1122: "When a connection is closed actively,
808 * it MUST linger in TIME-WAIT state for a time 2xMSL 803 * it MUST linger in TIME-WAIT state for a time 2xMSL
809 * (Maximum Segment Lifetime). However, it MAY accept 804 * (Maximum Segment Lifetime). However, it MAY accept
810 * a new SYN from the remote TCP to reopen the connection 805 * a new SYN from the remote TCP to reopen the connection
811 * directly from TIME-WAIT state, if..." 806 * directly from TIME-WAIT state, if..."
812 * We ignore the conditions because we are in the 807 * We ignore the conditions because we are in the
813 * TIME-WAIT state anyway. 808 * TIME-WAIT state anyway.
814 * 809 *
815 * Handle aborted connections: we and the server 810 * Handle aborted connections: we and the server
816 * think there is an existing connection but the client 811 * think there is an existing connection but the client
817 * aborts it and starts a new one. 812 * aborts it and starts a new one.
818 */ 813 */
819 if (((ct->proto.tcp.seen[dir].flags 814 if (((ct->proto.tcp.seen[dir].flags
820 | ct->proto.tcp.seen[!dir].flags) 815 | ct->proto.tcp.seen[!dir].flags)
821 & IP_CT_TCP_FLAG_CLOSE_INIT) 816 & IP_CT_TCP_FLAG_CLOSE_INIT)
822 || (ct->proto.tcp.last_dir == dir 817 || (ct->proto.tcp.last_dir == dir
823 && ct->proto.tcp.last_index == TCP_RST_SET)) { 818 && ct->proto.tcp.last_index == TCP_RST_SET)) {
824 /* Attempt to reopen a closed/aborted connection. 819 /* Attempt to reopen a closed/aborted connection.
825 * Delete this connection and look up again. */ 820 * Delete this connection and look up again. */
826 spin_unlock_bh(&ct->lock); 821 spin_unlock_bh(&ct->lock);
827 822
828 /* Only repeat if we can actually remove the timer. 823 /* Only repeat if we can actually remove the timer.
829 * Destruction may already be in progress in process 824 * Destruction may already be in progress in process
830 * context and we must give it a chance to terminate. 825 * context and we must give it a chance to terminate.
831 */ 826 */
832 if (nf_ct_kill(ct)) 827 if (nf_ct_kill(ct))
833 return -NF_REPEAT; 828 return -NF_REPEAT;
834 return NF_DROP; 829 return NF_DROP;
835 } 830 }
836 /* Fall through */ 831 /* Fall through */
837 case TCP_CONNTRACK_IGNORE: 832 case TCP_CONNTRACK_IGNORE:
838 /* Ignored packets: 833 /* Ignored packets:
839 * 834 *
840 * Our connection entry may be out of sync, so ignore 835 * Our connection entry may be out of sync, so ignore
841 * packets which may signal the real connection between 836 * packets which may signal the real connection between
842 * the client and the server. 837 * the client and the server.
843 * 838 *
844 * a) SYN in ORIGINAL 839 * a) SYN in ORIGINAL
845 * b) SYN/ACK in REPLY 840 * b) SYN/ACK in REPLY
846 * c) ACK in reply direction after initial SYN in original. 841 * c) ACK in reply direction after initial SYN in original.
847 * 842 *
848 * If the ignored packet is invalid, the receiver will send 843 * If the ignored packet is invalid, the receiver will send
849 * a RST we'll catch below. 844 * a RST we'll catch below.
850 */ 845 */
851 if (index == TCP_SYNACK_SET 846 if (index == TCP_SYNACK_SET
852 && ct->proto.tcp.last_index == TCP_SYN_SET 847 && ct->proto.tcp.last_index == TCP_SYN_SET
853 && ct->proto.tcp.last_dir != dir 848 && ct->proto.tcp.last_dir != dir
854 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { 849 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
855 /* b) This SYN/ACK acknowledges a SYN that we earlier 850 /* b) This SYN/ACK acknowledges a SYN that we earlier
856 * ignored as invalid. This means that the client and 851 * ignored as invalid. This means that the client and
857 * the server are both in sync, while the firewall is 852 * the server are both in sync, while the firewall is
858 * not. We get in sync from the previously annotated 853 * not. We get in sync from the previously annotated
859 * values. 854 * values.
860 */ 855 */
861 old_state = TCP_CONNTRACK_SYN_SENT; 856 old_state = TCP_CONNTRACK_SYN_SENT;
862 new_state = TCP_CONNTRACK_SYN_RECV; 857 new_state = TCP_CONNTRACK_SYN_RECV;
863 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = 858 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
864 ct->proto.tcp.last_end; 859 ct->proto.tcp.last_end;
865 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = 860 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
866 ct->proto.tcp.last_end; 861 ct->proto.tcp.last_end;
867 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = 862 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
868 ct->proto.tcp.last_win == 0 ? 863 ct->proto.tcp.last_win == 0 ?
869 1 : ct->proto.tcp.last_win; 864 1 : ct->proto.tcp.last_win;
870 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = 865 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
871 ct->proto.tcp.last_wscale; 866 ct->proto.tcp.last_wscale;
872 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; 867 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
873 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = 868 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
874 ct->proto.tcp.last_flags; 869 ct->proto.tcp.last_flags;
875 memset(&ct->proto.tcp.seen[dir], 0, 870 memset(&ct->proto.tcp.seen[dir], 0,
876 sizeof(struct ip_ct_tcp_state)); 871 sizeof(struct ip_ct_tcp_state));
877 break; 872 break;
878 } 873 }
879 ct->proto.tcp.last_index = index; 874 ct->proto.tcp.last_index = index;
880 ct->proto.tcp.last_dir = dir; 875 ct->proto.tcp.last_dir = dir;
881 ct->proto.tcp.last_seq = ntohl(th->seq); 876 ct->proto.tcp.last_seq = ntohl(th->seq);
882 ct->proto.tcp.last_end = 877 ct->proto.tcp.last_end =
883 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); 878 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
884 ct->proto.tcp.last_win = ntohs(th->window); 879 ct->proto.tcp.last_win = ntohs(th->window);
885 880
886 /* a) This is a SYN in ORIGINAL. The client and the server 881 /* a) This is a SYN in ORIGINAL. The client and the server
887 * may be in sync but we are not. In that case, we annotate 882 * may be in sync but we are not. In that case, we annotate
888 * the TCP options and let the packet go through. If it is a 883 * the TCP options and let the packet go through. If it is a
889 * valid SYN packet, the server will reply with a SYN/ACK, and 884 * valid SYN packet, the server will reply with a SYN/ACK, and
890 * then we'll get in sync. Otherwise, the server potentially 885 * then we'll get in sync. Otherwise, the server potentially
891 * responds with a challenge ACK if implementing RFC5961. 886 * responds with a challenge ACK if implementing RFC5961.
892 */ 887 */
893 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { 888 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
894 struct ip_ct_tcp_state seen = {}; 889 struct ip_ct_tcp_state seen = {};
895 890
896 ct->proto.tcp.last_flags = 891 ct->proto.tcp.last_flags =
897 ct->proto.tcp.last_wscale = 0; 892 ct->proto.tcp.last_wscale = 0;
898 tcp_options(skb, dataoff, th, &seen); 893 tcp_options(skb, dataoff, th, &seen);
899 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 894 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
900 ct->proto.tcp.last_flags |= 895 ct->proto.tcp.last_flags |=
901 IP_CT_TCP_FLAG_WINDOW_SCALE; 896 IP_CT_TCP_FLAG_WINDOW_SCALE;
902 ct->proto.tcp.last_wscale = seen.td_scale; 897 ct->proto.tcp.last_wscale = seen.td_scale;
903 } 898 }
904 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { 899 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
905 ct->proto.tcp.last_flags |= 900 ct->proto.tcp.last_flags |=
906 IP_CT_TCP_FLAG_SACK_PERM; 901 IP_CT_TCP_FLAG_SACK_PERM;
907 } 902 }
908 /* Mark the potential for RFC5961 challenge ACK, 903 /* Mark the potential for RFC5961 challenge ACK,
909 * this pose a special problem for LAST_ACK state 904 * this pose a special problem for LAST_ACK state
910 * as ACK is intrepretated as ACKing last FIN. 905 * as ACK is intrepretated as ACKing last FIN.
911 */ 906 */
912 if (old_state == TCP_CONNTRACK_LAST_ACK) 907 if (old_state == TCP_CONNTRACK_LAST_ACK)
913 ct->proto.tcp.last_flags |= 908 ct->proto.tcp.last_flags |=
914 IP_CT_EXP_CHALLENGE_ACK; 909 IP_CT_EXP_CHALLENGE_ACK;
915 } 910 }
916 spin_unlock_bh(&ct->lock); 911 spin_unlock_bh(&ct->lock);
917 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " 912 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
918 "state %s ", tcp_conntrack_names[old_state]); 913 "state %s ", tcp_conntrack_names[old_state]);
919 return NF_ACCEPT; 914 return NF_ACCEPT;
920 case TCP_CONNTRACK_MAX: 915 case TCP_CONNTRACK_MAX:
921 /* Special case for SYN proxy: when the SYN to the server or 916 /* Special case for SYN proxy: when the SYN to the server or
922 * the SYN/ACK from the server is lost, the client may transmit 917 * the SYN/ACK from the server is lost, the client may transmit
923 * a keep-alive packet while in SYN_SENT state. This needs to 918 * a keep-alive packet while in SYN_SENT state. This needs to
924 * be associated with the original conntrack entry in order to 919 * be associated with the original conntrack entry in order to
925 * generate a new SYN with the correct sequence number. 920 * generate a new SYN with the correct sequence number.
926 */ 921 */
927 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT && 922 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
928 index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL && 923 index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
929 ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL && 924 ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
930 ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) { 925 ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
931 pr_debug("nf_ct_tcp: SYN proxy client keep alive\n"); 926 pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
932 spin_unlock_bh(&ct->lock); 927 spin_unlock_bh(&ct->lock);
933 return NF_ACCEPT; 928 return NF_ACCEPT;
934 } 929 }
935 930
936 /* Invalid packet */ 931 /* Invalid packet */
937 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 932 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
938 dir, get_conntrack_index(th), old_state); 933 dir, get_conntrack_index(th), old_state);
939 spin_unlock_bh(&ct->lock); 934 spin_unlock_bh(&ct->lock);
940 nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); 935 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
941 return -NF_ACCEPT; 936 return -NF_ACCEPT;
942 case TCP_CONNTRACK_TIME_WAIT: 937 case TCP_CONNTRACK_TIME_WAIT:
943 /* RFC5961 compliance cause stack to send "challenge-ACK" 938 /* RFC5961 compliance cause stack to send "challenge-ACK"
944 * e.g. in response to spurious SYNs. Conntrack MUST 939 * e.g. in response to spurious SYNs. Conntrack MUST
945 * not believe this ACK is acking last FIN. 940 * not believe this ACK is acking last FIN.
946 */ 941 */
947 if (old_state == TCP_CONNTRACK_LAST_ACK && 942 if (old_state == TCP_CONNTRACK_LAST_ACK &&
948 index == TCP_ACK_SET && 943 index == TCP_ACK_SET &&
949 ct->proto.tcp.last_dir != dir && 944 ct->proto.tcp.last_dir != dir &&
950 ct->proto.tcp.last_index == TCP_SYN_SET && 945 ct->proto.tcp.last_index == TCP_SYN_SET &&
951 (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) { 946 (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
952 /* Detected RFC5961 challenge ACK */ 947 /* Detected RFC5961 challenge ACK */
953 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; 948 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
954 spin_unlock_bh(&ct->lock); 949 spin_unlock_bh(&ct->lock);
955 nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); 950 nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
956 return NF_ACCEPT; /* Don't change state */ 951 return NF_ACCEPT; /* Don't change state */
957 } 952 }
958 break; 953 break;
959 case TCP_CONNTRACK_SYN_SENT2: 954 case TCP_CONNTRACK_SYN_SENT2:
960 /* tcp_conntracks table is not smart enough to handle 955 /* tcp_conntracks table is not smart enough to handle
961 * simultaneous open. 956 * simultaneous open.
962 */ 957 */
963 ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; 958 ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
964 break; 959 break;
965 case TCP_CONNTRACK_SYN_RECV: 960 case TCP_CONNTRACK_SYN_RECV:
966 if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && 961 if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
967 ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) 962 ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
968 new_state = TCP_CONNTRACK_ESTABLISHED; 963 new_state = TCP_CONNTRACK_ESTABLISHED;
969 break; 964 break;
970 case TCP_CONNTRACK_CLOSE: 965 case TCP_CONNTRACK_CLOSE:
971 if (index == TCP_RST_SET 966 if (index == TCP_RST_SET
972 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) 967 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
973 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { 968 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
974 /* Invalid RST */ 969 /* Invalid RST */
975 spin_unlock_bh(&ct->lock); 970 spin_unlock_bh(&ct->lock);
976 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); 971 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
977 return -NF_ACCEPT; 972 return -NF_ACCEPT;
978 } 973 }
979 if (index == TCP_RST_SET 974 if (index == TCP_RST_SET
980 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) 975 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
981 && ct->proto.tcp.last_index == TCP_SYN_SET) 976 && ct->proto.tcp.last_index == TCP_SYN_SET)
982 || (!test_bit(IPS_ASSURED_BIT, &ct->status) 977 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
983 && ct->proto.tcp.last_index == TCP_ACK_SET)) 978 && ct->proto.tcp.last_index == TCP_ACK_SET))
984 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { 979 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
985 /* RST sent to invalid SYN or ACK we had let through 980 /* RST sent to invalid SYN or ACK we had let through
986 * at a) and c) above: 981 * at a) and c) above:
987 * 982 *
988 * a) SYN was in window then 983 * a) SYN was in window then
989 * c) we hold a half-open connection. 984 * c) we hold a half-open connection.
990 * 985 *
991 * Delete our connection entry. 986 * Delete our connection entry.
992 * We skip window checking, because packet might ACK 987 * We skip window checking, because packet might ACK
993 * segments we ignored. */ 988 * segments we ignored. */
994 goto in_window; 989 goto in_window;
995 } 990 }
996 /* Just fall through */ 991 /* Just fall through */
997 default: 992 default:
998 /* Keep compilers happy. */ 993 /* Keep compilers happy. */
999 break; 994 break;
1000 } 995 }
1001 996
1002 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, 997 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1003 skb, dataoff, th)) { 998 skb, dataoff, th)) {
1004 spin_unlock_bh(&ct->lock); 999 spin_unlock_bh(&ct->lock);
1005 return -NF_ACCEPT; 1000 return -NF_ACCEPT;
1006 } 1001 }
1007 in_window: 1002 in_window:
1008 /* From now on we have got in-window packets */ 1003 /* From now on we have got in-window packets */
1009 ct->proto.tcp.last_index = index; 1004 ct->proto.tcp.last_index = index;
1010 ct->proto.tcp.last_dir = dir; 1005 ct->proto.tcp.last_dir = dir;
1011 1006
1012 pr_debug("tcp_conntracks: "); 1007 pr_debug("tcp_conntracks: ");
1013 nf_ct_dump_tuple(tuple); 1008 nf_ct_dump_tuple(tuple);
1014 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", 1009 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1015 (th->syn ? 1 : 0), (th->ack ? 1 : 0), 1010 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1016 (th->fin ? 1 : 0), (th->rst ? 1 : 0), 1011 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1017 old_state, new_state); 1012 old_state, new_state);
1018 1013
1019 ct->proto.tcp.state = new_state; 1014 ct->proto.tcp.state = new_state;
1020 if (old_state != new_state 1015 if (old_state != new_state
1021 && new_state == TCP_CONNTRACK_FIN_WAIT) 1016 && new_state == TCP_CONNTRACK_FIN_WAIT)
1022 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 1017 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1023 1018
1019 timeouts = nf_ct_timeout_lookup(ct);
1020 if (!timeouts)
1021 timeouts = tn->timeouts;
1022
1024 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && 1023 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1025 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) 1024 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1026 timeout = timeouts[TCP_CONNTRACK_RETRANS]; 1025 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1027 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & 1026 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1028 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && 1027 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1029 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) 1028 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1030 timeout = timeouts[TCP_CONNTRACK_UNACK]; 1029 timeout = timeouts[TCP_CONNTRACK_UNACK];
1031 else if (ct->proto.tcp.last_win == 0 && 1030 else if (ct->proto.tcp.last_win == 0 &&
1032 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) 1031 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1033 timeout = timeouts[TCP_CONNTRACK_RETRANS]; 1032 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1034 else 1033 else
1035 timeout = timeouts[new_state]; 1034 timeout = timeouts[new_state];
1036 spin_unlock_bh(&ct->lock); 1035 spin_unlock_bh(&ct->lock);
1037 1036
1038 if (new_state != old_state) 1037 if (new_state != old_state)
1039 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 1038 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1040 1039
1041 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 1040 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1042 /* If only reply is a RST, we can consider ourselves not to 1041 /* If only reply is a RST, we can consider ourselves not to
1043 have an established connection: this is a fairly common 1042 have an established connection: this is a fairly common
1044 problem case, so we can delete the conntrack 1043 problem case, so we can delete the conntrack
1045 immediately. --RR */ 1044 immediately. --RR */
1046 if (th->rst) { 1045 if (th->rst) {
1047 nf_ct_kill_acct(ct, ctinfo, skb); 1046 nf_ct_kill_acct(ct, ctinfo, skb);
1048 return NF_ACCEPT; 1047 return NF_ACCEPT;
1049 } 1048 }
1050 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection 1049 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1051 * pickup with loose=1. Avoid large ESTABLISHED timeout. 1050 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1052 */ 1051 */
1053 if (new_state == TCP_CONNTRACK_ESTABLISHED && 1052 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1054 timeout > timeouts[TCP_CONNTRACK_UNACK]) 1053 timeout > timeouts[TCP_CONNTRACK_UNACK])
1055 timeout = timeouts[TCP_CONNTRACK_UNACK]; 1054 timeout = timeouts[TCP_CONNTRACK_UNACK];
1056 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) 1055 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1057 && (old_state == TCP_CONNTRACK_SYN_RECV 1056 && (old_state == TCP_CONNTRACK_SYN_RECV
1058 || old_state == TCP_CONNTRACK_ESTABLISHED) 1057 || old_state == TCP_CONNTRACK_ESTABLISHED)
1059 && new_state == TCP_CONNTRACK_ESTABLISHED) { 1058 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1060 /* Set ASSURED if we see see valid ack in ESTABLISHED 1059 /* Set ASSURED if we see see valid ack in ESTABLISHED
1061 after SYN_RECV or a valid answer for a picked up 1060 after SYN_RECV or a valid answer for a picked up
1062 connection. */ 1061 connection. */
1063 set_bit(IPS_ASSURED_BIT, &ct->status); 1062 set_bit(IPS_ASSURED_BIT, &ct->status);
1064 nf_conntrack_event_cache(IPCT_ASSURED, ct); 1063 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1065 } 1064 }
1066 nf_ct_refresh_acct(ct, ctinfo, skb, timeout); 1065 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1067 1066
1068 return NF_ACCEPT; 1067 return NF_ACCEPT;
1069 } 1068 }
1070 1069
1071 /* Called when a new connection for this protocol found. */ 1070 /* Called when a new connection for this protocol found. */
1072 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, 1071 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1073 unsigned int dataoff, unsigned int *timeouts) 1072 unsigned int dataoff)
1074 { 1073 {
1075 enum tcp_conntrack new_state; 1074 enum tcp_conntrack new_state;
1076 const struct tcphdr *th; 1075 const struct tcphdr *th;
1077 struct tcphdr _tcph; 1076 struct tcphdr _tcph;
1078 struct net *net = nf_ct_net(ct); 1077 struct net *net = nf_ct_net(ct);
1079 struct nf_tcp_net *tn = tcp_pernet(net); 1078 struct nf_tcp_net *tn = tcp_pernet(net);
1080 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; 1079 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1081 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; 1080 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1082 1081
1083 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 1082 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1084 BUG_ON(th == NULL); 1083 BUG_ON(th == NULL);
1085 1084
1086 /* Don't need lock here: this conntrack not in circulation yet */ 1085 /* Don't need lock here: this conntrack not in circulation yet */
1087 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; 1086 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1088 1087
1089 /* Invalid: delete conntrack */ 1088 /* Invalid: delete conntrack */
1090 if (new_state >= TCP_CONNTRACK_MAX) { 1089 if (new_state >= TCP_CONNTRACK_MAX) {
1091 pr_debug("nf_ct_tcp: invalid new deleting.\n"); 1090 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1092 return false; 1091 return false;
1093 } 1092 }
1094 1093
1095 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1094 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1096 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); 1095 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1097 /* SYN packet */ 1096 /* SYN packet */
1098 ct->proto.tcp.seen[0].td_end = 1097 ct->proto.tcp.seen[0].td_end =
1099 segment_seq_plus_len(ntohl(th->seq), skb->len, 1098 segment_seq_plus_len(ntohl(th->seq), skb->len,
1100 dataoff, th); 1099 dataoff, th);
1101 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1100 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1102 if (ct->proto.tcp.seen[0].td_maxwin == 0) 1101 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1103 ct->proto.tcp.seen[0].td_maxwin = 1; 1102 ct->proto.tcp.seen[0].td_maxwin = 1;
1104 ct->proto.tcp.seen[0].td_maxend = 1103 ct->proto.tcp.seen[0].td_maxend =
1105 ct->proto.tcp.seen[0].td_end; 1104 ct->proto.tcp.seen[0].td_end;
1106 1105
1107 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1106 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1108 } else if (tn->tcp_loose == 0) { 1107 } else if (tn->tcp_loose == 0) {
1109 /* Don't try to pick up connections. */ 1108 /* Don't try to pick up connections. */
1110 return false; 1109 return false;
1111 } else { 1110 } else {
1112 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); 1111 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1113 /* 1112 /*
1114 * We are in the middle of a connection, 1113 * We are in the middle of a connection,
1115 * its history is lost for us. 1114 * its history is lost for us.
1116 * Let's try to use the data from the packet. 1115 * Let's try to use the data from the packet.
1117 */ 1116 */
1118 ct->proto.tcp.seen[0].td_end = 1117 ct->proto.tcp.seen[0].td_end =
1119 segment_seq_plus_len(ntohl(th->seq), skb->len, 1118 segment_seq_plus_len(ntohl(th->seq), skb->len,
1120 dataoff, th); 1119 dataoff, th);
1121 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); 1120 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1122 if (ct->proto.tcp.seen[0].td_maxwin == 0) 1121 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1123 ct->proto.tcp.seen[0].td_maxwin = 1; 1122 ct->proto.tcp.seen[0].td_maxwin = 1;
1124 ct->proto.tcp.seen[0].td_maxend = 1123 ct->proto.tcp.seen[0].td_maxend =
1125 ct->proto.tcp.seen[0].td_end + 1124 ct->proto.tcp.seen[0].td_end +
1126 ct->proto.tcp.seen[0].td_maxwin; 1125 ct->proto.tcp.seen[0].td_maxwin;
1127 1126
1128 /* We assume SACK and liberal window checking to handle 1127 /* We assume SACK and liberal window checking to handle
1129 * window scaling */ 1128 * window scaling */
1130 ct->proto.tcp.seen[0].flags = 1129 ct->proto.tcp.seen[0].flags =
1131 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | 1130 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1132 IP_CT_TCP_FLAG_BE_LIBERAL; 1131 IP_CT_TCP_FLAG_BE_LIBERAL;
1133 } 1132 }
1134 1133
1135 /* tcp_packet will set them */ 1134 /* tcp_packet will set them */
1136 ct->proto.tcp.last_index = TCP_NONE_SET; 1135 ct->proto.tcp.last_index = TCP_NONE_SET;
1137 1136
1138 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " 1137 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1139 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 1138 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1140 sender->td_end, sender->td_maxend, sender->td_maxwin, 1139 sender->td_end, sender->td_maxend, sender->td_maxwin,
1141 sender->td_scale, 1140 sender->td_scale,
1142 receiver->td_end, receiver->td_maxend, receiver->td_maxwin, 1141 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1143 receiver->td_scale); 1142 receiver->td_scale);
1144 return true; 1143 return true;
1145 } 1144 }
1146 1145
1147 static bool tcp_can_early_drop(const struct nf_conn *ct) 1146 static bool tcp_can_early_drop(const struct nf_conn *ct)
1148 { 1147 {
1149 switch (ct->proto.tcp.state) { 1148 switch (ct->proto.tcp.state) {
1150 case TCP_CONNTRACK_FIN_WAIT: 1149 case TCP_CONNTRACK_FIN_WAIT:
1151 case TCP_CONNTRACK_LAST_ACK: 1150 case TCP_CONNTRACK_LAST_ACK:
1152 case TCP_CONNTRACK_TIME_WAIT: 1151 case TCP_CONNTRACK_TIME_WAIT:
1153 case TCP_CONNTRACK_CLOSE: 1152 case TCP_CONNTRACK_CLOSE:
1154 case TCP_CONNTRACK_CLOSE_WAIT: 1153 case TCP_CONNTRACK_CLOSE_WAIT:
1155 return true; 1154 return true;
1156 default: 1155 default:
1157 break; 1156 break;
1158 } 1157 }
1159 1158
1160 return false; 1159 return false;
1161 } 1160 }
1162 1161
1163 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1162 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1164 1163
1165 #include <linux/netfilter/nfnetlink.h> 1164 #include <linux/netfilter/nfnetlink.h>
1166 #include <linux/netfilter/nfnetlink_conntrack.h> 1165 #include <linux/netfilter/nfnetlink_conntrack.h>
1167 1166
1168 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, 1167 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1169 struct nf_conn *ct) 1168 struct nf_conn *ct)
1170 { 1169 {
1171 struct nlattr *nest_parms; 1170 struct nlattr *nest_parms;
1172 struct nf_ct_tcp_flags tmp = {}; 1171 struct nf_ct_tcp_flags tmp = {};
1173 1172
1174 spin_lock_bh(&ct->lock); 1173 spin_lock_bh(&ct->lock);
1175 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); 1174 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1176 if (!nest_parms) 1175 if (!nest_parms)
1177 goto nla_put_failure; 1176 goto nla_put_failure;
1178 1177
1179 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) || 1178 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1180 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, 1179 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1181 ct->proto.tcp.seen[0].td_scale) || 1180 ct->proto.tcp.seen[0].td_scale) ||
1182 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, 1181 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1183 ct->proto.tcp.seen[1].td_scale)) 1182 ct->proto.tcp.seen[1].td_scale))
1184 goto nla_put_failure; 1183 goto nla_put_failure;
1185 1184
1186 tmp.flags = ct->proto.tcp.seen[0].flags; 1185 tmp.flags = ct->proto.tcp.seen[0].flags;
1187 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 1186 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1188 sizeof(struct nf_ct_tcp_flags), &tmp)) 1187 sizeof(struct nf_ct_tcp_flags), &tmp))
1189 goto nla_put_failure; 1188 goto nla_put_failure;
1190 1189
1191 tmp.flags = ct->proto.tcp.seen[1].flags; 1190 tmp.flags = ct->proto.tcp.seen[1].flags;
1192 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, 1191 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1193 sizeof(struct nf_ct_tcp_flags), &tmp)) 1192 sizeof(struct nf_ct_tcp_flags), &tmp))
1194 goto nla_put_failure; 1193 goto nla_put_failure;
1195 spin_unlock_bh(&ct->lock); 1194 spin_unlock_bh(&ct->lock);
1196 1195
1197 nla_nest_end(skb, nest_parms); 1196 nla_nest_end(skb, nest_parms);
1198 1197
1199 return 0; 1198 return 0;
1200 1199
1201 nla_put_failure: 1200 nla_put_failure:
1202 spin_unlock_bh(&ct->lock); 1201 spin_unlock_bh(&ct->lock);
1203 return -1; 1202 return -1;
1204 } 1203 }
1205 1204
1206 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { 1205 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1207 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 }, 1206 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1208 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, 1207 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1209 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, 1208 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1210 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, 1209 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1211 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, 1210 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
1212 }; 1211 };
1213 1212
/* Upper bound on the payload tcp_to_nlattr() emits: the state byte, the
 * two window-scale bytes and the two nf_ct_tcp_flags blobs.
 *
 * Fix: the previous expression used sizeof(sizeof(struct nf_ct_tcp_flags)),
 * which is sizeof(size_t) rather than the size of the structure itself —
 * the intended operand is the struct, matching what tcp_to_nlattr() puts.
 */
#define TCP_NLATTR_SIZE ( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1219 1218
1220 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) 1219 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1221 { 1220 {
1222 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP]; 1221 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1223 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1]; 1222 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1224 int err; 1223 int err;
1225 1224
1226 /* updates could not contain anything about the private 1225 /* updates could not contain anything about the private
1227 * protocol info, in that case skip the parsing */ 1226 * protocol info, in that case skip the parsing */
1228 if (!pattr) 1227 if (!pattr)
1229 return 0; 1228 return 0;
1230 1229
1231 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, 1230 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1232 tcp_nla_policy, NULL); 1231 tcp_nla_policy, NULL);
1233 if (err < 0) 1232 if (err < 0)
1234 return err; 1233 return err;
1235 1234
1236 if (tb[CTA_PROTOINFO_TCP_STATE] && 1235 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1237 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) 1236 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1238 return -EINVAL; 1237 return -EINVAL;
1239 1238
1240 spin_lock_bh(&ct->lock); 1239 spin_lock_bh(&ct->lock);
1241 if (tb[CTA_PROTOINFO_TCP_STATE]) 1240 if (tb[CTA_PROTOINFO_TCP_STATE])
1242 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); 1241 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1243 1242
1244 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) { 1243 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1245 struct nf_ct_tcp_flags *attr = 1244 struct nf_ct_tcp_flags *attr =
1246 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]); 1245 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1247 ct->proto.tcp.seen[0].flags &= ~attr->mask; 1246 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1248 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask; 1247 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1249 } 1248 }
1250 1249
1251 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) { 1250 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1252 struct nf_ct_tcp_flags *attr = 1251 struct nf_ct_tcp_flags *attr =
1253 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]); 1252 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1254 ct->proto.tcp.seen[1].flags &= ~attr->mask; 1253 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1255 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask; 1254 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1256 } 1255 }
1257 1256
1258 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] && 1257 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1259 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] && 1258 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1260 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE && 1259 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1261 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { 1260 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1262 ct->proto.tcp.seen[0].td_scale = 1261 ct->proto.tcp.seen[0].td_scale =
1263 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]); 1262 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1264 ct->proto.tcp.seen[1].td_scale = 1263 ct->proto.tcp.seen[1].td_scale =
1265 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); 1264 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1266 } 1265 }
1267 spin_unlock_bh(&ct->lock); 1266 spin_unlock_bh(&ct->lock);
1268 1267
1269 return 0; 1268 return 0;
1270 } 1269 }
1271 1270
1272 static unsigned int tcp_nlattr_tuple_size(void) 1271 static unsigned int tcp_nlattr_tuple_size(void)
1273 { 1272 {
1274 static unsigned int size __read_mostly; 1273 static unsigned int size __read_mostly;
1275 1274
1276 if (!size) 1275 if (!size)
1277 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1276 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1278 1277
1279 return size; 1278 return size;
1280 } 1279 }
1281 #endif 1280 #endif
1282 1281
1283 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 1282 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1284 1283
1285 #include <linux/netfilter/nfnetlink.h> 1284 #include <linux/netfilter/nfnetlink.h>
1286 #include <linux/netfilter/nfnetlink_cttimeout.h> 1285 #include <linux/netfilter/nfnetlink_cttimeout.h>
1287 1286
1288 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], 1287 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1289 struct net *net, void *data) 1288 struct net *net, void *data)
1290 { 1289 {
1291 unsigned int *timeouts = data;
1292 struct nf_tcp_net *tn = tcp_pernet(net); 1290 struct nf_tcp_net *tn = tcp_pernet(net);
1291 unsigned int *timeouts = data;
1293 int i; 1292 int i;
1294 1293
1294 if (!timeouts)
1295 timeouts = tn->timeouts;
1295 /* set default TCP timeouts. */ 1296 /* set default TCP timeouts. */
1296 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) 1297 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1297 timeouts[i] = tn->timeouts[i]; 1298 timeouts[i] = tn->timeouts[i];
1298 1299
1299 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { 1300 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1300 timeouts[TCP_CONNTRACK_SYN_SENT] = 1301 timeouts[TCP_CONNTRACK_SYN_SENT] =
1301 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ; 1302 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1302 } 1303 }
1303 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) { 1304 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1304 timeouts[TCP_CONNTRACK_SYN_RECV] = 1305 timeouts[TCP_CONNTRACK_SYN_RECV] =
1305 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ; 1306 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1306 } 1307 }
1307 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) { 1308 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1308 timeouts[TCP_CONNTRACK_ESTABLISHED] = 1309 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1309 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ; 1310 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1310 } 1311 }
1311 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) { 1312 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1312 timeouts[TCP_CONNTRACK_FIN_WAIT] = 1313 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1313 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ; 1314 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1314 } 1315 }
1315 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) { 1316 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1316 timeouts[TCP_CONNTRACK_CLOSE_WAIT] = 1317 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1317 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ; 1318 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1318 } 1319 }
1319 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) { 1320 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1320 timeouts[TCP_CONNTRACK_LAST_ACK] = 1321 timeouts[TCP_CONNTRACK_LAST_ACK] =
1321 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ; 1322 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1322 } 1323 }
1323 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) { 1324 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1324 timeouts[TCP_CONNTRACK_TIME_WAIT] = 1325 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1325 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ; 1326 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1326 } 1327 }
1327 if (tb[CTA_TIMEOUT_TCP_CLOSE]) { 1328 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1328 timeouts[TCP_CONNTRACK_CLOSE] = 1329 timeouts[TCP_CONNTRACK_CLOSE] =
1329 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ; 1330 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1330 } 1331 }
1331 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) { 1332 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1332 timeouts[TCP_CONNTRACK_SYN_SENT2] = 1333 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1333 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ; 1334 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1334 } 1335 }
1335 if (tb[CTA_TIMEOUT_TCP_RETRANS]) { 1336 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1336 timeouts[TCP_CONNTRACK_RETRANS] = 1337 timeouts[TCP_CONNTRACK_RETRANS] =
1337 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ; 1338 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1338 } 1339 }
1339 if (tb[CTA_TIMEOUT_TCP_UNACK]) { 1340 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1340 timeouts[TCP_CONNTRACK_UNACK] = 1341 timeouts[TCP_CONNTRACK_UNACK] =
1341 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ; 1342 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1342 } 1343 }
1343 return 0; 1344 return 0;
1344 } 1345 }
1345 1346
1346 static int 1347 static int
1347 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 1348 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1348 { 1349 {
1349 const unsigned int *timeouts = data; 1350 const unsigned int *timeouts = data;
1350 1351
1351 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT, 1352 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1352 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) || 1353 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1353 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV, 1354 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1354 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) || 1355 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1355 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED, 1356 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1356 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) || 1357 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1357 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT, 1358 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1358 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) || 1359 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1359 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT, 1360 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1360 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) || 1361 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1361 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK, 1362 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1362 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) || 1363 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1363 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT, 1364 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1364 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) || 1365 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1365 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE, 1366 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1366 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) || 1367 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1367 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2, 1368 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1368 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) || 1369 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1369 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS, 1370 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1370 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) || 1371 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1371 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK, 1372 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1372 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ))) 1373 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1373 goto nla_put_failure; 1374 goto nla_put_failure;
1374 return 0; 1375 return 0;
1375 1376
1376 nla_put_failure: 1377 nla_put_failure:
1377 return -ENOSPC; 1378 return -ENOSPC;
1378 } 1379 }
1379 1380
1380 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { 1381 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1381 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 }, 1382 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1382 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 }, 1383 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1383 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 }, 1384 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1384 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 }, 1385 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1385 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 }, 1386 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1386 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 }, 1387 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1387 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 }, 1388 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1388 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 }, 1389 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1389 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 }, 1390 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
1390 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 }, 1391 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
1391 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 }, 1392 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
1392 }; 1393 };
1393 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 1394 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1394 1395
1395 #ifdef CONFIG_SYSCTL 1396 #ifdef CONFIG_SYSCTL
1396 static struct ctl_table tcp_sysctl_table[] = { 1397 static struct ctl_table tcp_sysctl_table[] = {
1397 { 1398 {
1398 .procname = "nf_conntrack_tcp_timeout_syn_sent", 1399 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1399 .maxlen = sizeof(unsigned int), 1400 .maxlen = sizeof(unsigned int),
1400 .mode = 0644, 1401 .mode = 0644,
1401 .proc_handler = proc_dointvec_jiffies, 1402 .proc_handler = proc_dointvec_jiffies,
1402 }, 1403 },
1403 { 1404 {
1404 .procname = "nf_conntrack_tcp_timeout_syn_recv", 1405 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1405 .maxlen = sizeof(unsigned int), 1406 .maxlen = sizeof(unsigned int),
1406 .mode = 0644, 1407 .mode = 0644,
1407 .proc_handler = proc_dointvec_jiffies, 1408 .proc_handler = proc_dointvec_jiffies,
1408 }, 1409 },
1409 { 1410 {
1410 .procname = "nf_conntrack_tcp_timeout_established", 1411 .procname = "nf_conntrack_tcp_timeout_established",
1411 .maxlen = sizeof(unsigned int), 1412 .maxlen = sizeof(unsigned int),
1412 .mode = 0644, 1413 .mode = 0644,
1413 .proc_handler = proc_dointvec_jiffies, 1414 .proc_handler = proc_dointvec_jiffies,
1414 }, 1415 },
1415 { 1416 {
1416 .procname = "nf_conntrack_tcp_timeout_fin_wait", 1417 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1417 .maxlen = sizeof(unsigned int), 1418 .maxlen = sizeof(unsigned int),
1418 .mode = 0644, 1419 .mode = 0644,
1419 .proc_handler = proc_dointvec_jiffies, 1420 .proc_handler = proc_dointvec_jiffies,
1420 }, 1421 },
1421 { 1422 {
1422 .procname = "nf_conntrack_tcp_timeout_close_wait", 1423 .procname = "nf_conntrack_tcp_timeout_close_wait",
1423 .maxlen = sizeof(unsigned int), 1424 .maxlen = sizeof(unsigned int),
1424 .mode = 0644, 1425 .mode = 0644,
1425 .proc_handler = proc_dointvec_jiffies, 1426 .proc_handler = proc_dointvec_jiffies,
1426 }, 1427 },
1427 { 1428 {
1428 .procname = "nf_conntrack_tcp_timeout_last_ack", 1429 .procname = "nf_conntrack_tcp_timeout_last_ack",
1429 .maxlen = sizeof(unsigned int), 1430 .maxlen = sizeof(unsigned int),
1430 .mode = 0644, 1431 .mode = 0644,
1431 .proc_handler = proc_dointvec_jiffies, 1432 .proc_handler = proc_dointvec_jiffies,
1432 }, 1433 },
1433 { 1434 {
1434 .procname = "nf_conntrack_tcp_timeout_time_wait", 1435 .procname = "nf_conntrack_tcp_timeout_time_wait",
1435 .maxlen = sizeof(unsigned int), 1436 .maxlen = sizeof(unsigned int),
1436 .mode = 0644, 1437 .mode = 0644,
1437 .proc_handler = proc_dointvec_jiffies, 1438 .proc_handler = proc_dointvec_jiffies,
1438 }, 1439 },
1439 { 1440 {
1440 .procname = "nf_conntrack_tcp_timeout_close", 1441 .procname = "nf_conntrack_tcp_timeout_close",
1441 .maxlen = sizeof(unsigned int), 1442 .maxlen = sizeof(unsigned int),
1442 .mode = 0644, 1443 .mode = 0644,
1443 .proc_handler = proc_dointvec_jiffies, 1444 .proc_handler = proc_dointvec_jiffies,
1444 }, 1445 },
1445 { 1446 {
1446 .procname = "nf_conntrack_tcp_timeout_max_retrans", 1447 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1447 .maxlen = sizeof(unsigned int), 1448 .maxlen = sizeof(unsigned int),
1448 .mode = 0644, 1449 .mode = 0644,
1449 .proc_handler = proc_dointvec_jiffies, 1450 .proc_handler = proc_dointvec_jiffies,
1450 }, 1451 },
1451 { 1452 {
1452 .procname = "nf_conntrack_tcp_timeout_unacknowledged", 1453 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
1453 .maxlen = sizeof(unsigned int), 1454 .maxlen = sizeof(unsigned int),
1454 .mode = 0644, 1455 .mode = 0644,
1455 .proc_handler = proc_dointvec_jiffies, 1456 .proc_handler = proc_dointvec_jiffies,
1456 }, 1457 },
1457 { 1458 {
1458 .procname = "nf_conntrack_tcp_loose", 1459 .procname = "nf_conntrack_tcp_loose",
1459 .maxlen = sizeof(unsigned int), 1460 .maxlen = sizeof(unsigned int),
1460 .mode = 0644, 1461 .mode = 0644,
1461 .proc_handler = proc_dointvec, 1462 .proc_handler = proc_dointvec,
1462 }, 1463 },
1463 { 1464 {
1464 .procname = "nf_conntrack_tcp_be_liberal", 1465 .procname = "nf_conntrack_tcp_be_liberal",
1465 .maxlen = sizeof(unsigned int), 1466 .maxlen = sizeof(unsigned int),
1466 .mode = 0644, 1467 .mode = 0644,
1467 .proc_handler = proc_dointvec, 1468 .proc_handler = proc_dointvec,
1468 }, 1469 },
1469 { 1470 {
1470 .procname = "nf_conntrack_tcp_max_retrans", 1471 .procname = "nf_conntrack_tcp_max_retrans",
1471 .maxlen = sizeof(unsigned int), 1472 .maxlen = sizeof(unsigned int),
1472 .mode = 0644, 1473 .mode = 0644,
1473 .proc_handler = proc_dointvec, 1474 .proc_handler = proc_dointvec,
1474 }, 1475 },
1475 { } 1476 { }
1476 }; 1477 };
1477 #endif /* CONFIG_SYSCTL */ 1478 #endif /* CONFIG_SYSCTL */
1478 1479
1479 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, 1480 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1480 struct nf_tcp_net *tn) 1481 struct nf_tcp_net *tn)
1481 { 1482 {
1482 #ifdef CONFIG_SYSCTL 1483 #ifdef CONFIG_SYSCTL
1483 if (pn->ctl_table) 1484 if (pn->ctl_table)
1484 return 0; 1485 return 0;
1485 1486
1486 pn->ctl_table = kmemdup(tcp_sysctl_table, 1487 pn->ctl_table = kmemdup(tcp_sysctl_table,
1487 sizeof(tcp_sysctl_table), 1488 sizeof(tcp_sysctl_table),
1488 GFP_KERNEL); 1489 GFP_KERNEL);
1489 if (!pn->ctl_table) 1490 if (!pn->ctl_table)
1490 return -ENOMEM; 1491 return -ENOMEM;
1491 1492
1492 pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; 1493 pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1493 pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; 1494 pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1494 pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; 1495 pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1495 pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; 1496 pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1496 pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; 1497 pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1497 pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; 1498 pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1498 pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; 1499 pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1499 pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; 1500 pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1500 pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; 1501 pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1501 pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; 1502 pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
1502 pn->ctl_table[10].data = &tn->tcp_loose; 1503 pn->ctl_table[10].data = &tn->tcp_loose;
1503 pn->ctl_table[11].data = &tn->tcp_be_liberal; 1504 pn->ctl_table[11].data = &tn->tcp_be_liberal;
1504 pn->ctl_table[12].data = &tn->tcp_max_retrans; 1505 pn->ctl_table[12].data = &tn->tcp_max_retrans;
1505 #endif 1506 #endif
1506 return 0; 1507 return 0;
1507 } 1508 }
1508 1509
1509 static int tcp_init_net(struct net *net, u_int16_t proto) 1510 static int tcp_init_net(struct net *net, u_int16_t proto)
1510 { 1511 {
1511 struct nf_tcp_net *tn = tcp_pernet(net); 1512 struct nf_tcp_net *tn = tcp_pernet(net);
1512 struct nf_proto_net *pn = &tn->pn; 1513 struct nf_proto_net *pn = &tn->pn;
1513 1514
1514 if (!pn->users) { 1515 if (!pn->users) {
1515 int i; 1516 int i;
1516 1517
1517 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) 1518 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1518 tn->timeouts[i] = tcp_timeouts[i]; 1519 tn->timeouts[i] = tcp_timeouts[i];
1519 1520
1520 tn->tcp_loose = nf_ct_tcp_loose; 1521 tn->tcp_loose = nf_ct_tcp_loose;
1521 tn->tcp_be_liberal = nf_ct_tcp_be_liberal; 1522 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1522 tn->tcp_max_retrans = nf_ct_tcp_max_retrans; 1523 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1523 } 1524 }
1524 1525
1525 return tcp_kmemdup_sysctl_table(pn, tn); 1526 return tcp_kmemdup_sysctl_table(pn, tn);
1526 } 1527 }
1527 1528
1528 static struct nf_proto_net *tcp_get_net_proto(struct net *net) 1529 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1529 { 1530 {
1530 return &net->ct.nf_ct_proto.tcp.pn; 1531 return &net->ct.nf_ct_proto.tcp.pn;
1531 } 1532 }
1532 1533
1533 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = 1534 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1534 { 1535 {
1535 .l3proto = PF_INET, 1536 .l3proto = PF_INET,
1536 .l4proto = IPPROTO_TCP, 1537 .l4proto = IPPROTO_TCP,
1537 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1538 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1538 .print_conntrack = tcp_print_conntrack, 1539 .print_conntrack = tcp_print_conntrack,
1539 #endif 1540 #endif
1540 .packet = tcp_packet, 1541 .packet = tcp_packet,
1541 .get_timeouts = tcp_get_timeouts,
1542 .new = tcp_new, 1542 .new = tcp_new,
1543 .error = tcp_error, 1543 .error = tcp_error,
1544 .can_early_drop = tcp_can_early_drop, 1544 .can_early_drop = tcp_can_early_drop,
1545 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1545 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1546 .to_nlattr = tcp_to_nlattr, 1546 .to_nlattr = tcp_to_nlattr,
1547 .from_nlattr = nlattr_to_tcp, 1547 .from_nlattr = nlattr_to_tcp,
1548 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1548 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1549 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1549 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1550 .nlattr_tuple_size = tcp_nlattr_tuple_size, 1550 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1551 .nlattr_size = TCP_NLATTR_SIZE, 1551 .nlattr_size = TCP_NLATTR_SIZE,
1552 .nla_policy = nf_ct_port_nla_policy, 1552 .nla_policy = nf_ct_port_nla_policy,
1553 #endif 1553 #endif
1554 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 1554 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1555 .ctnl_timeout = { 1555 .ctnl_timeout = {
1556 .nlattr_to_obj = tcp_timeout_nlattr_to_obj, 1556 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1557 .obj_to_nlattr = tcp_timeout_obj_to_nlattr, 1557 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1558 .nlattr_max = CTA_TIMEOUT_TCP_MAX, 1558 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1559 .obj_size = sizeof(unsigned int) * 1559 .obj_size = sizeof(unsigned int) *
1560 TCP_CONNTRACK_TIMEOUT_MAX, 1560 TCP_CONNTRACK_TIMEOUT_MAX,
1561 .nla_policy = tcp_timeout_nla_policy, 1561 .nla_policy = tcp_timeout_nla_policy,
1562 }, 1562 },
1563 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 1563 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1564 .init_net = tcp_init_net, 1564 .init_net = tcp_init_net,
1565 .get_net_proto = tcp_get_net_proto, 1565 .get_net_proto = tcp_get_net_proto,
1566 }; 1566 };
1567 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); 1567 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1568 1568
1569 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = 1569 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1570 { 1570 {
1571 .l3proto = PF_INET6, 1571 .l3proto = PF_INET6,
1572 .l4proto = IPPROTO_TCP, 1572 .l4proto = IPPROTO_TCP,
1573 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1573 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1574 .print_conntrack = tcp_print_conntrack, 1574 .print_conntrack = tcp_print_conntrack,
1575 #endif 1575 #endif
1576 .packet = tcp_packet, 1576 .packet = tcp_packet,
1577 .get_timeouts = tcp_get_timeouts,
1578 .new = tcp_new, 1577 .new = tcp_new,
1579 .error = tcp_error, 1578 .error = tcp_error,
1580 .can_early_drop = tcp_can_early_drop, 1579 .can_early_drop = tcp_can_early_drop,
1581 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1580 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1582 .nlattr_size = TCP_NLATTR_SIZE, 1581 .nlattr_size = TCP_NLATTR_SIZE,
1583 .to_nlattr = tcp_to_nlattr, 1582 .to_nlattr = tcp_to_nlattr,
1584 .from_nlattr = nlattr_to_tcp, 1583 .from_nlattr = nlattr_to_tcp,
1585 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1584 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1586 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1585 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1587 .nlattr_tuple_size = tcp_nlattr_tuple_size, 1586 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1588 .nla_policy = nf_ct_port_nla_policy, 1587 .nla_policy = nf_ct_port_nla_policy,
1589 #endif 1588 #endif
1590 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 1589 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1591 .ctnl_timeout = { 1590 .ctnl_timeout = {
1592 .nlattr_to_obj = tcp_timeout_nlattr_to_obj, 1591 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1593 .obj_to_nlattr = tcp_timeout_obj_to_nlattr, 1592 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1594 .nlattr_max = CTA_TIMEOUT_TCP_MAX, 1593 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1595 .obj_size = sizeof(unsigned int) * 1594 .obj_size = sizeof(unsigned int) *
net/netfilter/nf_conntrack_proto_udp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 3 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9
10 #include <linux/types.h> 10 #include <linux/types.h>
11 #include <linux/timer.h> 11 #include <linux/timer.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/udp.h> 13 #include <linux/udp.h>
14 #include <linux/seq_file.h> 14 #include <linux/seq_file.h>
15 #include <linux/skbuff.h> 15 #include <linux/skbuff.h>
16 #include <linux/ipv6.h> 16 #include <linux/ipv6.h>
17 #include <net/ip6_checksum.h> 17 #include <net/ip6_checksum.h>
18 #include <net/checksum.h> 18 #include <net/checksum.h>
19 19
20 #include <linux/netfilter.h> 20 #include <linux/netfilter.h>
21 #include <linux/netfilter_ipv4.h> 21 #include <linux/netfilter_ipv4.h>
22 #include <linux/netfilter_ipv6.h> 22 #include <linux/netfilter_ipv6.h>
23 #include <net/netfilter/nf_conntrack_l4proto.h> 23 #include <net/netfilter/nf_conntrack_l4proto.h>
24 #include <net/netfilter/nf_conntrack_ecache.h> 24 #include <net/netfilter/nf_conntrack_ecache.h>
25 #include <net/netfilter/nf_conntrack_timeout.h>
25 #include <net/netfilter/nf_log.h> 26 #include <net/netfilter/nf_log.h>
26 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 27 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
27 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 28 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
28 29
29 static const unsigned int udp_timeouts[UDP_CT_MAX] = { 30 static const unsigned int udp_timeouts[UDP_CT_MAX] = {
30 [UDP_CT_UNREPLIED] = 30*HZ, 31 [UDP_CT_UNREPLIED] = 30*HZ,
31 [UDP_CT_REPLIED] = 180*HZ, 32 [UDP_CT_REPLIED] = 180*HZ,
32 }; 33 };
33 34
34 static inline struct nf_udp_net *udp_pernet(struct net *net) 35 static inline struct nf_udp_net *udp_pernet(struct net *net)
35 { 36 {
36 return &net->ct.nf_ct_proto.udp; 37 return &net->ct.nf_ct_proto.udp;
37 } 38 }
38 39
39 static unsigned int *udp_get_timeouts(struct net *net) 40 static unsigned int *udp_get_timeouts(struct net *net)
40 { 41 {
41 return udp_pernet(net)->timeouts; 42 return udp_pernet(net)->timeouts;
42 } 43 }
43 44
44 /* Returns verdict for packet, and may modify conntracktype */ 45 /* Returns verdict for packet, and may modify conntracktype */
45 static int udp_packet(struct nf_conn *ct, 46 static int udp_packet(struct nf_conn *ct,
46 const struct sk_buff *skb, 47 const struct sk_buff *skb,
47 unsigned int dataoff, 48 unsigned int dataoff,
48 enum ip_conntrack_info ctinfo, 49 enum ip_conntrack_info ctinfo)
49 unsigned int *timeouts)
50 { 50 {
51 unsigned int *timeouts;
52
53 timeouts = nf_ct_timeout_lookup(ct);
54 if (!timeouts)
55 timeouts = udp_get_timeouts(nf_ct_net(ct));
56
51 /* If we've seen traffic both ways, this is some kind of UDP 57 /* If we've seen traffic both ways, this is some kind of UDP
52 stream. Extend timeout. */ 58 stream. Extend timeout. */
53 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 59 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
54 nf_ct_refresh_acct(ct, ctinfo, skb, 60 nf_ct_refresh_acct(ct, ctinfo, skb,
55 timeouts[UDP_CT_REPLIED]); 61 timeouts[UDP_CT_REPLIED]);
56 /* Also, more likely to be important, and not a probe */ 62 /* Also, more likely to be important, and not a probe */
57 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) 63 if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
58 nf_conntrack_event_cache(IPCT_ASSURED, ct); 64 nf_conntrack_event_cache(IPCT_ASSURED, ct);
59 } else { 65 } else {
60 nf_ct_refresh_acct(ct, ctinfo, skb, 66 nf_ct_refresh_acct(ct, ctinfo, skb,
61 timeouts[UDP_CT_UNREPLIED]); 67 timeouts[UDP_CT_UNREPLIED]);
62 } 68 }
63 return NF_ACCEPT; 69 return NF_ACCEPT;
64 } 70 }
65 71
66 /* Called when a new connection for this protocol found. */ 72 /* Called when a new connection for this protocol found. */
67 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, 73 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
68 unsigned int dataoff, unsigned int *timeouts) 74 unsigned int dataoff)
69 { 75 {
70 return true; 76 return true;
71 } 77 }
72 78
73 #ifdef CONFIG_NF_CT_PROTO_UDPLITE 79 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
74 static void udplite_error_log(const struct sk_buff *skb, struct net *net, 80 static void udplite_error_log(const struct sk_buff *skb, struct net *net,
75 u8 pf, const char *msg) 81 u8 pf, const char *msg)
76 { 82 {
77 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); 83 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg);
78 } 84 }
79 85
80 static int udplite_error(struct net *net, struct nf_conn *tmpl, 86 static int udplite_error(struct net *net, struct nf_conn *tmpl,
81 struct sk_buff *skb, 87 struct sk_buff *skb,
82 unsigned int dataoff, 88 unsigned int dataoff,
83 u8 pf, unsigned int hooknum) 89 u8 pf, unsigned int hooknum)
84 { 90 {
85 unsigned int udplen = skb->len - dataoff; 91 unsigned int udplen = skb->len - dataoff;
86 const struct udphdr *hdr; 92 const struct udphdr *hdr;
87 struct udphdr _hdr; 93 struct udphdr _hdr;
88 unsigned int cscov; 94 unsigned int cscov;
89 95
90 /* Header is too small? */ 96 /* Header is too small? */
91 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 97 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
92 if (!hdr) { 98 if (!hdr) {
93 udplite_error_log(skb, net, pf, "short packet"); 99 udplite_error_log(skb, net, pf, "short packet");
94 return -NF_ACCEPT; 100 return -NF_ACCEPT;
95 } 101 }
96 102
97 cscov = ntohs(hdr->len); 103 cscov = ntohs(hdr->len);
98 if (cscov == 0) { 104 if (cscov == 0) {
99 cscov = udplen; 105 cscov = udplen;
100 } else if (cscov < sizeof(*hdr) || cscov > udplen) { 106 } else if (cscov < sizeof(*hdr) || cscov > udplen) {
101 udplite_error_log(skb, net, pf, "invalid checksum coverage"); 107 udplite_error_log(skb, net, pf, "invalid checksum coverage");
102 return -NF_ACCEPT; 108 return -NF_ACCEPT;
103 } 109 }
104 110
105 /* UDPLITE mandates checksums */ 111 /* UDPLITE mandates checksums */
106 if (!hdr->check) { 112 if (!hdr->check) {
107 udplite_error_log(skb, net, pf, "checksum missing"); 113 udplite_error_log(skb, net, pf, "checksum missing");
108 return -NF_ACCEPT; 114 return -NF_ACCEPT;
109 } 115 }
110 116
111 /* Checksum invalid? Ignore. */ 117 /* Checksum invalid? Ignore. */
112 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 118 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
113 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, 119 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
114 pf)) { 120 pf)) {
115 udplite_error_log(skb, net, pf, "bad checksum"); 121 udplite_error_log(skb, net, pf, "bad checksum");
116 return -NF_ACCEPT; 122 return -NF_ACCEPT;
117 } 123 }
118 124
119 return NF_ACCEPT; 125 return NF_ACCEPT;
120 } 126 }
121 #endif 127 #endif
122 128
123 static void udp_error_log(const struct sk_buff *skb, struct net *net, 129 static void udp_error_log(const struct sk_buff *skb, struct net *net,
124 u8 pf, const char *msg) 130 u8 pf, const char *msg)
125 { 131 {
126 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); 132 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg);
127 } 133 }
128 134
129 static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, 135 static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
130 unsigned int dataoff, 136 unsigned int dataoff,
131 u_int8_t pf, 137 u_int8_t pf,
132 unsigned int hooknum) 138 unsigned int hooknum)
133 { 139 {
134 unsigned int udplen = skb->len - dataoff; 140 unsigned int udplen = skb->len - dataoff;
135 const struct udphdr *hdr; 141 const struct udphdr *hdr;
136 struct udphdr _hdr; 142 struct udphdr _hdr;
137 143
138 /* Header is too small? */ 144 /* Header is too small? */
139 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 145 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
140 if (hdr == NULL) { 146 if (hdr == NULL) {
141 udp_error_log(skb, net, pf, "short packet"); 147 udp_error_log(skb, net, pf, "short packet");
142 return -NF_ACCEPT; 148 return -NF_ACCEPT;
143 } 149 }
144 150
145 /* Truncated/malformed packets */ 151 /* Truncated/malformed packets */
146 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { 152 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
147 udp_error_log(skb, net, pf, "truncated/malformed packet"); 153 udp_error_log(skb, net, pf, "truncated/malformed packet");
148 return -NF_ACCEPT; 154 return -NF_ACCEPT;
149 } 155 }
150 156
151 /* Packet with no checksum */ 157 /* Packet with no checksum */
152 if (!hdr->check) 158 if (!hdr->check)
153 return NF_ACCEPT; 159 return NF_ACCEPT;
154 160
155 /* Checksum invalid? Ignore. 161 /* Checksum invalid? Ignore.
156 * We skip checking packets on the outgoing path 162 * We skip checking packets on the outgoing path
157 * because the checksum is assumed to be correct. 163 * because the checksum is assumed to be correct.
158 * FIXME: Source route IP option packets --RR */ 164 * FIXME: Source route IP option packets --RR */
159 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 165 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
160 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { 166 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
161 udp_error_log(skb, net, pf, "bad checksum"); 167 udp_error_log(skb, net, pf, "bad checksum");
162 return -NF_ACCEPT; 168 return -NF_ACCEPT;
163 } 169 }
164 170
165 return NF_ACCEPT; 171 return NF_ACCEPT;
166 } 172 }
167 173
168 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 174 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
169 175
170 #include <linux/netfilter/nfnetlink.h> 176 #include <linux/netfilter/nfnetlink.h>
171 #include <linux/netfilter/nfnetlink_cttimeout.h> 177 #include <linux/netfilter/nfnetlink_cttimeout.h>
172 178
173 static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], 179 static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
174 struct net *net, void *data) 180 struct net *net, void *data)
175 { 181 {
176 unsigned int *timeouts = data; 182 unsigned int *timeouts = data;
177 struct nf_udp_net *un = udp_pernet(net); 183 struct nf_udp_net *un = udp_pernet(net);
178 184
185 if (!timeouts)
186 timeouts = un->timeouts;
187
179 /* set default timeouts for UDP. */ 188 /* set default timeouts for UDP. */
180 timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; 189 timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
181 timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; 190 timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
182 191
183 if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { 192 if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) {
184 timeouts[UDP_CT_UNREPLIED] = 193 timeouts[UDP_CT_UNREPLIED] =
185 ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; 194 ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ;
186 } 195 }
187 if (tb[CTA_TIMEOUT_UDP_REPLIED]) { 196 if (tb[CTA_TIMEOUT_UDP_REPLIED]) {
188 timeouts[UDP_CT_REPLIED] = 197 timeouts[UDP_CT_REPLIED] =
189 ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; 198 ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ;
190 } 199 }
191 return 0; 200 return 0;
192 } 201 }
193 202
194 static int 203 static int
195 udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) 204 udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
196 { 205 {
197 const unsigned int *timeouts = data; 206 const unsigned int *timeouts = data;
198 207
199 if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED, 208 if (nla_put_be32(skb, CTA_TIMEOUT_UDP_UNREPLIED,
200 htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) || 209 htonl(timeouts[UDP_CT_UNREPLIED] / HZ)) ||
201 nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED, 210 nla_put_be32(skb, CTA_TIMEOUT_UDP_REPLIED,
202 htonl(timeouts[UDP_CT_REPLIED] / HZ))) 211 htonl(timeouts[UDP_CT_REPLIED] / HZ)))
203 goto nla_put_failure; 212 goto nla_put_failure;
204 return 0; 213 return 0;
205 214
206 nla_put_failure: 215 nla_put_failure:
207 return -ENOSPC; 216 return -ENOSPC;
208 } 217 }
209 218
210 static const struct nla_policy 219 static const struct nla_policy
211 udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { 220 udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = {
212 [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, 221 [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 },
213 [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, 222 [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 },
214 }; 223 };
215 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 224 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
216 225
217 #ifdef CONFIG_SYSCTL 226 #ifdef CONFIG_SYSCTL
218 static struct ctl_table udp_sysctl_table[] = { 227 static struct ctl_table udp_sysctl_table[] = {
219 { 228 {
220 .procname = "nf_conntrack_udp_timeout", 229 .procname = "nf_conntrack_udp_timeout",
221 .maxlen = sizeof(unsigned int), 230 .maxlen = sizeof(unsigned int),
222 .mode = 0644, 231 .mode = 0644,
223 .proc_handler = proc_dointvec_jiffies, 232 .proc_handler = proc_dointvec_jiffies,
224 }, 233 },
225 { 234 {
226 .procname = "nf_conntrack_udp_timeout_stream", 235 .procname = "nf_conntrack_udp_timeout_stream",
227 .maxlen = sizeof(unsigned int), 236 .maxlen = sizeof(unsigned int),
228 .mode = 0644, 237 .mode = 0644,
229 .proc_handler = proc_dointvec_jiffies, 238 .proc_handler = proc_dointvec_jiffies,
230 }, 239 },
231 { } 240 { }
232 }; 241 };
233 #endif /* CONFIG_SYSCTL */ 242 #endif /* CONFIG_SYSCTL */
234 243
235 static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, 244 static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
236 struct nf_udp_net *un) 245 struct nf_udp_net *un)
237 { 246 {
238 #ifdef CONFIG_SYSCTL 247 #ifdef CONFIG_SYSCTL
239 if (pn->ctl_table) 248 if (pn->ctl_table)
240 return 0; 249 return 0;
241 pn->ctl_table = kmemdup(udp_sysctl_table, 250 pn->ctl_table = kmemdup(udp_sysctl_table,
242 sizeof(udp_sysctl_table), 251 sizeof(udp_sysctl_table),
243 GFP_KERNEL); 252 GFP_KERNEL);
244 if (!pn->ctl_table) 253 if (!pn->ctl_table)
245 return -ENOMEM; 254 return -ENOMEM;
246 pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; 255 pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
247 pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; 256 pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED];
248 #endif 257 #endif
249 return 0; 258 return 0;
250 } 259 }
251 260
252 static int udp_init_net(struct net *net, u_int16_t proto) 261 static int udp_init_net(struct net *net, u_int16_t proto)
253 { 262 {
254 struct nf_udp_net *un = udp_pernet(net); 263 struct nf_udp_net *un = udp_pernet(net);
255 struct nf_proto_net *pn = &un->pn; 264 struct nf_proto_net *pn = &un->pn;
256 265
257 if (!pn->users) { 266 if (!pn->users) {
258 int i; 267 int i;
259 268
260 for (i = 0; i < UDP_CT_MAX; i++) 269 for (i = 0; i < UDP_CT_MAX; i++)
261 un->timeouts[i] = udp_timeouts[i]; 270 un->timeouts[i] = udp_timeouts[i];
262 } 271 }
263 272
264 return udp_kmemdup_sysctl_table(pn, un); 273 return udp_kmemdup_sysctl_table(pn, un);
265 } 274 }
266 275
267 static struct nf_proto_net *udp_get_net_proto(struct net *net) 276 static struct nf_proto_net *udp_get_net_proto(struct net *net)
268 { 277 {
269 return &net->ct.nf_ct_proto.udp.pn; 278 return &net->ct.nf_ct_proto.udp.pn;
270 } 279 }
271 280
272 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = 281 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
273 { 282 {
274 .l3proto = PF_INET, 283 .l3proto = PF_INET,
275 .l4proto = IPPROTO_UDP, 284 .l4proto = IPPROTO_UDP,
276 .allow_clash = true, 285 .allow_clash = true,
277 .packet = udp_packet, 286 .packet = udp_packet,
278 .get_timeouts = udp_get_timeouts,
279 .new = udp_new, 287 .new = udp_new,
280 .error = udp_error, 288 .error = udp_error,
281 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 289 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
282 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 290 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
283 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 291 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
284 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 292 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
285 .nla_policy = nf_ct_port_nla_policy, 293 .nla_policy = nf_ct_port_nla_policy,
286 #endif 294 #endif
287 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 295 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
288 .ctnl_timeout = { 296 .ctnl_timeout = {
289 .nlattr_to_obj = udp_timeout_nlattr_to_obj, 297 .nlattr_to_obj = udp_timeout_nlattr_to_obj,
290 .obj_to_nlattr = udp_timeout_obj_to_nlattr, 298 .obj_to_nlattr = udp_timeout_obj_to_nlattr,
291 .nlattr_max = CTA_TIMEOUT_UDP_MAX, 299 .nlattr_max = CTA_TIMEOUT_UDP_MAX,
292 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, 300 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
293 .nla_policy = udp_timeout_nla_policy, 301 .nla_policy = udp_timeout_nla_policy,
294 }, 302 },
295 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 303 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
296 .init_net = udp_init_net, 304 .init_net = udp_init_net,
297 .get_net_proto = udp_get_net_proto, 305 .get_net_proto = udp_get_net_proto,
298 }; 306 };
299 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); 307 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
300 308
301 #ifdef CONFIG_NF_CT_PROTO_UDPLITE 309 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
302 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 = 310 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
303 { 311 {
304 .l3proto = PF_INET, 312 .l3proto = PF_INET,
305 .l4proto = IPPROTO_UDPLITE, 313 .l4proto = IPPROTO_UDPLITE,
306 .allow_clash = true, 314 .allow_clash = true,
307 .packet = udp_packet, 315 .packet = udp_packet,
308 .get_timeouts = udp_get_timeouts,
309 .new = udp_new, 316 .new = udp_new,
310 .error = udplite_error, 317 .error = udplite_error,
311 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 318 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
312 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 319 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
313 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 320 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
314 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 321 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
315 .nla_policy = nf_ct_port_nla_policy, 322 .nla_policy = nf_ct_port_nla_policy,
316 #endif 323 #endif
317 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 324 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
318 .ctnl_timeout = { 325 .ctnl_timeout = {
319 .nlattr_to_obj = udp_timeout_nlattr_to_obj, 326 .nlattr_to_obj = udp_timeout_nlattr_to_obj,
320 .obj_to_nlattr = udp_timeout_obj_to_nlattr, 327 .obj_to_nlattr = udp_timeout_obj_to_nlattr,
321 .nlattr_max = CTA_TIMEOUT_UDP_MAX, 328 .nlattr_max = CTA_TIMEOUT_UDP_MAX,
322 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, 329 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
323 .nla_policy = udp_timeout_nla_policy, 330 .nla_policy = udp_timeout_nla_policy,
324 }, 331 },
325 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 332 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
326 .init_net = udp_init_net, 333 .init_net = udp_init_net,
327 .get_net_proto = udp_get_net_proto, 334 .get_net_proto = udp_get_net_proto,
328 }; 335 };
329 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); 336 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4);
330 #endif 337 #endif
331 338
332 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = 339 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
333 { 340 {
334 .l3proto = PF_INET6, 341 .l3proto = PF_INET6,
335 .l4proto = IPPROTO_UDP, 342 .l4proto = IPPROTO_UDP,
336 .allow_clash = true, 343 .allow_clash = true,
337 .packet = udp_packet, 344 .packet = udp_packet,
338 .get_timeouts = udp_get_timeouts,
339 .new = udp_new, 345 .new = udp_new,
340 .error = udp_error, 346 .error = udp_error,
341 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 347 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
342 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 348 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
343 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 349 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
344 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 350 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
345 .nla_policy = nf_ct_port_nla_policy, 351 .nla_policy = nf_ct_port_nla_policy,
346 #endif 352 #endif
347 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 353 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
348 .ctnl_timeout = { 354 .ctnl_timeout = {
349 .nlattr_to_obj = udp_timeout_nlattr_to_obj, 355 .nlattr_to_obj = udp_timeout_nlattr_to_obj,
350 .obj_to_nlattr = udp_timeout_obj_to_nlattr, 356 .obj_to_nlattr = udp_timeout_obj_to_nlattr,
351 .nlattr_max = CTA_TIMEOUT_UDP_MAX, 357 .nlattr_max = CTA_TIMEOUT_UDP_MAX,
352 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, 358 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
353 .nla_policy = udp_timeout_nla_policy, 359 .nla_policy = udp_timeout_nla_policy,
354 }, 360 },
355 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 361 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
356 .init_net = udp_init_net, 362 .init_net = udp_init_net,
357 .get_net_proto = udp_get_net_proto, 363 .get_net_proto = udp_get_net_proto,
358 }; 364 };
359 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); 365 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
360 366
361 #ifdef CONFIG_NF_CT_PROTO_UDPLITE 367 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
362 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 = 368 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
363 { 369 {
364 .l3proto = PF_INET6, 370 .l3proto = PF_INET6,
365 .l4proto = IPPROTO_UDPLITE, 371 .l4proto = IPPROTO_UDPLITE,
366 .allow_clash = true, 372 .allow_clash = true,
367 .packet = udp_packet, 373 .packet = udp_packet,
368 .get_timeouts = udp_get_timeouts,
369 .new = udp_new, 374 .new = udp_new,
370 .error = udplite_error, 375 .error = udplite_error,
371 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 376 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
372 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 377 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
373 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 378 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
374 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, 379 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
375 .nla_policy = nf_ct_port_nla_policy, 380 .nla_policy = nf_ct_port_nla_policy,
376 #endif 381 #endif
377 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 382 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
378 .ctnl_timeout = { 383 .ctnl_timeout = {
379 .nlattr_to_obj = udp_timeout_nlattr_to_obj, 384 .nlattr_to_obj = udp_timeout_nlattr_to_obj,
380 .obj_to_nlattr = udp_timeout_obj_to_nlattr, 385 .obj_to_nlattr = udp_timeout_obj_to_nlattr,
381 .nlattr_max = CTA_TIMEOUT_UDP_MAX, 386 .nlattr_max = CTA_TIMEOUT_UDP_MAX,
382 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, 387 .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
383 .nla_policy = udp_timeout_nla_policy, 388 .nla_policy = udp_timeout_nla_policy,
384 }, 389 },
385 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ 390 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
386 .init_net = udp_init_net, 391 .init_net = udp_init_net,
387 .get_net_proto = udp_get_net_proto, 392 .get_net_proto = udp_get_net_proto,
net/netfilter/nfnetlink_cttimeout.c
1 /* 1 /*
2 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> 2 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
3 * (C) 2012 by Vyatta Inc. <http://www.vyatta.com> 3 * (C) 2012 by Vyatta Inc. <http://www.vyatta.com>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation (or any later at your option). 7 * published by the Free Software Foundation (or any later at your option).
8 */ 8 */
9 #include <linux/init.h> 9 #include <linux/init.h>
10 #include <linux/module.h> 10 #include <linux/module.h>
11 #include <linux/kernel.h> 11 #include <linux/kernel.h>
12 #include <linux/rculist.h> 12 #include <linux/rculist.h>
13 #include <linux/rculist_nulls.h> 13 #include <linux/rculist_nulls.h>
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #include <linux/timer.h> 15 #include <linux/timer.h>
16 #include <linux/security.h> 16 #include <linux/security.h>
17 #include <linux/skbuff.h> 17 #include <linux/skbuff.h>
18 #include <linux/errno.h> 18 #include <linux/errno.h>
19 #include <linux/netlink.h> 19 #include <linux/netlink.h>
20 #include <linux/spinlock.h> 20 #include <linux/spinlock.h>
21 #include <linux/interrupt.h> 21 #include <linux/interrupt.h>
22 #include <linux/slab.h> 22 #include <linux/slab.h>
23 23
24 #include <linux/netfilter.h> 24 #include <linux/netfilter.h>
25 #include <net/netlink.h> 25 #include <net/netlink.h>
26 #include <net/sock.h> 26 #include <net/sock.h>
27 #include <net/netfilter/nf_conntrack.h> 27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_core.h> 28 #include <net/netfilter/nf_conntrack_core.h>
29 #include <net/netfilter/nf_conntrack_l4proto.h> 29 #include <net/netfilter/nf_conntrack_l4proto.h>
30 #include <net/netfilter/nf_conntrack_tuple.h> 30 #include <net/netfilter/nf_conntrack_tuple.h>
31 #include <net/netfilter/nf_conntrack_timeout.h> 31 #include <net/netfilter/nf_conntrack_timeout.h>
32 32
33 #include <linux/netfilter/nfnetlink.h> 33 #include <linux/netfilter/nfnetlink.h>
34 #include <linux/netfilter/nfnetlink_cttimeout.h> 34 #include <linux/netfilter/nfnetlink_cttimeout.h>
35 35
36 MODULE_LICENSE("GPL"); 36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); 37 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
38 MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); 38 MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
39 39
40 static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { 40 static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
41 [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, 41 [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
42 .len = CTNL_TIMEOUT_NAME_MAX - 1}, 42 .len = CTNL_TIMEOUT_NAME_MAX - 1},
43 [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, 43 [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 },
44 [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, 44 [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 },
45 [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, 45 [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED },
46 }; 46 };
47 47
48 static int 48 static int
49 ctnl_timeout_parse_policy(void *timeouts, 49 ctnl_timeout_parse_policy(void *timeout,
50 const struct nf_conntrack_l4proto *l4proto, 50 const struct nf_conntrack_l4proto *l4proto,
51 struct net *net, const struct nlattr *attr) 51 struct net *net, const struct nlattr *attr)
52 { 52 {
53 struct nlattr **tb; 53 struct nlattr **tb;
54 int ret = 0; 54 int ret = 0;
55 55
56 if (!l4proto->ctnl_timeout.nlattr_to_obj) 56 if (!l4proto->ctnl_timeout.nlattr_to_obj)
57 return 0; 57 return 0;
58 58
59 tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), 59 tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
60 GFP_KERNEL); 60 GFP_KERNEL);
61 61
62 if (!tb) 62 if (!tb)
63 return -ENOMEM; 63 return -ENOMEM;
64 64
65 ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, 65 ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
66 l4proto->ctnl_timeout.nla_policy, NULL); 66 l4proto->ctnl_timeout.nla_policy, NULL);
67 if (ret < 0) 67 if (ret < 0)
68 goto err; 68 goto err;
69 69
70 ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); 70 ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
71 71
72 err: 72 err:
73 kfree(tb); 73 kfree(tb);
74 return ret; 74 return ret;
75 } 75 }
76 76
77 static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, 77 static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
78 struct sk_buff *skb, 78 struct sk_buff *skb,
79 const struct nlmsghdr *nlh, 79 const struct nlmsghdr *nlh,
80 const struct nlattr * const cda[], 80 const struct nlattr * const cda[],
81 struct netlink_ext_ack *extack) 81 struct netlink_ext_ack *extack)
82 { 82 {
83 __u16 l3num; 83 __u16 l3num;
84 __u8 l4num; 84 __u8 l4num;
85 const struct nf_conntrack_l4proto *l4proto; 85 const struct nf_conntrack_l4proto *l4proto;
86 struct ctnl_timeout *timeout, *matching = NULL; 86 struct ctnl_timeout *timeout, *matching = NULL;
87 char *name; 87 char *name;
88 int ret; 88 int ret;
89 89
90 if (!cda[CTA_TIMEOUT_NAME] || 90 if (!cda[CTA_TIMEOUT_NAME] ||
91 !cda[CTA_TIMEOUT_L3PROTO] || 91 !cda[CTA_TIMEOUT_L3PROTO] ||
92 !cda[CTA_TIMEOUT_L4PROTO] || 92 !cda[CTA_TIMEOUT_L4PROTO] ||
93 !cda[CTA_TIMEOUT_DATA]) 93 !cda[CTA_TIMEOUT_DATA])
94 return -EINVAL; 94 return -EINVAL;
95 95
96 name = nla_data(cda[CTA_TIMEOUT_NAME]); 96 name = nla_data(cda[CTA_TIMEOUT_NAME]);
97 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); 97 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
98 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); 98 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
99 99
100 list_for_each_entry(timeout, &net->nfct_timeout_list, head) { 100 list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
101 if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) 101 if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
102 continue; 102 continue;
103 103
104 if (nlh->nlmsg_flags & NLM_F_EXCL) 104 if (nlh->nlmsg_flags & NLM_F_EXCL)
105 return -EEXIST; 105 return -EEXIST;
106 106
107 matching = timeout; 107 matching = timeout;
108 break; 108 break;
109 } 109 }
110 110
111 if (matching) { 111 if (matching) {
112 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 112 if (nlh->nlmsg_flags & NLM_F_REPLACE) {
113 /* You cannot replace one timeout policy by another of 113 /* You cannot replace one timeout policy by another of
114 * different kind, sorry. 114 * different kind, sorry.
115 */ 115 */
116 if (matching->l3num != l3num || 116 if (matching->l3num != l3num ||
117 matching->l4proto->l4proto != l4num) 117 matching->l4proto->l4proto != l4num)
118 return -EINVAL; 118 return -EINVAL;
119 119
120 return ctnl_timeout_parse_policy(&matching->data, 120 return ctnl_timeout_parse_policy(&matching->data,
121 matching->l4proto, net, 121 matching->l4proto, net,
122 cda[CTA_TIMEOUT_DATA]); 122 cda[CTA_TIMEOUT_DATA]);
123 } 123 }
124 124
125 return -EBUSY; 125 return -EBUSY;
126 } 126 }
127 127
128 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 128 l4proto = nf_ct_l4proto_find_get(l3num, l4num);
129 129
130 /* This protocol is not supportted, skip. */ 130 /* This protocol is not supportted, skip. */
131 if (l4proto->l4proto != l4num) { 131 if (l4proto->l4proto != l4num) {
132 ret = -EOPNOTSUPP; 132 ret = -EOPNOTSUPP;
133 goto err_proto_put; 133 goto err_proto_put;
134 } 134 }
135 135
136 timeout = kzalloc(sizeof(struct ctnl_timeout) + 136 timeout = kzalloc(sizeof(struct ctnl_timeout) +
137 l4proto->ctnl_timeout.obj_size, GFP_KERNEL); 137 l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
138 if (timeout == NULL) { 138 if (timeout == NULL) {
139 ret = -ENOMEM; 139 ret = -ENOMEM;
140 goto err_proto_put; 140 goto err_proto_put;
141 } 141 }
142 142
143 ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, 143 ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
144 cda[CTA_TIMEOUT_DATA]); 144 cda[CTA_TIMEOUT_DATA]);
145 if (ret < 0) 145 if (ret < 0)
146 goto err; 146 goto err;
147 147
148 strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); 148 strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
149 timeout->l3num = l3num; 149 timeout->l3num = l3num;
150 timeout->l4proto = l4proto; 150 timeout->l4proto = l4proto;
151 refcount_set(&timeout->refcnt, 1); 151 refcount_set(&timeout->refcnt, 1);
152 list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); 152 list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
153 153
154 return 0; 154 return 0;
155 err: 155 err:
156 kfree(timeout); 156 kfree(timeout);
157 err_proto_put: 157 err_proto_put:
158 nf_ct_l4proto_put(l4proto); 158 nf_ct_l4proto_put(l4proto);
159 return ret; 159 return ret;
160 } 160 }
161 161
162 static int 162 static int
163 ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, 163 ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
164 int event, struct ctnl_timeout *timeout) 164 int event, struct ctnl_timeout *timeout)
165 { 165 {
166 struct nlmsghdr *nlh; 166 struct nlmsghdr *nlh;
167 struct nfgenmsg *nfmsg; 167 struct nfgenmsg *nfmsg;
168 unsigned int flags = portid ? NLM_F_MULTI : 0; 168 unsigned int flags = portid ? NLM_F_MULTI : 0;
169 const struct nf_conntrack_l4proto *l4proto = timeout->l4proto; 169 const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
170 170
171 event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); 171 event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
172 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); 172 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
173 if (nlh == NULL) 173 if (nlh == NULL)
174 goto nlmsg_failure; 174 goto nlmsg_failure;
175 175
176 nfmsg = nlmsg_data(nlh); 176 nfmsg = nlmsg_data(nlh);
177 nfmsg->nfgen_family = AF_UNSPEC; 177 nfmsg->nfgen_family = AF_UNSPEC;
178 nfmsg->version = NFNETLINK_V0; 178 nfmsg->version = NFNETLINK_V0;
179 nfmsg->res_id = 0; 179 nfmsg->res_id = 0;
180 180
181 if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || 181 if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
182 nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) || 182 nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
183 nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) || 183 nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
184 nla_put_be32(skb, CTA_TIMEOUT_USE, 184 nla_put_be32(skb, CTA_TIMEOUT_USE,
185 htonl(refcount_read(&timeout->refcnt)))) 185 htonl(refcount_read(&timeout->refcnt))))
186 goto nla_put_failure; 186 goto nla_put_failure;
187 187
188 if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { 188 if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
189 struct nlattr *nest_parms; 189 struct nlattr *nest_parms;
190 int ret; 190 int ret;
191 191
192 nest_parms = nla_nest_start(skb, 192 nest_parms = nla_nest_start(skb,
193 CTA_TIMEOUT_DATA | NLA_F_NESTED); 193 CTA_TIMEOUT_DATA | NLA_F_NESTED);
194 if (!nest_parms) 194 if (!nest_parms)
195 goto nla_put_failure; 195 goto nla_put_failure;
196 196
197 ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data); 197 ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
198 if (ret < 0) 198 if (ret < 0)
199 goto nla_put_failure; 199 goto nla_put_failure;
200 200
201 nla_nest_end(skb, nest_parms); 201 nla_nest_end(skb, nest_parms);
202 } 202 }
203 203
204 nlmsg_end(skb, nlh); 204 nlmsg_end(skb, nlh);
205 return skb->len; 205 return skb->len;
206 206
207 nlmsg_failure: 207 nlmsg_failure:
208 nla_put_failure: 208 nla_put_failure:
209 nlmsg_cancel(skb, nlh); 209 nlmsg_cancel(skb, nlh);
210 return -1; 210 return -1;
211 } 211 }
212 212
213 static int 213 static int
214 ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) 214 ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
215 { 215 {
216 struct net *net = sock_net(skb->sk); 216 struct net *net = sock_net(skb->sk);
217 struct ctnl_timeout *cur, *last; 217 struct ctnl_timeout *cur, *last;
218 218
219 if (cb->args[2]) 219 if (cb->args[2])
220 return 0; 220 return 0;
221 221
222 last = (struct ctnl_timeout *)cb->args[1]; 222 last = (struct ctnl_timeout *)cb->args[1];
223 if (cb->args[1]) 223 if (cb->args[1])
224 cb->args[1] = 0; 224 cb->args[1] = 0;
225 225
226 rcu_read_lock(); 226 rcu_read_lock();
227 list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { 227 list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
228 if (last) { 228 if (last) {
229 if (cur != last) 229 if (cur != last)
230 continue; 230 continue;
231 231
232 last = NULL; 232 last = NULL;
233 } 233 }
234 if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, 234 if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
235 cb->nlh->nlmsg_seq, 235 cb->nlh->nlmsg_seq,
236 NFNL_MSG_TYPE(cb->nlh->nlmsg_type), 236 NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
237 IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { 237 IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
238 cb->args[1] = (unsigned long)cur; 238 cb->args[1] = (unsigned long)cur;
239 break; 239 break;
240 } 240 }
241 } 241 }
242 if (!cb->args[1]) 242 if (!cb->args[1])
243 cb->args[2] = 1; 243 cb->args[2] = 1;
244 rcu_read_unlock(); 244 rcu_read_unlock();
245 return skb->len; 245 return skb->len;
246 } 246 }
247 247
248 static int cttimeout_get_timeout(struct net *net, struct sock *ctnl, 248 static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
249 struct sk_buff *skb, 249 struct sk_buff *skb,
250 const struct nlmsghdr *nlh, 250 const struct nlmsghdr *nlh,
251 const struct nlattr * const cda[], 251 const struct nlattr * const cda[],
252 struct netlink_ext_ack *extack) 252 struct netlink_ext_ack *extack)
253 { 253 {
254 int ret = -ENOENT; 254 int ret = -ENOENT;
255 char *name; 255 char *name;
256 struct ctnl_timeout *cur; 256 struct ctnl_timeout *cur;
257 257
258 if (nlh->nlmsg_flags & NLM_F_DUMP) { 258 if (nlh->nlmsg_flags & NLM_F_DUMP) {
259 struct netlink_dump_control c = { 259 struct netlink_dump_control c = {
260 .dump = ctnl_timeout_dump, 260 .dump = ctnl_timeout_dump,
261 }; 261 };
262 return netlink_dump_start(ctnl, skb, nlh, &c); 262 return netlink_dump_start(ctnl, skb, nlh, &c);
263 } 263 }
264 264
265 if (!cda[CTA_TIMEOUT_NAME]) 265 if (!cda[CTA_TIMEOUT_NAME])
266 return -EINVAL; 266 return -EINVAL;
267 name = nla_data(cda[CTA_TIMEOUT_NAME]); 267 name = nla_data(cda[CTA_TIMEOUT_NAME]);
268 268
269 list_for_each_entry(cur, &net->nfct_timeout_list, head) { 269 list_for_each_entry(cur, &net->nfct_timeout_list, head) {
270 struct sk_buff *skb2; 270 struct sk_buff *skb2;
271 271
272 if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) 272 if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
273 continue; 273 continue;
274 274
275 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 275 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
276 if (skb2 == NULL) { 276 if (skb2 == NULL) {
277 ret = -ENOMEM; 277 ret = -ENOMEM;
278 break; 278 break;
279 } 279 }
280 280
281 ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, 281 ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
282 nlh->nlmsg_seq, 282 nlh->nlmsg_seq,
283 NFNL_MSG_TYPE(nlh->nlmsg_type), 283 NFNL_MSG_TYPE(nlh->nlmsg_type),
284 IPCTNL_MSG_TIMEOUT_NEW, cur); 284 IPCTNL_MSG_TIMEOUT_NEW, cur);
285 if (ret <= 0) { 285 if (ret <= 0) {
286 kfree_skb(skb2); 286 kfree_skb(skb2);
287 break; 287 break;
288 } 288 }
289 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, 289 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
290 MSG_DONTWAIT); 290 MSG_DONTWAIT);
291 if (ret > 0) 291 if (ret > 0)
292 ret = 0; 292 ret = 0;
293 293
294 /* this avoids a loop in nfnetlink. */ 294 /* this avoids a loop in nfnetlink. */
295 return ret == -EAGAIN ? -ENOBUFS : ret; 295 return ret == -EAGAIN ? -ENOBUFS : ret;
296 } 296 }
297 return ret; 297 return ret;
298 } 298 }
299 299
300 static int untimeout(struct nf_conn *ct, void *timeout) 300 static int untimeout(struct nf_conn *ct, void *timeout)
301 { 301 {
302 struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); 302 struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
303 303
304 if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) 304 if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
305 RCU_INIT_POINTER(timeout_ext->timeout, NULL); 305 RCU_INIT_POINTER(timeout_ext->timeout, NULL);
306 306
307 /* We are not intended to delete this conntrack. */ 307 /* We are not intended to delete this conntrack. */
308 return 0; 308 return 0;
309 } 309 }
310 310
311 static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) 311 static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
312 { 312 {
313 nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0); 313 nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
314 } 314 }
315 315
316 /* try to delete object, fail if it is still in use. */ 316 /* try to delete object, fail if it is still in use. */
317 static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) 317 static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
318 { 318 {
319 int ret = 0; 319 int ret = 0;
320 320
321 /* We want to avoid races with ctnl_timeout_put. So only when the 321 /* We want to avoid races with ctnl_timeout_put. So only when the
322 * current refcnt is 1, we decrease it to 0. 322 * current refcnt is 1, we decrease it to 0.
323 */ 323 */
324 if (refcount_dec_if_one(&timeout->refcnt)) { 324 if (refcount_dec_if_one(&timeout->refcnt)) {
325 /* We are protected by nfnl mutex. */ 325 /* We are protected by nfnl mutex. */
326 list_del_rcu(&timeout->head); 326 list_del_rcu(&timeout->head);
327 nf_ct_l4proto_put(timeout->l4proto); 327 nf_ct_l4proto_put(timeout->l4proto);
328 ctnl_untimeout(net, timeout); 328 ctnl_untimeout(net, timeout);
329 kfree_rcu(timeout, rcu_head); 329 kfree_rcu(timeout, rcu_head);
330 } else { 330 } else {
331 ret = -EBUSY; 331 ret = -EBUSY;
332 } 332 }
333 return ret; 333 return ret;
334 } 334 }
335 335
336 static int cttimeout_del_timeout(struct net *net, struct sock *ctnl, 336 static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
337 struct sk_buff *skb, 337 struct sk_buff *skb,
338 const struct nlmsghdr *nlh, 338 const struct nlmsghdr *nlh,
339 const struct nlattr * const cda[], 339 const struct nlattr * const cda[],
340 struct netlink_ext_ack *extack) 340 struct netlink_ext_ack *extack)
341 { 341 {
342 struct ctnl_timeout *cur, *tmp; 342 struct ctnl_timeout *cur, *tmp;
343 int ret = -ENOENT; 343 int ret = -ENOENT;
344 char *name; 344 char *name;
345 345
346 if (!cda[CTA_TIMEOUT_NAME]) { 346 if (!cda[CTA_TIMEOUT_NAME]) {
347 list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, 347 list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
348 head) 348 head)
349 ctnl_timeout_try_del(net, cur); 349 ctnl_timeout_try_del(net, cur);
350 350
351 return 0; 351 return 0;
352 } 352 }
353 name = nla_data(cda[CTA_TIMEOUT_NAME]); 353 name = nla_data(cda[CTA_TIMEOUT_NAME]);
354 354
355 list_for_each_entry(cur, &net->nfct_timeout_list, head) { 355 list_for_each_entry(cur, &net->nfct_timeout_list, head) {
356 if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) 356 if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
357 continue; 357 continue;
358 358
359 ret = ctnl_timeout_try_del(net, cur); 359 ret = ctnl_timeout_try_del(net, cur);
360 if (ret < 0) 360 if (ret < 0)
361 return ret; 361 return ret;
362 362
363 break; 363 break;
364 } 364 }
365 return ret; 365 return ret;
366 } 366 }
367 367
368 static int cttimeout_default_set(struct net *net, struct sock *ctnl, 368 static int cttimeout_default_set(struct net *net, struct sock *ctnl,
369 struct sk_buff *skb, 369 struct sk_buff *skb,
370 const struct nlmsghdr *nlh, 370 const struct nlmsghdr *nlh,
371 const struct nlattr * const cda[], 371 const struct nlattr * const cda[],
372 struct netlink_ext_ack *extack) 372 struct netlink_ext_ack *extack)
373 { 373 {
374 const struct nf_conntrack_l4proto *l4proto; 374 const struct nf_conntrack_l4proto *l4proto;
375 unsigned int *timeouts;
376 __u16 l3num; 375 __u16 l3num;
377 __u8 l4num; 376 __u8 l4num;
378 int ret; 377 int ret;
379 378
380 if (!cda[CTA_TIMEOUT_L3PROTO] || 379 if (!cda[CTA_TIMEOUT_L3PROTO] ||
381 !cda[CTA_TIMEOUT_L4PROTO] || 380 !cda[CTA_TIMEOUT_L4PROTO] ||
382 !cda[CTA_TIMEOUT_DATA]) 381 !cda[CTA_TIMEOUT_DATA])
383 return -EINVAL; 382 return -EINVAL;
384 383
385 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); 384 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
386 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); 385 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
387 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 386 l4proto = nf_ct_l4proto_find_get(l3num, l4num);
388 387
389 /* This protocol is not supported, skip. */ 388 /* This protocol is not supported, skip. */
390 if (l4proto->l4proto != l4num) { 389 if (l4proto->l4proto != l4num) {
391 ret = -EOPNOTSUPP; 390 ret = -EOPNOTSUPP;
392 goto err; 391 goto err;
393 } 392 }
394 393
395 timeouts = l4proto->get_timeouts(net); 394 ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
396
397 ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
398 cda[CTA_TIMEOUT_DATA]); 395 cda[CTA_TIMEOUT_DATA]);
399 if (ret < 0) 396 if (ret < 0)
400 goto err; 397 goto err;
401 398
402 nf_ct_l4proto_put(l4proto); 399 nf_ct_l4proto_put(l4proto);
403 return 0; 400 return 0;
404 err: 401 err:
405 nf_ct_l4proto_put(l4proto); 402 nf_ct_l4proto_put(l4proto);
406 return ret; 403 return ret;
407 } 404 }
408 405
409 static int 406 static int
410 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, 407 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
411 u32 seq, u32 type, int event, 408 u32 seq, u32 type, int event,
412 const struct nf_conntrack_l4proto *l4proto) 409 const struct nf_conntrack_l4proto *l4proto)
413 { 410 {
414 struct nlmsghdr *nlh; 411 struct nlmsghdr *nlh;
415 struct nfgenmsg *nfmsg; 412 struct nfgenmsg *nfmsg;
416 unsigned int flags = portid ? NLM_F_MULTI : 0; 413 unsigned int flags = portid ? NLM_F_MULTI : 0;
417 414
418 event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); 415 event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
419 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); 416 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
420 if (nlh == NULL) 417 if (nlh == NULL)
421 goto nlmsg_failure; 418 goto nlmsg_failure;
422 419
423 nfmsg = nlmsg_data(nlh); 420 nfmsg = nlmsg_data(nlh);
424 nfmsg->nfgen_family = AF_UNSPEC; 421 nfmsg->nfgen_family = AF_UNSPEC;
425 nfmsg->version = NFNETLINK_V0; 422 nfmsg->version = NFNETLINK_V0;
426 nfmsg->res_id = 0; 423 nfmsg->res_id = 0;
427 424
428 if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || 425 if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
429 nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) 426 nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
430 goto nla_put_failure; 427 goto nla_put_failure;
431 428
432 if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { 429 if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
433 struct nlattr *nest_parms; 430 struct nlattr *nest_parms;
434 unsigned int *timeouts = l4proto->get_timeouts(net);
435 int ret; 431 int ret;
436 432
437 nest_parms = nla_nest_start(skb, 433 nest_parms = nla_nest_start(skb,
438 CTA_TIMEOUT_DATA | NLA_F_NESTED); 434 CTA_TIMEOUT_DATA | NLA_F_NESTED);
439 if (!nest_parms) 435 if (!nest_parms)
440 goto nla_put_failure; 436 goto nla_put_failure;
441 437
442 ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); 438 ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
443 if (ret < 0) 439 if (ret < 0)
444 goto nla_put_failure; 440 goto nla_put_failure;
445 441
446 nla_nest_end(skb, nest_parms); 442 nla_nest_end(skb, nest_parms);
447 } 443 }
448 444
449 nlmsg_end(skb, nlh); 445 nlmsg_end(skb, nlh);
450 return skb->len; 446 return skb->len;
451 447
452 nlmsg_failure: 448 nlmsg_failure:
453 nla_put_failure: 449 nla_put_failure:
454 nlmsg_cancel(skb, nlh); 450 nlmsg_cancel(skb, nlh);
455 return -1; 451 return -1;
456 } 452 }
457 453
458 static int cttimeout_default_get(struct net *net, struct sock *ctnl, 454 static int cttimeout_default_get(struct net *net, struct sock *ctnl,
459 struct sk_buff *skb, 455 struct sk_buff *skb,
460 const struct nlmsghdr *nlh, 456 const struct nlmsghdr *nlh,
461 const struct nlattr * const cda[], 457 const struct nlattr * const cda[],
462 struct netlink_ext_ack *extack) 458 struct netlink_ext_ack *extack)
463 { 459 {
464 const struct nf_conntrack_l4proto *l4proto; 460 const struct nf_conntrack_l4proto *l4proto;
465 struct sk_buff *skb2; 461 struct sk_buff *skb2;
466 int ret, err; 462 int ret, err;
467 __u16 l3num; 463 __u16 l3num;
468 __u8 l4num; 464 __u8 l4num;
469 465
470 if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) 466 if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
471 return -EINVAL; 467 return -EINVAL;
472 468
473 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); 469 l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
474 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); 470 l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
475 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 471 l4proto = nf_ct_l4proto_find_get(l3num, l4num);
476 472
477 /* This protocol is not supported, skip. */ 473 /* This protocol is not supported, skip. */
478 if (l4proto->l4proto != l4num) { 474 if (l4proto->l4proto != l4num) {
479 err = -EOPNOTSUPP; 475 err = -EOPNOTSUPP;
480 goto err; 476 goto err;
481 } 477 }
482 478
483 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 479 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
484 if (skb2 == NULL) { 480 if (skb2 == NULL) {
485 err = -ENOMEM; 481 err = -ENOMEM;
486 goto err; 482 goto err;
487 } 483 }
488 484
489 ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, 485 ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
490 nlh->nlmsg_seq, 486 nlh->nlmsg_seq,
491 NFNL_MSG_TYPE(nlh->nlmsg_type), 487 NFNL_MSG_TYPE(nlh->nlmsg_type),
492 IPCTNL_MSG_TIMEOUT_DEFAULT_SET, 488 IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
493 l4proto); 489 l4proto);
494 if (ret <= 0) { 490 if (ret <= 0) {
495 kfree_skb(skb2); 491 kfree_skb(skb2);
496 err = -ENOMEM; 492 err = -ENOMEM;
497 goto err; 493 goto err;
498 } 494 }
499 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); 495 ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
500 if (ret > 0) 496 if (ret > 0)
501 ret = 0; 497 ret = 0;
502 498
503 /* this avoids a loop in nfnetlink. */ 499 /* this avoids a loop in nfnetlink. */
504 return ret == -EAGAIN ? -ENOBUFS : ret; 500 return ret == -EAGAIN ? -ENOBUFS : ret;
505 err: 501 err:
506 nf_ct_l4proto_put(l4proto); 502 nf_ct_l4proto_put(l4proto);
507 return err; 503 return err;
508 } 504 }
509 505
510 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 506 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
511 static struct ctnl_timeout * 507 static struct ctnl_timeout *
512 ctnl_timeout_find_get(struct net *net, const char *name) 508 ctnl_timeout_find_get(struct net *net, const char *name)
513 { 509 {
514 struct ctnl_timeout *timeout, *matching = NULL; 510 struct ctnl_timeout *timeout, *matching = NULL;
515 511
516 list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { 512 list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
517 if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) 513 if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
518 continue; 514 continue;
519 515
520 if (!try_module_get(THIS_MODULE)) 516 if (!try_module_get(THIS_MODULE))
521 goto err; 517 goto err;
522 518
523 if (!refcount_inc_not_zero(&timeout->refcnt)) { 519 if (!refcount_inc_not_zero(&timeout->refcnt)) {
524 module_put(THIS_MODULE); 520 module_put(THIS_MODULE);
525 goto err; 521 goto err;
526 } 522 }
527 matching = timeout; 523 matching = timeout;
528 break; 524 break;
529 } 525 }
530 err: 526 err:
531 return matching; 527 return matching;
532 } 528 }
533 529
534 static void ctnl_timeout_put(struct ctnl_timeout *timeout) 530 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
535 { 531 {
536 if (refcount_dec_and_test(&timeout->refcnt)) 532 if (refcount_dec_and_test(&timeout->refcnt))
537 kfree_rcu(timeout, rcu_head); 533 kfree_rcu(timeout, rcu_head);
538 534
539 module_put(THIS_MODULE); 535 module_put(THIS_MODULE);
540 } 536 }
541 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ 537 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
542 538
543 static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { 539 static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
544 [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, 540 [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout,
545 .attr_count = CTA_TIMEOUT_MAX, 541 .attr_count = CTA_TIMEOUT_MAX,
546 .policy = cttimeout_nla_policy }, 542 .policy = cttimeout_nla_policy },
547 [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, 543 [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout,
548 .attr_count = CTA_TIMEOUT_MAX, 544 .attr_count = CTA_TIMEOUT_MAX,
549 .policy = cttimeout_nla_policy }, 545 .policy = cttimeout_nla_policy },
550 [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, 546 [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout,
551 .attr_count = CTA_TIMEOUT_MAX, 547 .attr_count = CTA_TIMEOUT_MAX,
552 .policy = cttimeout_nla_policy }, 548 .policy = cttimeout_nla_policy },
553 [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, 549 [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
554 .attr_count = CTA_TIMEOUT_MAX, 550 .attr_count = CTA_TIMEOUT_MAX,
555 .policy = cttimeout_nla_policy }, 551 .policy = cttimeout_nla_policy },
556 [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, 552 [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
557 .attr_count = CTA_TIMEOUT_MAX, 553 .attr_count = CTA_TIMEOUT_MAX,
558 .policy = cttimeout_nla_policy }, 554 .policy = cttimeout_nla_policy },
559 }; 555 };
560 556
561 static const struct nfnetlink_subsystem cttimeout_subsys = { 557 static const struct nfnetlink_subsystem cttimeout_subsys = {
562 .name = "conntrack_timeout", 558 .name = "conntrack_timeout",
563 .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, 559 .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT,
564 .cb_count = IPCTNL_MSG_TIMEOUT_MAX, 560 .cb_count = IPCTNL_MSG_TIMEOUT_MAX,
565 .cb = cttimeout_cb, 561 .cb = cttimeout_cb,
566 }; 562 };
567 563
568 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); 564 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
569 565
/* Per-netns setup: start with an empty list of timeout policy objects. */
static int __net_init cttimeout_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->nfct_timeout_list);

	return 0;
}
576 572
/*
 * Per-netns teardown: detach timeout policies from conntrack entries,
 * then drop this namespace's references to every policy object.
 */
static void __net_exit cttimeout_net_exit(struct net *net)
{
	struct ctnl_timeout *cur, *tmp;

	/* Presumably strips timeout extensions from unconfirmed and
	 * confirmed conntracks so nothing keeps pointing at a policy
	 * freed below — semantics of these helpers not visible here. */
	nf_ct_unconfirmed_destroy(net);
	ctnl_untimeout(net, NULL);

	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
		list_del_rcu(&cur->head);
		nf_ct_l4proto_put(cur->l4proto);

		/* Free only when the last reference goes away; kfree_rcu()
		 * defers the free past any in-flight RCU readers. */
		if (refcount_dec_and_test(&cur->refcnt))
			kfree_rcu(cur, rcu_head);
	}
}
592 588
/* Per-network-namespace init/exit callbacks registered at module load. */
static struct pernet_operations cttimeout_ops = {
	.init	= cttimeout_net_init,
	.exit	= cttimeout_net_exit,
};
597 593
598 static int __init cttimeout_init(void) 594 static int __init cttimeout_init(void)
599 { 595 {
600 int ret; 596 int ret;
601 597
602 ret = register_pernet_subsys(&cttimeout_ops); 598 ret = register_pernet_subsys(&cttimeout_ops);
603 if (ret < 0) 599 if (ret < 0)
604 return ret; 600 return ret;
605 601
606 ret = nfnetlink_subsys_register(&cttimeout_subsys); 602 ret = nfnetlink_subsys_register(&cttimeout_subsys);
607 if (ret < 0) { 603 if (ret < 0) {
608 pr_err("cttimeout_init: cannot register cttimeout with " 604 pr_err("cttimeout_init: cannot register cttimeout with "
609 "nfnetlink.\n"); 605 "nfnetlink.\n");
610 goto err_out; 606 goto err_out;
611 } 607 }
612 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 608 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
613 RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get); 609 RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, ctnl_timeout_find_get);
614 RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put); 610 RCU_INIT_POINTER(nf_ct_timeout_put_hook, ctnl_timeout_put);
615 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ 611 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
616 return 0; 612 return 0;
617 613
618 err_out: 614 err_out:
619 unregister_pernet_subsys(&cttimeout_ops); 615 unregister_pernet_subsys(&cttimeout_ops);
620 return ret; 616 return ret;
621 } 617 }
622 618
/*
 * Module exit: tear down in reverse order of cttimeout_init(), then
 * clear the published hooks and wait out concurrent RCU readers.
 */
static void __exit cttimeout_exit(void)
{
	nfnetlink_subsys_unregister(&cttimeout_subsys);

	unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
	/* Ensure no CPU still dereferences the hooks before unload. */
	synchronize_rcu();
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
}
634 630
/* Standard module entry/exit points. */
module_init(cttimeout_init);
module_exit(cttimeout_exit);
637 633