Commit 9e337b0fb3baa3c22490365b1bdee6f4741413d4
1 parent
96c25c81ec
Exists in
master
and in
7 other branches
net: annotate inet_timewait_sock bitfields
The use of bitfields here would lead to false positive warnings with kmemcheck. Silence them. (Additionally, one erroneous comment related to the bitfield was also fixed.) Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Showing 2 changed files with 7 additions and 1 deletion Inline Diff
include/net/inet_timewait_sock.h
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * Definitions for a generic INET TIMEWAIT sock | 6 | * Definitions for a generic INET TIMEWAIT sock |
7 | * | 7 | * |
8 | * From code originally in net/tcp.h | 8 | * From code originally in net/tcp.h |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License | 11 | * modify it under the terms of the GNU General Public License |
12 | * as published by the Free Software Foundation; either version | 12 | * as published by the Free Software Foundation; either version |
13 | * 2 of the License, or (at your option) any later version. | 13 | * 2 of the License, or (at your option) any later version. |
14 | */ | 14 | */ |
15 | #ifndef _INET_TIMEWAIT_SOCK_ | 15 | #ifndef _INET_TIMEWAIT_SOCK_ |
16 | #define _INET_TIMEWAIT_SOCK_ | 16 | #define _INET_TIMEWAIT_SOCK_ |
17 | 17 | ||
18 | 18 | ||
19 | #include <linux/kmemcheck.h> | ||
19 | #include <linux/list.h> | 20 | #include <linux/list.h> |
20 | #include <linux/module.h> | 21 | #include <linux/module.h> |
21 | #include <linux/timer.h> | 22 | #include <linux/timer.h> |
22 | #include <linux/types.h> | 23 | #include <linux/types.h> |
23 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
24 | 25 | ||
25 | #include <net/inet_sock.h> | 26 | #include <net/inet_sock.h> |
26 | #include <net/sock.h> | 27 | #include <net/sock.h> |
27 | #include <net/tcp_states.h> | 28 | #include <net/tcp_states.h> |
28 | #include <net/timewait_sock.h> | 29 | #include <net/timewait_sock.h> |
29 | 30 | ||
30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
31 | 32 | ||
32 | struct inet_hashinfo; | 33 | struct inet_hashinfo; |
33 | 34 | ||
34 | #define INET_TWDR_RECYCLE_SLOTS_LOG 5 | 35 | #define INET_TWDR_RECYCLE_SLOTS_LOG 5 |
35 | #define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) | 36 | #define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) |
36 | 37 | ||
37 | /* | 38 | /* |
38 | * If time > 4sec, it is "slow" path, no recycling is required, | 39 | * If time > 4sec, it is "slow" path, no recycling is required, |
39 | * so that we select tick to get range about 4 seconds. | 40 | * so that we select tick to get range about 4 seconds. |
40 | */ | 41 | */ |
41 | #if HZ <= 16 || HZ > 4096 | 42 | #if HZ <= 16 || HZ > 4096 |
42 | # error Unsupported: HZ <= 16 or HZ > 4096 | 43 | # error Unsupported: HZ <= 16 or HZ > 4096 |
43 | #elif HZ <= 32 | 44 | #elif HZ <= 32 |
44 | # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 45 | # define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
45 | #elif HZ <= 64 | 46 | #elif HZ <= 64 |
46 | # define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 47 | # define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
47 | #elif HZ <= 128 | 48 | #elif HZ <= 128 |
48 | # define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 49 | # define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
49 | #elif HZ <= 256 | 50 | #elif HZ <= 256 |
50 | # define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 51 | # define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
51 | #elif HZ <= 512 | 52 | #elif HZ <= 512 |
52 | # define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 53 | # define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
53 | #elif HZ <= 1024 | 54 | #elif HZ <= 1024 |
54 | # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 55 | # define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
55 | #elif HZ <= 2048 | 56 | #elif HZ <= 2048 |
56 | # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 57 | # define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
57 | #else | 58 | #else |
58 | # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) | 59 | # define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) |
59 | #endif | 60 | #endif |
60 | 61 | ||
61 | /* TIME_WAIT reaping mechanism. */ | 62 | /* TIME_WAIT reaping mechanism. */ |
62 | #define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ | 63 | #define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ |
63 | 64 | ||
64 | #define INET_TWDR_TWKILL_QUOTA 100 | 65 | #define INET_TWDR_TWKILL_QUOTA 100 |
65 | 66 | ||
66 | struct inet_timewait_death_row { | 67 | struct inet_timewait_death_row { |
67 | /* Short-time timewait calendar */ | 68 | /* Short-time timewait calendar */ |
68 | int twcal_hand; | 69 | int twcal_hand; |
69 | unsigned long twcal_jiffie; | 70 | unsigned long twcal_jiffie; |
70 | struct timer_list twcal_timer; | 71 | struct timer_list twcal_timer; |
71 | struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; | 72 | struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; |
72 | 73 | ||
73 | spinlock_t death_lock; | 74 | spinlock_t death_lock; |
74 | int tw_count; | 75 | int tw_count; |
75 | int period; | 76 | int period; |
76 | u32 thread_slots; | 77 | u32 thread_slots; |
77 | struct work_struct twkill_work; | 78 | struct work_struct twkill_work; |
78 | struct timer_list tw_timer; | 79 | struct timer_list tw_timer; |
79 | int slot; | 80 | int slot; |
80 | struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; | 81 | struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; |
81 | struct inet_hashinfo *hashinfo; | 82 | struct inet_hashinfo *hashinfo; |
82 | int sysctl_tw_recycle; | 83 | int sysctl_tw_recycle; |
83 | int sysctl_max_tw_buckets; | 84 | int sysctl_max_tw_buckets; |
84 | }; | 85 | }; |
85 | 86 | ||
86 | extern void inet_twdr_hangman(unsigned long data); | 87 | extern void inet_twdr_hangman(unsigned long data); |
87 | extern void inet_twdr_twkill_work(struct work_struct *work); | 88 | extern void inet_twdr_twkill_work(struct work_struct *work); |
88 | extern void inet_twdr_twcal_tick(unsigned long data); | 89 | extern void inet_twdr_twcal_tick(unsigned long data); |
89 | 90 | ||
90 | #if (BITS_PER_LONG == 64) | 91 | #if (BITS_PER_LONG == 64) |
91 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 | 92 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 |
92 | #else | 93 | #else |
93 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 | 94 | #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 |
94 | #endif | 95 | #endif |
95 | 96 | ||
96 | struct inet_bind_bucket; | 97 | struct inet_bind_bucket; |
97 | 98 | ||
98 | /* | 99 | /* |
99 | * This is a TIME_WAIT sock. It works around the memory consumption | 100 | * This is a TIME_WAIT sock. It works around the memory consumption |
100 | * problems of sockets in such a state on heavily loaded servers, but | 101 | * problems of sockets in such a state on heavily loaded servers, but |
101 | * without violating the protocol specification. | 102 | * without violating the protocol specification. |
102 | */ | 103 | */ |
103 | struct inet_timewait_sock { | 104 | struct inet_timewait_sock { |
104 | /* | 105 | /* |
105 | * Now struct sock also uses sock_common, so please just | 106 | * Now struct sock also uses sock_common, so please just |
106 | * don't add nothing before this first member (__tw_common) --acme | 107 | * don't add nothing before this first member (__tw_common) --acme |
107 | */ | 108 | */ |
108 | struct sock_common __tw_common; | 109 | struct sock_common __tw_common; |
109 | #define tw_family __tw_common.skc_family | 110 | #define tw_family __tw_common.skc_family |
110 | #define tw_state __tw_common.skc_state | 111 | #define tw_state __tw_common.skc_state |
111 | #define tw_reuse __tw_common.skc_reuse | 112 | #define tw_reuse __tw_common.skc_reuse |
112 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if | 113 | #define tw_bound_dev_if __tw_common.skc_bound_dev_if |
113 | #define tw_node __tw_common.skc_nulls_node | 114 | #define tw_node __tw_common.skc_nulls_node |
114 | #define tw_bind_node __tw_common.skc_bind_node | 115 | #define tw_bind_node __tw_common.skc_bind_node |
115 | #define tw_refcnt __tw_common.skc_refcnt | 116 | #define tw_refcnt __tw_common.skc_refcnt |
116 | #define tw_hash __tw_common.skc_hash | 117 | #define tw_hash __tw_common.skc_hash |
117 | #define tw_prot __tw_common.skc_prot | 118 | #define tw_prot __tw_common.skc_prot |
118 | #define tw_net __tw_common.skc_net | 119 | #define tw_net __tw_common.skc_net |
119 | int tw_timeout; | 120 | int tw_timeout; |
120 | volatile unsigned char tw_substate; | 121 | volatile unsigned char tw_substate; |
121 | /* 3 bits hole, try to pack */ | 122 | /* 3 bits hole, try to pack */ |
122 | unsigned char tw_rcv_wscale; | 123 | unsigned char tw_rcv_wscale; |
123 | /* Socket demultiplex comparisons on incoming packets. */ | 124 | /* Socket demultiplex comparisons on incoming packets. */ |
124 | /* these five are in inet_sock */ | 125 | /* these five are in inet_sock */ |
125 | __be16 tw_sport; | 126 | __be16 tw_sport; |
126 | __be32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); | 127 | __be32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); |
127 | __be32 tw_rcv_saddr; | 128 | __be32 tw_rcv_saddr; |
128 | __be16 tw_dport; | 129 | __be16 tw_dport; |
129 | __u16 tw_num; | 130 | __u16 tw_num; |
131 | kmemcheck_bitfield_begin(flags); | ||
130 | /* And these are ours. */ | 132 | /* And these are ours. */ |
131 | __u8 tw_ipv6only:1, | 133 | __u8 tw_ipv6only:1, |
132 | tw_transparent:1; | 134 | tw_transparent:1; |
133 | /* 15 bits hole, try to pack */ | 135 | /* 14 bits hole, try to pack */ |
136 | kmemcheck_bitfield_end(flags); | ||
134 | __u16 tw_ipv6_offset; | 137 | __u16 tw_ipv6_offset; |
135 | unsigned long tw_ttd; | 138 | unsigned long tw_ttd; |
136 | struct inet_bind_bucket *tw_tb; | 139 | struct inet_bind_bucket *tw_tb; |
137 | struct hlist_node tw_death_node; | 140 | struct hlist_node tw_death_node; |
138 | }; | 141 | }; |
139 | 142 | ||
140 | static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, | 143 | static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, |
141 | struct hlist_nulls_head *list) | 144 | struct hlist_nulls_head *list) |
142 | { | 145 | { |
143 | hlist_nulls_add_head_rcu(&tw->tw_node, list); | 146 | hlist_nulls_add_head_rcu(&tw->tw_node, list); |
144 | } | 147 | } |
145 | 148 | ||
146 | static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, | 149 | static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, |
147 | struct hlist_head *list) | 150 | struct hlist_head *list) |
148 | { | 151 | { |
149 | hlist_add_head(&tw->tw_bind_node, list); | 152 | hlist_add_head(&tw->tw_bind_node, list); |
150 | } | 153 | } |
151 | 154 | ||
152 | static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) | 155 | static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) |
153 | { | 156 | { |
154 | return !hlist_unhashed(&tw->tw_death_node); | 157 | return !hlist_unhashed(&tw->tw_death_node); |
155 | } | 158 | } |
156 | 159 | ||
157 | static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) | 160 | static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) |
158 | { | 161 | { |
159 | tw->tw_death_node.pprev = NULL; | 162 | tw->tw_death_node.pprev = NULL; |
160 | } | 163 | } |
161 | 164 | ||
162 | static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | 165 | static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) |
163 | { | 166 | { |
164 | __hlist_del(&tw->tw_death_node); | 167 | __hlist_del(&tw->tw_death_node); |
165 | inet_twsk_dead_node_init(tw); | 168 | inet_twsk_dead_node_init(tw); |
166 | } | 169 | } |
167 | 170 | ||
168 | static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) | 171 | static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) |
169 | { | 172 | { |
170 | if (inet_twsk_dead_hashed(tw)) { | 173 | if (inet_twsk_dead_hashed(tw)) { |
171 | __inet_twsk_del_dead_node(tw); | 174 | __inet_twsk_del_dead_node(tw); |
172 | return 1; | 175 | return 1; |
173 | } | 176 | } |
174 | return 0; | 177 | return 0; |
175 | } | 178 | } |
176 | 179 | ||
177 | #define inet_twsk_for_each(tw, node, head) \ | 180 | #define inet_twsk_for_each(tw, node, head) \ |
178 | hlist_nulls_for_each_entry(tw, node, head, tw_node) | 181 | hlist_nulls_for_each_entry(tw, node, head, tw_node) |
179 | 182 | ||
180 | #define inet_twsk_for_each_inmate(tw, node, jail) \ | 183 | #define inet_twsk_for_each_inmate(tw, node, jail) \ |
181 | hlist_for_each_entry(tw, node, jail, tw_death_node) | 184 | hlist_for_each_entry(tw, node, jail, tw_death_node) |
182 | 185 | ||
183 | #define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ | 186 | #define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ |
184 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) | 187 | hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) |
185 | 188 | ||
186 | static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) | 189 | static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) |
187 | { | 190 | { |
188 | return (struct inet_timewait_sock *)sk; | 191 | return (struct inet_timewait_sock *)sk; |
189 | } | 192 | } |
190 | 193 | ||
191 | static inline __be32 inet_rcv_saddr(const struct sock *sk) | 194 | static inline __be32 inet_rcv_saddr(const struct sock *sk) |
192 | { | 195 | { |
193 | return likely(sk->sk_state != TCP_TIME_WAIT) ? | 196 | return likely(sk->sk_state != TCP_TIME_WAIT) ? |
194 | inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; | 197 | inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; |
195 | } | 198 | } |
196 | 199 | ||
197 | extern void inet_twsk_put(struct inet_timewait_sock *tw); | 200 | extern void inet_twsk_put(struct inet_timewait_sock *tw); |
198 | 201 | ||
199 | extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, | 202 | extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, |
200 | const int state); | 203 | const int state); |
201 | 204 | ||
202 | extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, | 205 | extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, |
203 | struct sock *sk, | 206 | struct sock *sk, |
204 | struct inet_hashinfo *hashinfo); | 207 | struct inet_hashinfo *hashinfo); |
205 | 208 | ||
206 | extern void inet_twsk_schedule(struct inet_timewait_sock *tw, | 209 | extern void inet_twsk_schedule(struct inet_timewait_sock *tw, |
207 | struct inet_timewait_death_row *twdr, | 210 | struct inet_timewait_death_row *twdr, |
208 | const int timeo, const int timewait_len); | 211 | const int timeo, const int timewait_len); |
209 | extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, | 212 | extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, |
210 | struct inet_timewait_death_row *twdr); | 213 | struct inet_timewait_death_row *twdr); |
211 | 214 | ||
212 | extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, | 215 | extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, |
213 | struct inet_timewait_death_row *twdr, int family); | 216 | struct inet_timewait_death_row *twdr, int family); |
214 | 217 | ||
215 | static inline | 218 | static inline |
216 | struct net *twsk_net(const struct inet_timewait_sock *twsk) | 219 | struct net *twsk_net(const struct inet_timewait_sock *twsk) |
217 | { | 220 | { |
218 | #ifdef CONFIG_NET_NS | 221 | #ifdef CONFIG_NET_NS |
219 | return twsk->tw_net; | 222 | return twsk->tw_net; |
220 | #else | 223 | #else |
221 | return &init_net; | 224 | return &init_net; |
222 | #endif | 225 | #endif |
223 | } | 226 | } |
224 | 227 | ||
225 | static inline | 228 | static inline |
226 | void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net) | 229 | void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net) |
227 | { | 230 | { |
228 | #ifdef CONFIG_NET_NS | 231 | #ifdef CONFIG_NET_NS |
229 | twsk->tw_net = net; | 232 | twsk->tw_net = net; |
230 | #endif | 233 | #endif |
231 | } | 234 | } |
232 | #endif /* _INET_TIMEWAIT_SOCK_ */ | 235 | #endif /* _INET_TIMEWAIT_SOCK_ */ |
233 | 236 |
net/ipv4/inet_timewait_sock.c
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * Generic TIME_WAIT sockets functions | 6 | * Generic TIME_WAIT sockets functions |
7 | * | 7 | * |
8 | * From code orinally in TCP | 8 | * From code orinally in TCP |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/kmemcheck.h> | ||
12 | #include <net/inet_hashtables.h> | 13 | #include <net/inet_hashtables.h> |
13 | #include <net/inet_timewait_sock.h> | 14 | #include <net/inet_timewait_sock.h> |
14 | #include <net/ip.h> | 15 | #include <net/ip.h> |
15 | 16 | ||
16 | /* Must be called with locally disabled BHs. */ | 17 | /* Must be called with locally disabled BHs. */ |
17 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, | 18 | static void __inet_twsk_kill(struct inet_timewait_sock *tw, |
18 | struct inet_hashinfo *hashinfo) | 19 | struct inet_hashinfo *hashinfo) |
19 | { | 20 | { |
20 | struct inet_bind_hashbucket *bhead; | 21 | struct inet_bind_hashbucket *bhead; |
21 | struct inet_bind_bucket *tb; | 22 | struct inet_bind_bucket *tb; |
22 | /* Unlink from established hashes. */ | 23 | /* Unlink from established hashes. */ |
23 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); | 24 | spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); |
24 | 25 | ||
25 | spin_lock(lock); | 26 | spin_lock(lock); |
26 | if (hlist_nulls_unhashed(&tw->tw_node)) { | 27 | if (hlist_nulls_unhashed(&tw->tw_node)) { |
27 | spin_unlock(lock); | 28 | spin_unlock(lock); |
28 | return; | 29 | return; |
29 | } | 30 | } |
30 | hlist_nulls_del_rcu(&tw->tw_node); | 31 | hlist_nulls_del_rcu(&tw->tw_node); |
31 | sk_nulls_node_init(&tw->tw_node); | 32 | sk_nulls_node_init(&tw->tw_node); |
32 | spin_unlock(lock); | 33 | spin_unlock(lock); |
33 | 34 | ||
34 | /* Disassociate with bind bucket. */ | 35 | /* Disassociate with bind bucket. */ |
35 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, | 36 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, |
36 | hashinfo->bhash_size)]; | 37 | hashinfo->bhash_size)]; |
37 | spin_lock(&bhead->lock); | 38 | spin_lock(&bhead->lock); |
38 | tb = tw->tw_tb; | 39 | tb = tw->tw_tb; |
39 | __hlist_del(&tw->tw_bind_node); | 40 | __hlist_del(&tw->tw_bind_node); |
40 | tw->tw_tb = NULL; | 41 | tw->tw_tb = NULL; |
41 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | 42 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); |
42 | spin_unlock(&bhead->lock); | 43 | spin_unlock(&bhead->lock); |
43 | #ifdef SOCK_REFCNT_DEBUG | 44 | #ifdef SOCK_REFCNT_DEBUG |
44 | if (atomic_read(&tw->tw_refcnt) != 1) { | 45 | if (atomic_read(&tw->tw_refcnt) != 1) { |
45 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", | 46 | printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", |
46 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); | 47 | tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); |
47 | } | 48 | } |
48 | #endif | 49 | #endif |
49 | inet_twsk_put(tw); | 50 | inet_twsk_put(tw); |
50 | } | 51 | } |
51 | 52 | ||
52 | void inet_twsk_put(struct inet_timewait_sock *tw) | 53 | void inet_twsk_put(struct inet_timewait_sock *tw) |
53 | { | 54 | { |
54 | if (atomic_dec_and_test(&tw->tw_refcnt)) { | 55 | if (atomic_dec_and_test(&tw->tw_refcnt)) { |
55 | struct module *owner = tw->tw_prot->owner; | 56 | struct module *owner = tw->tw_prot->owner; |
56 | twsk_destructor((struct sock *)tw); | 57 | twsk_destructor((struct sock *)tw); |
57 | #ifdef SOCK_REFCNT_DEBUG | 58 | #ifdef SOCK_REFCNT_DEBUG |
58 | printk(KERN_DEBUG "%s timewait_sock %p released\n", | 59 | printk(KERN_DEBUG "%s timewait_sock %p released\n", |
59 | tw->tw_prot->name, tw); | 60 | tw->tw_prot->name, tw); |
60 | #endif | 61 | #endif |
61 | release_net(twsk_net(tw)); | 62 | release_net(twsk_net(tw)); |
62 | kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); | 63 | kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); |
63 | module_put(owner); | 64 | module_put(owner); |
64 | } | 65 | } |
65 | } | 66 | } |
66 | EXPORT_SYMBOL_GPL(inet_twsk_put); | 67 | EXPORT_SYMBOL_GPL(inet_twsk_put); |
67 | 68 | ||
68 | /* | 69 | /* |
69 | * Enter the time wait state. This is called with locally disabled BH. | 70 | * Enter the time wait state. This is called with locally disabled BH. |
70 | * Essentially we whip up a timewait bucket, copy the relevant info into it | 71 | * Essentially we whip up a timewait bucket, copy the relevant info into it |
71 | * from the SK, and mess with hash chains and list linkage. | 72 | * from the SK, and mess with hash chains and list linkage. |
72 | */ | 73 | */ |
73 | void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | 74 | void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, |
74 | struct inet_hashinfo *hashinfo) | 75 | struct inet_hashinfo *hashinfo) |
75 | { | 76 | { |
76 | const struct inet_sock *inet = inet_sk(sk); | 77 | const struct inet_sock *inet = inet_sk(sk); |
77 | const struct inet_connection_sock *icsk = inet_csk(sk); | 78 | const struct inet_connection_sock *icsk = inet_csk(sk); |
78 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); | 79 | struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); |
79 | spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 80 | spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
80 | struct inet_bind_hashbucket *bhead; | 81 | struct inet_bind_hashbucket *bhead; |
81 | /* Step 1: Put TW into bind hash. Original socket stays there too. | 82 | /* Step 1: Put TW into bind hash. Original socket stays there too. |
82 | Note, that any socket with inet->num != 0 MUST be bound in | 83 | Note, that any socket with inet->num != 0 MUST be bound in |
83 | binding cache, even if it is closed. | 84 | binding cache, even if it is closed. |
84 | */ | 85 | */ |
85 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, | 86 | bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, |
86 | hashinfo->bhash_size)]; | 87 | hashinfo->bhash_size)]; |
87 | spin_lock(&bhead->lock); | 88 | spin_lock(&bhead->lock); |
88 | tw->tw_tb = icsk->icsk_bind_hash; | 89 | tw->tw_tb = icsk->icsk_bind_hash; |
89 | WARN_ON(!icsk->icsk_bind_hash); | 90 | WARN_ON(!icsk->icsk_bind_hash); |
90 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); | 91 | inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); |
91 | spin_unlock(&bhead->lock); | 92 | spin_unlock(&bhead->lock); |
92 | 93 | ||
93 | spin_lock(lock); | 94 | spin_lock(lock); |
94 | 95 | ||
95 | /* | 96 | /* |
96 | * Step 2: Hash TW into TIMEWAIT chain. | 97 | * Step 2: Hash TW into TIMEWAIT chain. |
97 | * Should be done before removing sk from established chain | 98 | * Should be done before removing sk from established chain |
98 | * because readers are lockless and search established first. | 99 | * because readers are lockless and search established first. |
99 | */ | 100 | */ |
100 | atomic_inc(&tw->tw_refcnt); | 101 | atomic_inc(&tw->tw_refcnt); |
101 | inet_twsk_add_node_rcu(tw, &ehead->twchain); | 102 | inet_twsk_add_node_rcu(tw, &ehead->twchain); |
102 | 103 | ||
103 | /* Step 3: Remove SK from established hash. */ | 104 | /* Step 3: Remove SK from established hash. */ |
104 | if (__sk_nulls_del_node_init_rcu(sk)) | 105 | if (__sk_nulls_del_node_init_rcu(sk)) |
105 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); | 106 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
106 | 107 | ||
107 | spin_unlock(lock); | 108 | spin_unlock(lock); |
108 | } | 109 | } |
109 | 110 | ||
110 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); | 111 | EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); |
111 | 112 | ||
112 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) | 113 | struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) |
113 | { | 114 | { |
114 | struct inet_timewait_sock *tw = | 115 | struct inet_timewait_sock *tw = |
115 | kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, | 116 | kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, |
116 | GFP_ATOMIC); | 117 | GFP_ATOMIC); |
117 | if (tw != NULL) { | 118 | if (tw != NULL) { |
118 | const struct inet_sock *inet = inet_sk(sk); | 119 | const struct inet_sock *inet = inet_sk(sk); |
120 | |||
121 | kmemcheck_annotate_bitfield(tw, flags); | ||
119 | 122 | ||
120 | /* Give us an identity. */ | 123 | /* Give us an identity. */ |
121 | tw->tw_daddr = inet->daddr; | 124 | tw->tw_daddr = inet->daddr; |
122 | tw->tw_rcv_saddr = inet->rcv_saddr; | 125 | tw->tw_rcv_saddr = inet->rcv_saddr; |
123 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; | 126 | tw->tw_bound_dev_if = sk->sk_bound_dev_if; |
124 | tw->tw_num = inet->num; | 127 | tw->tw_num = inet->num; |
125 | tw->tw_state = TCP_TIME_WAIT; | 128 | tw->tw_state = TCP_TIME_WAIT; |
126 | tw->tw_substate = state; | 129 | tw->tw_substate = state; |
127 | tw->tw_sport = inet->sport; | 130 | tw->tw_sport = inet->sport; |
128 | tw->tw_dport = inet->dport; | 131 | tw->tw_dport = inet->dport; |
129 | tw->tw_family = sk->sk_family; | 132 | tw->tw_family = sk->sk_family; |
130 | tw->tw_reuse = sk->sk_reuse; | 133 | tw->tw_reuse = sk->sk_reuse; |
131 | tw->tw_hash = sk->sk_hash; | 134 | tw->tw_hash = sk->sk_hash; |
132 | tw->tw_ipv6only = 0; | 135 | tw->tw_ipv6only = 0; |
133 | tw->tw_transparent = inet->transparent; | 136 | tw->tw_transparent = inet->transparent; |
134 | tw->tw_prot = sk->sk_prot_creator; | 137 | tw->tw_prot = sk->sk_prot_creator; |
135 | twsk_net_set(tw, hold_net(sock_net(sk))); | 138 | twsk_net_set(tw, hold_net(sock_net(sk))); |
136 | atomic_set(&tw->tw_refcnt, 1); | 139 | atomic_set(&tw->tw_refcnt, 1); |
137 | inet_twsk_dead_node_init(tw); | 140 | inet_twsk_dead_node_init(tw); |
138 | __module_get(tw->tw_prot->owner); | 141 | __module_get(tw->tw_prot->owner); |
139 | } | 142 | } |
140 | 143 | ||
141 | return tw; | 144 | return tw; |
142 | } | 145 | } |
143 | 146 | ||
144 | EXPORT_SYMBOL_GPL(inet_twsk_alloc); | 147 | EXPORT_SYMBOL_GPL(inet_twsk_alloc); |
145 | 148 | ||
146 | /* Returns non-zero if quota exceeded. */ | 149 | /* Returns non-zero if quota exceeded. */ |
147 | static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, | 150 | static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, |
148 | const int slot) | 151 | const int slot) |
149 | { | 152 | { |
150 | struct inet_timewait_sock *tw; | 153 | struct inet_timewait_sock *tw; |
151 | struct hlist_node *node; | 154 | struct hlist_node *node; |
152 | unsigned int killed; | 155 | unsigned int killed; |
153 | int ret; | 156 | int ret; |
154 | 157 | ||
155 | /* NOTE: compare this to previous version where lock | 158 | /* NOTE: compare this to previous version where lock |
156 | * was released after detaching chain. It was racy, | 159 | * was released after detaching chain. It was racy, |
157 | * because tw buckets are scheduled in not serialized context | 160 | * because tw buckets are scheduled in not serialized context |
158 | * in 2.3 (with netfilter), and with softnet it is common, because | 161 | * in 2.3 (with netfilter), and with softnet it is common, because |
159 | * soft irqs are not sequenced. | 162 | * soft irqs are not sequenced. |
160 | */ | 163 | */ |
161 | killed = 0; | 164 | killed = 0; |
162 | ret = 0; | 165 | ret = 0; |
163 | rescan: | 166 | rescan: |
164 | inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { | 167 | inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { |
165 | __inet_twsk_del_dead_node(tw); | 168 | __inet_twsk_del_dead_node(tw); |
166 | spin_unlock(&twdr->death_lock); | 169 | spin_unlock(&twdr->death_lock); |
167 | __inet_twsk_kill(tw, twdr->hashinfo); | 170 | __inet_twsk_kill(tw, twdr->hashinfo); |
168 | #ifdef CONFIG_NET_NS | 171 | #ifdef CONFIG_NET_NS |
169 | NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); | 172 | NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); |
170 | #endif | 173 | #endif |
171 | inet_twsk_put(tw); | 174 | inet_twsk_put(tw); |
172 | killed++; | 175 | killed++; |
173 | spin_lock(&twdr->death_lock); | 176 | spin_lock(&twdr->death_lock); |
174 | if (killed > INET_TWDR_TWKILL_QUOTA) { | 177 | if (killed > INET_TWDR_TWKILL_QUOTA) { |
175 | ret = 1; | 178 | ret = 1; |
176 | break; | 179 | break; |
177 | } | 180 | } |
178 | 181 | ||
179 | /* While we dropped twdr->death_lock, another cpu may have | 182 | /* While we dropped twdr->death_lock, another cpu may have |
180 | * killed off the next TW bucket in the list, therefore | 183 | * killed off the next TW bucket in the list, therefore |
181 | * do a fresh re-read of the hlist head node with the | 184 | * do a fresh re-read of the hlist head node with the |
182 | * lock reacquired. We still use the hlist traversal | 185 | * lock reacquired. We still use the hlist traversal |
183 | * macro in order to get the prefetches. | 186 | * macro in order to get the prefetches. |
184 | */ | 187 | */ |
185 | goto rescan; | 188 | goto rescan; |
186 | } | 189 | } |
187 | 190 | ||
188 | twdr->tw_count -= killed; | 191 | twdr->tw_count -= killed; |
189 | #ifndef CONFIG_NET_NS | 192 | #ifndef CONFIG_NET_NS |
190 | NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); | 193 | NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); |
191 | #endif | 194 | #endif |
192 | return ret; | 195 | return ret; |
193 | } | 196 | } |
194 | 197 | ||
/*
 * Slow-timer handler: reap the TIME_WAIT sockets parked in the current
 * slot of the death row's slow wheel.
 *
 * @data: the struct inet_timewait_death_row, cast to unsigned long
 *        (legacy timer callback calling convention).
 *
 * Runs in timer (softirq) context under twdr->death_lock.
 */
void inet_twdr_hangman(unsigned long data)
{
	struct inet_timewait_death_row *twdr;
	int unsigned need_timer;

	twdr = (struct inet_timewait_death_row *)data;
	spin_lock(&twdr->death_lock);

	/* Nothing scheduled: leave the timer disarmed. */
	if (twdr->tw_count == 0)
		goto out;

	need_timer = 0;
	if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
		/* Per-run quota exceeded: mark this slot pending and
		 * defer the remainder to the twkill workqueue, keeping
		 * the timer armed so other slots are still serviced.
		 */
		twdr->thread_slots |= (1 << twdr->slot);
		schedule_work(&twdr->twkill_work);
		need_timer = 1;
	} else {
		/* We purged the entire slot, anything left? */
		if (twdr->tw_count)
			need_timer = 1;
	}
	/* Advance the hand; slot count is a power of two, so mask wraps. */
	twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
	if (need_timer)
		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
out:
	spin_unlock(&twdr->death_lock);
}

EXPORT_SYMBOL_GPL(inet_twdr_hangman);
224 | 227 | ||
/*
 * Workqueue handler that finishes the slot purges the timer handler
 * deferred (see inet_twdr_hangman): drains every slot whose bit is set
 * in twdr->thread_slots, yielding to the scheduler between quota-sized
 * batches so a large TIME_WAIT backlog cannot monopolise the CPU.
 *
 * @work: embedded in the death row; recovered via container_of().
 */
void inet_twdr_twkill_work(struct work_struct *work)
{
	struct inet_timewait_death_row *twdr =
		container_of(work, struct inet_timewait_death_row, twkill_work);
	int i;

	/* thread_slots must have one bit per kill slot. */
	BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
			(sizeof(twdr->thread_slots) * 8));

	while (twdr->thread_slots) {
		spin_lock_bh(&twdr->death_lock);
		for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
			if (!(twdr->thread_slots & (1 << i)))
				continue;

			/* Non-zero return means the quota was hit with
			 * work remaining; keep retrying this slot,
			 * dropping the lock to reschedule if needed.
			 */
			while (inet_twdr_do_twkill_work(twdr, i) != 0) {
				if (need_resched()) {
					spin_unlock_bh(&twdr->death_lock);
					schedule();
					spin_lock_bh(&twdr->death_lock);
				}
			}

			/* Slot fully drained: clear its pending bit. */
			twdr->thread_slots &= ~(1 << i);
		}
		spin_unlock_bh(&twdr->death_lock);
	}
}

EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
255 | 258 | ||
/* These are always called from BH context. See callers in
 * tcp_input.c to verify this.
 */

/* This is for handling early-kills of TIME_WAIT sockets. */
/*
 * Remove @tw from the death-row wheel (if it was scheduled there) and
 * unhash it immediately, instead of letting the timer expire it.
 * Drops the wheel's reference via inet_twsk_put() when the socket was
 * actually on a death list, and disarms the slow timer once the last
 * TIME_WAIT socket is gone.  Note __inet_twsk_kill() is deliberately
 * called after death_lock is released.
 */
void inet_twsk_deschedule(struct inet_timewait_sock *tw,
			  struct inet_timewait_death_row *twdr)
{
	spin_lock(&twdr->death_lock);
	if (inet_twsk_del_dead_node(tw)) {
		inet_twsk_put(tw);
		if (--twdr->tw_count == 0)
			del_timer(&twdr->tw_timer);
	}
	spin_unlock(&twdr->death_lock);
	__inet_twsk_kill(tw, twdr->hashinfo);
}

EXPORT_SYMBOL(inet_twsk_deschedule);
275 | 278 | ||
/*
 * Schedule (or reschedule) the expiry of a TIME_WAIT socket.
 *
 * @tw:           the timewait socket to arm
 * @twdr:         death row holding the timer wheels
 * @timeo:        desired timeout in jiffies
 * @timewait_len: full TIME_WAIT length; timeouts at or above this go to
 *                the last slow-wheel slot
 *
 * Short timeouts (recycle mode) land on the fine-grained twcal wheel,
 * everything else on the coarse slow wheel serviced by inet_twdr_hangman.
 * Takes a reference on @tw for the wheel unless it was already queued.
 */
void inet_twsk_schedule(struct inet_timewait_sock *tw,
		       struct inet_timewait_death_row *twdr,
		       const int timeo, const int timewait_len)
{
	struct hlist_head *list;
	int slot;

	/* timeout := RTO * 3.5
	 *
	 * 3.5 = 1+2+0.5 to wait for two retransmits.
	 *
	 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
	 * FINs (or previous segments) are lost (probability of such event
	 * is p^(N+1), where p is probability to lose single packet and
	 * time to detect the loss is about RTO*(2^N - 1) with exponential
	 * backoff). Normal timewait length is calculated so, that we
	 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
	 * [ BTW Linux, following BSD, violates this requirement waiting
	 * only for 60sec, we should wait at least for 240 secs.
	 * Well, 240 consumes too much of resources 8)
	 * ]
	 * This interval is not reduced to catch old duplicate and
	 * responses to our wandering segments living for two MSLs.
	 * However, if we use PAWS to detect
	 * old duplicates, we can reduce the interval to bounds required
	 * by RTO, rather than MSL. So, if peer understands PAWS, we
	 * kill tw bucket after 3.5*RTO (it is important that this number
	 * is greater than TS tick!) and detect old duplicates with help
	 * of PAWS.
	 */
	/* Round the timeout up to whole recycle ticks. */
	slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;

	spin_lock(&twdr->death_lock);

	/* Unlink it, if it was scheduled */
	if (inet_twsk_del_dead_node(tw))
		twdr->tw_count--;
	else
		atomic_inc(&tw->tw_refcnt);

	if (slot >= INET_TWDR_RECYCLE_SLOTS) {
		/* Schedule to slow timer */
		if (timeo >= timewait_len) {
			slot = INET_TWDR_TWKILL_SLOTS - 1;
		} else {
			slot = DIV_ROUND_UP(timeo, twdr->period);
			if (slot >= INET_TWDR_TWKILL_SLOTS)
				slot = INET_TWDR_TWKILL_SLOTS - 1;
		}
		tw->tw_ttd = jiffies + timeo;
		/* Slot is relative to the wheel's current hand. */
		slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
		list = &twdr->cells[slot];
	} else {
		tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);

		if (twdr->twcal_hand < 0) {
			/* Recycle wheel idle (hand == -1): start it and
			 * arm its timer for this socket's deadline.
			 */
			twdr->twcal_hand = 0;
			twdr->twcal_jiffie = jiffies;
			twdr->twcal_timer.expires = twdr->twcal_jiffie +
					(slot << INET_TWDR_RECYCLE_TICK);
			add_timer(&twdr->twcal_timer);
		} else {
			/* Pull the timer earlier if this deadline beats
			 * the currently armed one.
			 */
			if (time_after(twdr->twcal_timer.expires,
				       jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
				mod_timer(&twdr->twcal_timer,
					  jiffies + (slot << INET_TWDR_RECYCLE_TICK));
			slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
		}
		list = &twdr->twcal_row[slot];
	}

	hlist_add_head(&tw->tw_death_node, list);

	/* First entry on the row: arm the slow timer. */
	if (twdr->tw_count++ == 0)
		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
	spin_unlock(&twdr->death_lock);
}

EXPORT_SYMBOL_GPL(inet_twsk_schedule);
356 | 359 | ||
/*
 * Fine-grained (recycle) wheel timer handler: walk the twcal wheel from
 * its current hand, killing every TIME_WAIT socket whose tick has
 * passed, and re-arm the timer at the first slot that is still in the
 * future.  Sets twcal_hand to -1 when the whole wheel is empty, which
 * inet_twsk_schedule() uses as the "wheel idle" marker.
 *
 * @data: the struct inet_timewait_death_row, cast to unsigned long.
 */
void inet_twdr_twcal_tick(unsigned long data)
{
	struct inet_timewait_death_row *twdr;
	int n, slot;
	unsigned long j;
	unsigned long now = jiffies;
	int killed = 0;
	int adv = 0;

	twdr = (struct inet_timewait_death_row *)data;

	spin_lock(&twdr->death_lock);
	/* Wheel already idle: nothing to do. */
	if (twdr->twcal_hand < 0)
		goto out;

	slot = twdr->twcal_hand;
	j = twdr->twcal_jiffie;

	for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
		if (time_before_eq(j, now)) {
			/* Slot deadline has passed: kill everything in it. */
			struct hlist_node *node, *safe;
			struct inet_timewait_sock *tw;

			inet_twsk_for_each_inmate_safe(tw, node, safe,
						       &twdr->twcal_row[slot]) {
				__inet_twsk_del_dead_node(tw);
				__inet_twsk_kill(tw, twdr->hashinfo);
#ifdef CONFIG_NET_NS
				NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
#endif
				inet_twsk_put(tw);
				killed++;
			}
		} else {
			/* First future slot becomes the new hand. */
			if (!adv) {
				adv = 1;
				twdr->twcal_jiffie = j;
				twdr->twcal_hand = slot;
			}

			/* Non-empty future slot: re-arm for it and stop. */
			if (!hlist_empty(&twdr->twcal_row[slot])) {
				mod_timer(&twdr->twcal_timer, j);
				goto out;
			}
		}
		j += 1 << INET_TWDR_RECYCLE_TICK;
		slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
	}
	/* Entire wheel drained: mark it idle. */
	twdr->twcal_hand = -1;

out:
	/* Last TIME_WAIT socket gone: disarm the slow timer too. */
	if ((twdr->tw_count -= killed) == 0)
		del_timer(&twdr->tw_timer);
#ifndef CONFIG_NET_NS
	NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
#endif
	spin_unlock(&twdr->death_lock);
}

EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
417 | 420 | ||
/*
 * Kill every TIME_WAIT socket of @family belonging to @net, used when a
 * network namespace (or protocol instance) is being torn down.
 *
 * @net:      namespace whose timewait sockets should die
 * @hashinfo: established-hash table whose twchains are scanned
 * @twdr:     death row the sockets are descheduled from
 * @family:   address family to match (e.g. AF_INET / AF_INET6)
 *
 * For each match we take a reference, drop the bucket lock (so
 * inet_twsk_deschedule() can sleep-free take its own locks), kill the
 * socket, then restart the bucket scan from scratch because the chain
 * may have changed while the lock was released.
 */
void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
		     struct inet_timewait_death_row *twdr, int family)
{
	struct inet_timewait_sock *tw;
	struct sock *sk;
	struct hlist_nulls_node *node;
	int h;

	local_bh_disable();
	for (h = 0; h < (hashinfo->ehash_size); h++) {
		struct inet_ehash_bucket *head =
			inet_ehash_bucket(hashinfo, h);
		spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
restart:
		spin_lock(lock);
		sk_nulls_for_each(sk, node, &head->twchain) {

			tw = inet_twsk(sk);
			if (!net_eq(twsk_net(tw), net) ||
			    tw->tw_family != family)
				continue;

			/* Hold the socket across the lock drop. */
			atomic_inc(&tw->tw_refcnt);
			spin_unlock(lock);
			inet_twsk_deschedule(tw, twdr);
			inet_twsk_put(tw);

			/* Chain may have mutated while unlocked. */
			goto restart;
		}
		spin_unlock(lock);
	}
	local_bh_enable();
}
EXPORT_SYMBOL_GPL(inet_twsk_purge);
452 | 455 |