Commit 6d2553612fa329979e6423a5f2410fd7be5aa902
Committed by: David S. Miller
1 parent: f6e63cfb5c
Exists in master and in 7 other branches
[INET]: Shrink struct inet_ehash_bucket on 32 bits UP
No need to align struct inet_ehash_bucket on an 8-byte boundary. On a 32-bit uniprocessor, that's a waste of 4 bytes per struct (50%). On other platforms the attribute is useless: natural alignment is already 8.

platform     | Size before | Size after patch
-------------+-------------+------------------
32 bits, UP  |      8      |        4
32 bits, SMP |      8      |        8
64 bits, UP  |      8      |        8
64 bits, SMP |     16      |       16

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
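The effect is easy to reproduce outside the kernel. Below is a minimal userspace sketch (not kernel code; the fake_* types are illustrative stand-ins) assuming a 32-bit UP configuration, where rwlock_t compiles down to an empty struct and the bucket's only payload is the 4-byte hlist_head pointer. Built with gcc -m32, it prints 8 for the old layout and 4 for the patched one, matching the table above.

#include <stdio.h>

struct fake_rwlock { };                    /* 0 bytes in GNU C, like rwlock_t on 32-bit UP */
struct fake_hlist_head { void *first; };   /* 4 bytes on a 32-bit target */

struct bucket_before {                     /* old layout: forced 8-byte alignment */
        struct fake_rwlock     lock;
        struct fake_hlist_head chain;
} __attribute__((__aligned__(8)));

struct bucket_after {                      /* patched layout: natural alignment */
        struct fake_rwlock     lock;
        struct fake_hlist_head chain;
};

int main(void)
{
        /* With gcc -m32 this prints 8 then 4: the attribute alone doubles the size. */
        printf("before: %zu\n", sizeof(struct bucket_before));
        printf("after:  %zu\n", sizeof(struct bucket_after));
        return 0;
}

The same padding can be inspected on a real build with a struct-layout tool such as pahole, which reports per-struct holes and alignment.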
Showing 1 changed file with 1 addition and 1 deletion (inline diff)
include/net/inet_hashtables.h
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 * Authors:     Lotsa people, from code originally in tcp
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#ifndef _INET_HASHTABLES_H
#define _INET_HASHTABLES_H

#include <linux/config.h>

#include <linux/interrupt.h>
#include <linux/ipv6.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

#include <net/inet_connection_sock.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/tcp_states.h>

#include <asm/atomic.h>
#include <asm/byteorder.h>

/* This is for all connections with a full identity, no wildcards.
 * New scheme, half the table is for TIME_WAIT, the other half is
 * for the rest.  I'll experiment with dynamic table growth later.
 */
struct inet_ehash_bucket {
        rwlock_t          lock;
        struct hlist_head chain;
-} __attribute__((__aligned__(8)));
+};

/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 *      1) Sockets bound to different interfaces may share a local port.
 *         Failing that, goto test 2.
 *      2) If all sockets have sk->sk_reuse set, and none of them are in
 *         TCP_LISTEN state, the port may be shared.
 *         Failing that, goto test 3.
 *      3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
 *         address, and none of them are the same, the port may be
 *         shared.
 *         Failing this, the port cannot be shared.
 *
 * The interesting point, is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
 * The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit, if it is set and the socket trying to bind has
 * sk->sk_reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (ie. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)   -DaveM
 */
struct inet_bind_bucket {
        unsigned short          port;
        signed short            fastreuse;
        struct hlist_node       node;
        struct hlist_head       owners;
};

#define inet_bind_bucket_for_each(tb, node, head) \
        hlist_for_each_entry(tb, node, head, node)

struct inet_bind_hashbucket {
        spinlock_t              lock;
        struct hlist_head       chain;
};

/* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE       32      /* Yes, really, this is all you need. */

struct inet_hashinfo {
        /* This is for sockets with full identity only.  Sockets here will
         * always be without wildcards and will have the following invariant:
         *
         *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
         *
         * First half of the table is for sockets not in TIME_WAIT, second half
         * is for TIME_WAIT sockets only.
         */
        struct inet_ehash_bucket        *ehash;

        /* Ok, let's try this, I give up, we do need a local binding
         * TCP hash as well as the others for fast bind/connect.
         */
        struct inet_bind_hashbucket     *bhash;

        int                             bhash_size;
        unsigned int                    ehash_size;

        /* All sockets in TCP_LISTEN state will be in here.  This is the only
         * table where wildcard'd TCP sockets can exist.  Hash function here
         * is just local port number.
         */
        struct hlist_head               listening_hash[INET_LHTABLE_SIZE];

        /* All the above members are written once at bootup and
         * never written again _or_ are predominantly read-access.
         *
         * Now align to a new cache line as all the following members
         * are often dirty.
         */
        rwlock_t                        lhash_lock ____cacheline_aligned;
        atomic_t                        lhash_users;
        wait_queue_head_t               lhash_wait;
        spinlock_t                      portalloc_lock;
        kmem_cache_t                    *bind_bucket_cachep;
        int                             port_rover;
};

static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
                                        const __u32 faddr, const __u16 fport)
{
        unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
        h ^= h >> 16;
        h ^= h >> 8;
        return h;
}

static inline int inet_sk_ehashfn(const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const __u32 laddr = inet->rcv_saddr;
        const __u16 lport = inet->num;
        const __u32 faddr = inet->daddr;
        const __u16 fport = inet->dport;

        return inet_ehashfn(laddr, lport, faddr, fport);
}

static inline struct inet_ehash_bucket *inet_ehash_bucket(
        struct inet_hashinfo *hashinfo,
        unsigned int hash)
{
        return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
}

extern struct inet_bind_bucket *
        inet_bind_bucket_create(kmem_cache_t *cachep,
                                struct inet_bind_hashbucket *head,
                                const unsigned short snum);
extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
                                     struct inet_bind_bucket *tb);

static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
{
        return lport & (bhash_size - 1);
}

extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
                           const unsigned short snum);

/* These can have wildcards, don't try too hard. */
static inline int inet_lhashfn(const unsigned short num)
{
        return num & (INET_LHTABLE_SIZE - 1);
}

static inline int inet_sk_listen_hashfn(const struct sock *sk)
{
        return inet_lhashfn(inet_sk(sk)->num);
}

/* Caller must disable local BH processing. */
static inline void __inet_inherit_port(struct inet_hashinfo *table,
                                       struct sock *sk, struct sock *child)
{
        const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
        struct inet_bind_hashbucket *head = &table->bhash[bhash];
        struct inet_bind_bucket *tb;

        spin_lock(&head->lock);
        tb = inet_csk(sk)->icsk_bind_hash;
        sk_add_bind_node(child, &tb->owners);
        inet_csk(child)->icsk_bind_hash = tb;
        spin_unlock(&head->lock);
}

static inline void inet_inherit_port(struct inet_hashinfo *table,
                                     struct sock *sk, struct sock *child)
{
        local_bh_disable();
        __inet_inherit_port(table, sk, child);
        local_bh_enable();
}

extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);

extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);

/*
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
 */
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
        /* read_lock synchronizes to candidates to writers */
        read_lock(&hashinfo->lhash_lock);
        atomic_inc(&hashinfo->lhash_users);
        read_unlock(&hashinfo->lhash_lock);
}

static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
        if (atomic_dec_and_test(&hashinfo->lhash_users))
                wake_up(&hashinfo->lhash_wait);
}

static inline void __inet_hash(struct inet_hashinfo *hashinfo,
                               struct sock *sk, const int listen_possible)
{
        struct hlist_head *list;
        rwlock_t *lock;

        BUG_TRAP(sk_unhashed(sk));
        if (listen_possible && sk->sk_state == TCP_LISTEN) {
                list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
                lock = &hashinfo->lhash_lock;
                inet_listen_wlock(hashinfo);
        } else {
                struct inet_ehash_bucket *head;
                sk->sk_hash = inet_sk_ehashfn(sk);
                head = inet_ehash_bucket(hashinfo, sk->sk_hash);
                list = &head->chain;
                lock = &head->lock;
                write_lock(lock);
        }
        __sk_add_node(sk, list);
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(lock);
        if (listen_possible && sk->sk_state == TCP_LISTEN)
                wake_up(&hashinfo->lhash_wait);
}

static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                local_bh_disable();
                __inet_hash(hashinfo, sk, 1);
                local_bh_enable();
        }
}

static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
        rwlock_t *lock;

        if (sk_unhashed(sk))
                goto out;

        if (sk->sk_state == TCP_LISTEN) {
                local_bh_disable();
                inet_listen_wlock(hashinfo);
                lock = &hashinfo->lhash_lock;
        } else {
                lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
                write_lock_bh(lock);
        }

        if (__sk_del_node_init(sk))
                sock_prot_dec_use(sk->sk_prot);
        write_unlock_bh(lock);
out:
        if (sk->sk_state == TCP_LISTEN)
                wake_up(&hashinfo->lhash_wait);
}

static inline int inet_iif(const struct sk_buff *skb)
{
        return ((struct rtable *)skb->dst)->rt_iif;
}

extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
                                           const u32 daddr,
                                           const unsigned short hnum,
                                           const int dif);

/* Optimize the common listener case. */
static inline struct sock *
        inet_lookup_listener(struct inet_hashinfo *hashinfo,
                             const u32 daddr,
                             const unsigned short hnum, const int dif)
{
        struct sock *sk = NULL;
        const struct hlist_head *head;

        read_lock(&hashinfo->lhash_lock);
        head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
        if (!hlist_empty(head)) {
                const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));

                if (inet->num == hnum && !sk->sk_node.next &&
                    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
                    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
                    !sk->sk_bound_dev_if)
                        goto sherry_cache;
                sk = __inet_lookup_listener(head, daddr, hnum, dif);
        }
        if (sk) {
sherry_cache:
                sock_hold(sk);
        }
        read_unlock(&hashinfo->lhash_lock);
        return sk;
}

/* Socket demux engine toys. */
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__sport) << 16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
        (((__u32)(__dport) << 16) | (__u32)(__sport))
#endif

#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
        const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
        const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)   \
        (((__sk)->sk_hash == (__hash)) &&                                      \
         ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) &&              \
         ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) &&               \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
        (((__sk)->sk_hash == (__hash)) &&                                      \
         ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&         \
         ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&          \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)   \
        (((__sk)->sk_hash == (__hash)) &&                                      \
         (inet_sk(__sk)->daddr == (__saddr)) &&                                \
         (inet_sk(__sk)->rcv_saddr == (__daddr)) &&                            \
         ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) &&               \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
        (((__sk)->sk_hash == (__hash)) &&                                      \
         (inet_twsk(__sk)->tw_daddr == (__saddr)) &&                           \
         (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) &&                       \
         ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&          \
         (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */

/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
static inline struct sock *
        __inet_lookup_established(struct inet_hashinfo *hashinfo,
                                  const u32 saddr, const u16 sport,
                                  const u32 daddr, const u16 hnum,
                                  const int dif)
{
        INET_ADDR_COOKIE(acookie, saddr, daddr)
        const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
        struct sock *sk;
        const struct hlist_node *node;
        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.
         */
        unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
        struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);

        prefetch(head->chain.first);
        read_lock(&head->lock);
        sk_for_each(sk, node, &head->chain) {
                if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }

        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
                if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
                        goto hit;
        }
        sk = NULL;
out:
        read_unlock(&head->lock);
        return sk;
hit:
        sock_hold(sk);
        goto out;
}

static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
                                         const u32 saddr, const u16 sport,
                                         const u32 daddr, const u16 hnum,
                                         const int dif)
{
        struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport,
                                                    daddr, hnum, dif);
        return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
}

static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
                                       const u32 saddr, const u16 sport,
                                       const u32 daddr, const u16 dport,
                                       const int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}
#endif /* _INET_HASHTABLES_H */