Commit 6d2553612fa329979e6423a5f2410fd7be5aa902

Authored by Eric Dumazet
Committed by David S. Miller
1 parent f6e63cfb5c

[INET]: Shrink struct inet_ehash_bucket on 32 bits UP

No need to align struct inet_ehash_bucket on an 8-byte boundary.

On a 32-bit uniprocessor build, that wastes 4 bytes per struct (50%).

On other platforms the attribute is useless, since natural alignment is already 8 bytes.

platform     | Size before | Size after patch
-------------+-------------+-----------------
32-bit, UP   |           8 |               4
32-bit, SMP  |           8 |               8
64-bit, UP   |           8 |               8
64-bit, SMP  |          16 |              16
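
The effect is easy to reproduce in user space. Below is a minimal sketch (illustrative only: fake_rwlock and fake_hlist_head are hypothetical stand-ins modelling the 32-bit UP layout, where rwlock_t is an empty type and hlist_head holds a single 4-byte pointer). Compiled with gcc -m32 it prints 8 and 4; on a 64-bit target it prints 8 for both, since the pointer alone already forces 8-byte natural alignment, matching the table above.

#include <stdio.h>

struct fake_rwlock { };                  /* empty on UP (GNU C extension, sizeof == 0) */
struct fake_hlist_head { void *first; }; /* one pointer: 4 bytes on a 32-bit target */

/* Layout before the patch: forced to an 8-byte boundary. */
struct bucket_before {
	struct fake_rwlock lock;
	struct fake_hlist_head chain;
} __attribute__((__aligned__(8)));

/* Layout after the patch: natural alignment only. */
struct bucket_after {
	struct fake_rwlock lock;
	struct fake_hlist_head chain;
};

int main(void)
{
	printf("before: %zu bytes\n", sizeof(struct bucket_before)); /* 8 on -m32 */
	printf("after:  %zu bytes\n", sizeof(struct bucket_after));  /* 4 on -m32 */
	return 0;
}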

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 1 changed file with 1 addition and 1 deletion

include/net/inet_hashtables.h
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the BSD Socket
  *		interface as the means of communication with the user level.
  *
  * Authors:	Lotsa people, from code originally in tcp
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  */

 #ifndef _INET_HASHTABLES_H
 #define _INET_HASHTABLES_H

 #include <linux/config.h>

 #include <linux/interrupt.h>
 #include <linux/ipv6.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/wait.h>

 #include <net/inet_connection_sock.h>
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>

 #include <asm/atomic.h>
 #include <asm/byteorder.h>

 /* This is for all connections with a full identity, no wildcards.
  * New scheme, half the table is for TIME_WAIT, the other half is
  * for the rest.  I'll experiment with dynamic table growth later.
  */
 struct inet_ehash_bucket {
 	rwlock_t	  lock;
 	struct hlist_head chain;
-} __attribute__((__aligned__(8)));
+};

 /* There are a few simple rules, which allow for local port reuse by
  * an application.  In essence:
  *
  *	1) Sockets bound to different interfaces may share a local port.
  *	   Failing that, goto test 2.
  *	2) If all sockets have sk->sk_reuse set, and none of them are in
  *	   TCP_LISTEN state, the port may be shared.
  *	   Failing that, goto test 3.
  *	3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
  *	   address, and none of them are the same, the port may be
  *	   shared.
  *	   Failing this, the port cannot be shared.
  *
  * The interesting point, is test #2.  This is what an FTP server does
  * all day.  To optimize this case we use a specific flag bit defined
  * below.  As we add sockets to a bind bucket list, we perform a
  * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
  * As long as all sockets added to a bind bucket pass this test,
  * the flag bit will be set.
  * The resulting situation is that tcp_v[46]_verify_bind() can just check
  * for this flag bit, if it is set and the socket trying to bind has
  * sk->sk_reuse set, we don't even have to walk the owners list at all,
  * we return that it is ok to bind this socket to the requested local port.
  *
  * Sounds like a lot of work, but it is worth it.  In a more naive
  * implementation (ie. current FreeBSD etc.) the entire list of ports
  * must be walked for each data port opened by an ftp server.  Needless
  * to say, this does not scale at all.  With a couple thousand FTP
  * users logged onto your box, isn't it nice to know that new data
  * ports are created in O(1) time?  I thought so. ;-) -DaveM
  */
 struct inet_bind_bucket {
 	unsigned short		port;
 	signed short		fastreuse;
 	struct hlist_node	node;
 	struct hlist_head	owners;
 };

 #define inet_bind_bucket_for_each(tb, node, head) \
 	hlist_for_each_entry(tb, node, head, node)

 struct inet_bind_hashbucket {
 	spinlock_t		lock;
 	struct hlist_head	chain;
 };

 /* This is for listening sockets, thus all sockets which possess wildcards. */
 #define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */

 struct inet_hashinfo {
 	/* This is for sockets with full identity only.  Sockets here will
 	 * always be without wildcards and will have the following invariant:
 	 *
 	 *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
 	 *
 	 * First half of the table is for sockets not in TIME_WAIT, second half
 	 * is for TIME_WAIT sockets only.
 	 */
 	struct inet_ehash_bucket	*ehash;

 	/* Ok, let's try this, I give up, we do need a local binding
 	 * TCP hash as well as the others for fast bind/connect.
 	 */
 	struct inet_bind_hashbucket	*bhash;

 	int				bhash_size;
 	unsigned int			ehash_size;

 	/* All sockets in TCP_LISTEN state will be in here.  This is the only
 	 * table where wildcard'd TCP sockets can exist.  Hash function here
 	 * is just local port number.
 	 */
 	struct hlist_head		listening_hash[INET_LHTABLE_SIZE];

 	/* All the above members are written once at bootup and
 	 * never written again _or_ are predominantly read-access.
 	 *
 	 * Now align to a new cache line as all the following members
 	 * are often dirty.
 	 */
 	rwlock_t			lhash_lock ____cacheline_aligned;
 	atomic_t			lhash_users;
 	wait_queue_head_t		lhash_wait;
 	spinlock_t			portalloc_lock;
 	kmem_cache_t			*bind_bucket_cachep;
 	int				port_rover;
 };

 static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
 					const __u32 faddr, const __u16 fport)
 {
 	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
 	h ^= h >> 16;
 	h ^= h >> 8;
 	return h;
 }

 static inline int inet_sk_ehashfn(const struct sock *sk)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	const __u32 laddr = inet->rcv_saddr;
 	const __u16 lport = inet->num;
 	const __u32 faddr = inet->daddr;
 	const __u16 fport = inet->dport;

 	return inet_ehashfn(laddr, lport, faddr, fport);
 }

 static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	struct inet_hashinfo *hashinfo,
 	unsigned int hash)
 {
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }

 extern struct inet_bind_bucket *
 		    inet_bind_bucket_create(kmem_cache_t *cachep,
 					    struct inet_bind_hashbucket *head,
 					    const unsigned short snum);
 extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
 				     struct inet_bind_bucket *tb);

 static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
 {
 	return lport & (bhash_size - 1);
 }

 extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 			   const unsigned short snum);

 /* These can have wildcards, don't try too hard. */
 static inline int inet_lhashfn(const unsigned short num)
 {
 	return num & (INET_LHTABLE_SIZE - 1);
 }

 static inline int inet_sk_listen_hashfn(const struct sock *sk)
 {
 	return inet_lhashfn(inet_sk(sk)->num);
 }

 /* Caller must disable local BH processing. */
 static inline void __inet_inherit_port(struct inet_hashinfo *table,
 				       struct sock *sk, struct sock *child)
 {
 	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;

 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
 	sk_add_bind_node(child, &tb->owners);
 	inet_csk(child)->icsk_bind_hash = tb;
 	spin_unlock(&head->lock);
 }

 static inline void inet_inherit_port(struct inet_hashinfo *table,
 				     struct sock *sk, struct sock *child)
 {
 	local_bh_disable();
 	__inet_inherit_port(table, sk, child);
 	local_bh_enable();
 }

 extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);

 extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);

 /*
  * - We may sleep inside this lock.
  * - If sleeping is not required (or called from BH),
  *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
  */
 static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
 {
 	/* read_lock synchronizes to candidates to writers */
 	read_lock(&hashinfo->lhash_lock);
 	atomic_inc(&hashinfo->lhash_users);
 	read_unlock(&hashinfo->lhash_lock);
 }

 static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
 {
 	if (atomic_dec_and_test(&hashinfo->lhash_users))
 		wake_up(&hashinfo->lhash_wait);
 }

 static inline void __inet_hash(struct inet_hashinfo *hashinfo,
 			       struct sock *sk, const int listen_possible)
 {
 	struct hlist_head *list;
 	rwlock_t *lock;

 	BUG_TRAP(sk_unhashed(sk));
 	if (listen_possible && sk->sk_state == TCP_LISTEN) {
 		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 		lock = &hashinfo->lhash_lock;
 		inet_listen_wlock(hashinfo);
 	} else {
 		struct inet_ehash_bucket *head;
 		sk->sk_hash = inet_sk_ehashfn(sk);
 		head = inet_ehash_bucket(hashinfo, sk->sk_hash);
 		list = &head->chain;
 		lock = &head->lock;
 		write_lock(lock);
 	}
 	__sk_add_node(sk, list);
 	sock_prot_inc_use(sk->sk_prot);
 	write_unlock(lock);
 	if (listen_possible && sk->sk_state == TCP_LISTEN)
 		wake_up(&hashinfo->lhash_wait);
 }

 static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
 		local_bh_disable();
 		__inet_hash(hashinfo, sk, 1);
 		local_bh_enable();
 	}
 }

 static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
 {
 	rwlock_t *lock;

 	if (sk_unhashed(sk))
 		goto out;

 	if (sk->sk_state == TCP_LISTEN) {
 		local_bh_disable();
 		inet_listen_wlock(hashinfo);
 		lock = &hashinfo->lhash_lock;
 	} else {
 		lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
 		write_lock_bh(lock);
 	}

 	if (__sk_del_node_init(sk))
 		sock_prot_dec_use(sk->sk_prot);
 	write_unlock_bh(lock);
 out:
 	if (sk->sk_state == TCP_LISTEN)
 		wake_up(&hashinfo->lhash_wait);
 }

 static inline int inet_iif(const struct sk_buff *skb)
 {
 	return ((struct rtable *)skb->dst)->rt_iif;
 }

 extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
 					   const u32 daddr,
 					   const unsigned short hnum,
 					   const int dif);

 /* Optimize the common listener case. */
 static inline struct sock *
 		inet_lookup_listener(struct inet_hashinfo *hashinfo,
 				     const u32 daddr,
 				     const unsigned short hnum, const int dif)
 {
 	struct sock *sk = NULL;
 	const struct hlist_head *head;

 	read_lock(&hashinfo->lhash_lock);
 	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));

 		if (inet->num == hnum && !sk->sk_node.next &&
 		    (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
 		    !sk->sk_bound_dev_if)
 			goto sherry_cache;
 		sk = __inet_lookup_listener(head, daddr, hnum, dif);
 	}
 	if (sk) {
 sherry_cache:
 		sock_hold(sk);
 	}
 	read_unlock(&hashinfo->lhash_lock);
 	return sk;
 }

 /* Socket demux engine toys. */
 #ifdef __BIG_ENDIAN
 #define INET_COMBINED_PORTS(__sport, __dport) \
 	(((__u32)(__sport) << 16) | (__u32)(__dport))
 #else /* __LITTLE_ENDIAN */
 #define INET_COMBINED_PORTS(__sport, __dport) \
 	(((__u32)(__dport) << 16) | (__u32)(__sport))
 #endif

 #if (BITS_PER_LONG == 64)
 #ifdef __BIG_ENDIAN
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
 #else /* __LITTLE_ENDIAN */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) &&				\
 	 ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) &&				\
 	 ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
 	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
 #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash)) &&				\
 	 (inet_sk(__sk)->daddr == (__saddr)) &&				\
 	 (inet_sk(__sk)->rcv_saddr == (__daddr)) &&			\
 	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash)) &&				\
 	 (inet_twsk(__sk)->tw_daddr == (__saddr)) &&			\
 	 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) &&		\
 	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */

 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
  * not check it for lookups anymore, thanks Alexey. -DaveM
  *
  * Local BH must be disabled here.
  */
 static inline struct sock *
 	__inet_lookup_established(struct inet_hashinfo *hashinfo,
 				  const u32 saddr, const u16 sport,
 				  const u32 daddr, const u16 hnum,
 				  const int dif)
 {
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
 	const struct hlist_node *node;
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
 	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);

 	prefetch(head->chain.first);
 	read_lock(&head->lock);
 	sk_for_each(sk, node, &head->chain) {
 		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit; /* You sunk my battleship! */
 	}

 	/* Must check for a TIME_WAIT'er before going to listener hash. */
 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
 		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
 			goto hit;
 	}
 	sk = NULL;
 out:
 	read_unlock(&head->lock);
 	return sk;
 hit:
 	sock_hold(sk);
 	goto out;
 }

 static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
 					 const u32 saddr, const u16 sport,
 					 const u32 daddr, const u16 hnum,
 					 const int dif)
 {
 	struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
 						    hnum, dif);
 	return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
 }

 static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
 				       const u32 saddr, const u16 sport,
 				       const u32 daddr, const u16 dport,
 				       const int dif)
 {
 	struct sock *sk;

 	local_bh_disable();
 	sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
 	local_bh_enable();

 	return sk;
 }
 #endif /* _INET_HASHTABLES_H */