Commit 53083773dcbd3c80477e2ace143e361e1e806745
Committed by David S. Miller
1 parent e56d8b8a2e
Exists in master and in 7 other branches
[INET]: Uninline the __inet_inherit_port call.
This deblats ~200 bytes when ipv6 and dccp are 'y'. Besides, this will ease compilation issues for patches I'm working on to make inet hash tables more scalable wrt net namespaces.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 17 additions and 13 deletions
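For context, this is the standard C uninlining pattern: the function body moves out of the header into a single translation unit, the header keeps only an extern declaration, and every object file that previously expanded its own inline copy now calls the one shared definition (here, duplicate copies existed because both the ipv6 and dccp code include this header). A minimal sketch of the pattern, using hypothetical file and function names rather than anything from this tree:

/* widget.h -- before the change, a static inline body lived here; after: */
#ifndef _WIDGET_H
#define _WIDGET_H

struct widget { int refcount; };

extern void widget_get(struct widget *w);       /* declaration only */

#endif /* _WIDGET_H */

/* widget.c -- the single out-of-line definition all callers now share */
#include "widget.h"

void widget_get(struct widget *w)
{
        w->refcount++;  /* code emitted once, not at every call site */
}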
include/net/inet_hashtables.h
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 * Authors:	Lotsa people, from code originally in tcp
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#ifndef _INET_HASHTABLES_H
#define _INET_HASHTABLES_H


#include <linux/interrupt.h>
#include <linux/ipv6.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>

#include <net/inet_connection_sock.h>
#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/tcp_states.h>

#include <asm/atomic.h>
#include <asm/byteorder.h>

/* This is for all connections with a full identity, no wildcards.
 * One chain is dedicated to TIME_WAIT sockets.
 * I'll experiment with dynamic table growth later.
 */
struct inet_ehash_bucket {
	struct hlist_head chain;
	struct hlist_head twchain;
};

/* There are a few simple rules, which allow for local port reuse by
 * an application.  In essence:
 *
 * 1) Sockets bound to different interfaces may share a local port.
 *    Failing that, goto test 2.
 * 2) If all sockets have sk->sk_reuse set, and none of them are in
 *    TCP_LISTEN state, the port may be shared.
 *    Failing that, goto test 3.
 * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
 *    address, and none of them are the same, the port may be
 *    shared.
 *    Failing this, the port cannot be shared.
 *
 * The interesting point, is test #2.  This is what an FTP server does
 * all day.  To optimize this case we use a specific flag bit defined
 * below.  As we add sockets to a bind bucket list, we perform a
 * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
 * As long as all sockets added to a bind bucket pass this test,
 * the flag bit will be set.
 * The resulting situation is that tcp_v[46]_verify_bind() can just check
 * for this flag bit, if it is set and the socket trying to bind has
 * sk->sk_reuse set, we don't even have to walk the owners list at all,
 * we return that it is ok to bind this socket to the requested local port.
 *
 * Sounds like a lot of work, but it is worth it.  In a more naive
 * implementation (ie. current FreeBSD etc.) the entire list of ports
 * must be walked for each data port opened by an ftp server.  Needless
 * to say, this does not scale at all.  With a couple thousand FTP
 * users logged onto your box, isn't it nice to know that new data
 * ports are created in O(1) time?  I thought so. ;-)  -DaveM
 */
struct inet_bind_bucket {
	struct net		*ib_net;
	unsigned short		port;
	signed short		fastreuse;
	struct hlist_node	node;
	struct hlist_head	owners;
};

#define inet_bind_bucket_for_each(tb, node, head) \
	hlist_for_each_entry(tb, node, head, node)

struct inet_bind_hashbucket {
	spinlock_t		lock;
	struct hlist_head	chain;
};

/* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE	32	/* Yes, really, this is all you need. */

struct inet_hashinfo {
	/* This is for sockets with full identity only.  Sockets here will
	 * always be without wildcards and will have the following invariant:
	 *
	 *          TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
	 *
	 * TIME_WAIT sockets use a separate chain (twchain).
	 */
	struct inet_ehash_bucket	*ehash;
	rwlock_t			*ehash_locks;
	unsigned int			ehash_size;
	unsigned int			ehash_locks_mask;

	/* Ok, let's try this, I give up, we do need a local binding
	 * TCP hash as well as the others for fast bind/connect.
	 */
	struct inet_bind_hashbucket	*bhash;

	unsigned int			bhash_size;
	/* Note : 4 bytes padding on 64 bit arches */

	/* All sockets in TCP_LISTEN state will be in here.  This is the only
	 * table where wildcard'd TCP sockets can exist.  Hash function here
	 * is just local port number.
	 */
	struct hlist_head		listening_hash[INET_LHTABLE_SIZE];

	/* All the above members are written once at bootup and
	 * never written again _or_ are predominantly read-access.
	 *
	 * Now align to a new cache line as all the following members
	 * are often dirty.
	 */
	rwlock_t			lhash_lock ____cacheline_aligned;
	atomic_t			lhash_users;
	wait_queue_head_t		lhash_wait;
	struct kmem_cache		*bind_bucket_cachep;
};

static inline struct inet_ehash_bucket *inet_ehash_bucket(
	struct inet_hashinfo *hashinfo,
	unsigned int hash)
{
	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
}

static inline rwlock_t *inet_ehash_lockp(
	struct inet_hashinfo *hashinfo,
	unsigned int hash)
{
	return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask];
}

static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int i, size = 256;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif
	if (nr_pcpus >= 4)
		size = 512;
	if (nr_pcpus >= 8)
		size = 1024;
	if (nr_pcpus >= 16)
		size = 2048;
	if (nr_pcpus >= 32)
		size = 4096;
	if (sizeof(rwlock_t) != 0) {
#ifdef CONFIG_NUMA
		if (size * sizeof(rwlock_t) > PAGE_SIZE)
			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
		else
#endif
		hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
						GFP_KERNEL);
		if (!hashinfo->ehash_locks)
			return ENOMEM;
		for (i = 0; i < size; i++)
			rwlock_init(&hashinfo->ehash_locks[i]);
	}
	hashinfo->ehash_locks_mask = size - 1;
	return 0;
}

static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
	if (hashinfo->ehash_locks) {
#ifdef CONFIG_NUMA
		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
							sizeof(rwlock_t);
		if (size > PAGE_SIZE)
			vfree(hashinfo->ehash_locks);
		else
#endif
		kfree(hashinfo->ehash_locks);
		hashinfo->ehash_locks = NULL;
	}
}

extern struct inet_bind_bucket *
		    inet_bind_bucket_create(struct kmem_cache *cachep,
					    struct net *net,
					    struct inet_bind_hashbucket *head,
					    const unsigned short snum);
extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
				     struct inet_bind_bucket *tb);

static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
{
	return lport & (bhash_size - 1);
}

extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
			   const unsigned short snum);

/* These can have wildcards, don't try too hard. */
static inline int inet_lhashfn(const unsigned short num)
{
	return num & (INET_LHTABLE_SIZE - 1);
}

static inline int inet_sk_listen_hashfn(const struct sock *sk)
{
	return inet_lhashfn(inet_sk(sk)->num);
}

/* Caller must disable local BH processing. */
-static inline void __inet_inherit_port(struct sock *sk, struct sock *child)
-{
-	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
-	struct inet_bind_hashbucket *head = &table->bhash[bhash];
-	struct inet_bind_bucket *tb;
-
-	spin_lock(&head->lock);
-	tb = inet_csk(sk)->icsk_bind_hash;
-	sk_add_bind_node(child, &tb->owners);
-	inet_csk(child)->icsk_bind_hash = tb;
-	spin_unlock(&head->lock);
-}
+extern void __inet_inherit_port(struct sock *sk, struct sock *child);

extern void inet_put_port(struct sock *sk);

extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);

/*
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
 */
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
	/* read_lock synchronizes to candidates to writers */
	read_lock(&hashinfo->lhash_lock);
	atomic_inc(&hashinfo->lhash_users);
	read_unlock(&hashinfo->lhash_lock);
}

static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
	if (atomic_dec_and_test(&hashinfo->lhash_users))
		wake_up(&hashinfo->lhash_wait);
}

extern void __inet_hash_nolisten(struct sock *sk);
extern void inet_hash(struct sock *sk);
extern void inet_unhash(struct sock *sk);

extern struct sock *__inet_lookup_listener(struct net *net,
					   struct inet_hashinfo *hashinfo,
					   const __be32 daddr,
					   const unsigned short hnum,
					   const int dif);

static inline struct sock *inet_lookup_listener(struct net *net,
						struct inet_hashinfo *hashinfo,
						__be32 daddr, __be16 dport, int dif)
{
	return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif);
}

/* Socket demux engine toys. */
/* What happens here is ugly; there's a pair of adjacent fields in
   struct inet_sock; __be16 dport followed by __u16 num.  We want to
   search by pair, so we combine the keys into a single 32bit value
   and compare with 32bit value read from &...->dport.  Let's at least
   make sure that it's not mixed with anything else...
   On 64bit targets we combine comparisons with pair of adjacent __be32
   fields in the same way.
*/
typedef __u32 __bitwise __portpair;
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
	((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
#endif

#if (BITS_PER_LONG == 64)
typedef __u64 __bitwise __addrpair;
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__saddr)) << 32) | \
				   ((__force __u64)(__be32)(__daddr)));
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
	const __addrpair __name = (__force __addrpair) ( \
				   (((__force __u64)(__be32)(__daddr)) << 32) | \
				   ((__force __u64)(__be32)(__saddr)));
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) &&	\
	 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) &&		\
	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) &&		\
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) &&	\
	 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \
	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) &&	\
	 (inet_sk(__sk)->daddr == (__saddr)) &&					\
	 (inet_sk(__sk)->rcv_saddr == (__daddr)) &&				\
	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) &&		\
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \
	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) &&	\
	 (inet_twsk(__sk)->tw_daddr == (__saddr)) &&				\
	 (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) &&			\
	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */

/*
 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
 * not check it for lookups anymore, thanks Alexey. -DaveM
 *
 * Local BH must be disabled here.
 */
extern struct sock * __inet_lookup_established(struct net *net,
		struct inet_hashinfo *hashinfo,
		const __be32 saddr, const __be16 sport,
		const __be32 daddr, const u16 hnum, const int dif);

static inline struct sock *
	inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
				const __be32 saddr, const __be16 sport,
				const __be32 daddr, const __be16 dport,
				const int dif)
{
	return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
					 ntohs(dport), dif);
}

static inline struct sock *__inet_lookup(struct net *net,
					 struct inet_hashinfo *hashinfo,
					 const __be32 saddr, const __be16 sport,
					 const __be32 daddr, const __be16 dport,
					 const int dif)
{
	u16 hnum = ntohs(dport);
	struct sock *sk = __inet_lookup_established(net, hashinfo,
				saddr, sport, daddr, hnum, dif);

	return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif);
}

static inline struct sock *inet_lookup(struct net *net,
				       struct inet_hashinfo *hashinfo,
				       const __be32 saddr, const __be16 sport,
				       const __be32 daddr, const __be16 dport,
				       const int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif);
	local_bh_enable();

	return sk;
}

extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **),
		void (*hash)(struct sock *sk));
extern int inet_hash_connect(struct inet_timewait_death_row *death_row,
			     struct sock *sk);
#endif /* _INET_HASHTABLES_H */
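The extern declaration above pairs with the out-of-line definition added to net/ipv4/inet_hashtables.c below. For orientation, the helper is used on the accept path: when a listener spawns a child socket, the child joins the parent's inet_bind_bucket so that a later inet_put_port() on the child balances the local-port accounting. A rough, hedged sketch of the calling convention (the wrapper name is illustrative; in this era of the tree the real callers are the TCP/DCCP *_syn_recv_sock() paths, which run with BH already disabled as the header comment requires):

/* Hypothetical illustration of the accept-path usage, not kernel code */
static void demo_accept_child(struct sock *listener, struct sock *child)
{
	__inet_hash_nolisten(child);		/* child enters the ehash */
	__inet_inherit_port(listener, child);	/* child joins listener's bind bucket */
}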
net/ipv4/inet_hashtables.c
1 | /* | 1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | * operating system. INET is implemented using the BSD Socket | 3 | * operating system. INET is implemented using the BSD Socket |
4 | * interface as the means of communication with the user level. | 4 | * interface as the means of communication with the user level. |
5 | * | 5 | * |
6 | * Generic INET transport hashtables | 6 | * Generic INET transport hashtables |
7 | * | 7 | * |
8 | * Authors: Lotsa people, from code originally in tcp | 8 | * Authors: Lotsa people, from code originally in tcp |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public License | 11 | * modify it under the terms of the GNU General Public License |
12 | * as published by the Free Software Foundation; either version | 12 | * as published by the Free Software Foundation; either version |
13 | * 2 of the License, or (at your option) any later version. | 13 | * 2 of the License, or (at your option) any later version. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/random.h> | 17 | #include <linux/random.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/wait.h> | 20 | #include <linux/wait.h> |
21 | 21 | ||
22 | #include <net/inet_connection_sock.h> | 22 | #include <net/inet_connection_sock.h> |
23 | #include <net/inet_hashtables.h> | 23 | #include <net/inet_hashtables.h> |
24 | #include <net/ip.h> | 24 | #include <net/ip.h> |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Allocate and initialize a new local port bind bucket. | 27 | * Allocate and initialize a new local port bind bucket. |
28 | * The bindhash mutex for snum's hash chain must be held here. | 28 | * The bindhash mutex for snum's hash chain must be held here. |
29 | */ | 29 | */ |
30 | struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, | 30 | struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, |
31 | struct net *net, | 31 | struct net *net, |
32 | struct inet_bind_hashbucket *head, | 32 | struct inet_bind_hashbucket *head, |
33 | const unsigned short snum) | 33 | const unsigned short snum) |
34 | { | 34 | { |
35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); | 35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); |
36 | 36 | ||
37 | if (tb != NULL) { | 37 | if (tb != NULL) { |
38 | tb->ib_net = hold_net(net); | 38 | tb->ib_net = hold_net(net); |
39 | tb->port = snum; | 39 | tb->port = snum; |
40 | tb->fastreuse = 0; | 40 | tb->fastreuse = 0; |
41 | INIT_HLIST_HEAD(&tb->owners); | 41 | INIT_HLIST_HEAD(&tb->owners); |
42 | hlist_add_head(&tb->node, &head->chain); | 42 | hlist_add_head(&tb->node, &head->chain); |
43 | } | 43 | } |
44 | return tb; | 44 | return tb; |
45 | } | 45 | } |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Caller must hold hashbucket lock for this tb with local BH disabled | 48 | * Caller must hold hashbucket lock for this tb with local BH disabled |
49 | */ | 49 | */ |
50 | void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) | 50 | void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) |
51 | { | 51 | { |
52 | if (hlist_empty(&tb->owners)) { | 52 | if (hlist_empty(&tb->owners)) { |
53 | __hlist_del(&tb->node); | 53 | __hlist_del(&tb->node); |
54 | release_net(tb->ib_net); | 54 | release_net(tb->ib_net); |
55 | kmem_cache_free(cachep, tb); | 55 | kmem_cache_free(cachep, tb); |
56 | } | 56 | } |
57 | } | 57 | } |
58 | 58 | ||
59 | void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | 59 | void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, |
60 | const unsigned short snum) | 60 | const unsigned short snum) |
61 | { | 61 | { |
62 | inet_sk(sk)->num = snum; | 62 | inet_sk(sk)->num = snum; |
63 | sk_add_bind_node(sk, &tb->owners); | 63 | sk_add_bind_node(sk, &tb->owners); |
64 | inet_csk(sk)->icsk_bind_hash = tb; | 64 | inet_csk(sk)->icsk_bind_hash = tb; |
65 | } | 65 | } |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * Get rid of any references to a local port held by the given sock. | 68 | * Get rid of any references to a local port held by the given sock. |
69 | */ | 69 | */ |
70 | static void __inet_put_port(struct sock *sk) | 70 | static void __inet_put_port(struct sock *sk) |
71 | { | 71 | { |
72 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 72 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
73 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); | 73 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); |
74 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; | 74 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; |
75 | struct inet_bind_bucket *tb; | 75 | struct inet_bind_bucket *tb; |
76 | 76 | ||
77 | spin_lock(&head->lock); | 77 | spin_lock(&head->lock); |
78 | tb = inet_csk(sk)->icsk_bind_hash; | 78 | tb = inet_csk(sk)->icsk_bind_hash; |
79 | __sk_del_bind_node(sk); | 79 | __sk_del_bind_node(sk); |
80 | inet_csk(sk)->icsk_bind_hash = NULL; | 80 | inet_csk(sk)->icsk_bind_hash = NULL; |
81 | inet_sk(sk)->num = 0; | 81 | inet_sk(sk)->num = 0; |
82 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | 82 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); |
83 | spin_unlock(&head->lock); | 83 | spin_unlock(&head->lock); |
84 | } | 84 | } |
85 | 85 | ||
86 | void inet_put_port(struct sock *sk) | 86 | void inet_put_port(struct sock *sk) |
87 | { | 87 | { |
88 | local_bh_disable(); | 88 | local_bh_disable(); |
89 | __inet_put_port(sk); | 89 | __inet_put_port(sk); |
90 | local_bh_enable(); | 90 | local_bh_enable(); |
91 | } | 91 | } |
92 | 92 | ||
93 | EXPORT_SYMBOL(inet_put_port); | 93 | EXPORT_SYMBOL(inet_put_port); |
94 | 94 | ||
95 | void __inet_inherit_port(struct sock *sk, struct sock *child) | ||
96 | { | ||
97 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; | ||
98 | const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); | ||
99 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | ||
100 | struct inet_bind_bucket *tb; | ||
101 | |||
102 | spin_lock(&head->lock); | ||
103 | tb = inet_csk(sk)->icsk_bind_hash; | ||
104 | sk_add_bind_node(child, &tb->owners); | ||
105 | inet_csk(child)->icsk_bind_hash = tb; | ||
106 | spin_unlock(&head->lock); | ||
107 | } | ||
108 | |||
109 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | ||
110 | |||
95 | /* | 111 | /* |
96 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | 112 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. |
97 | * Look, when several writers sleep and reader wakes them up, all but one | 113 | * Look, when several writers sleep and reader wakes them up, all but one |
98 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves | 114 | * immediately hit write lock and grab all the cpus. Exclusive sleep solves |
99 | * this, _but_ remember, it adds useless work on UP machines (wake up each | 115 | * this, _but_ remember, it adds useless work on UP machines (wake up each |
100 | * exclusive lock release). It should be ifdefed really. | 116 | * exclusive lock release). It should be ifdefed really. |
101 | */ | 117 | */ |
102 | void inet_listen_wlock(struct inet_hashinfo *hashinfo) | 118 | void inet_listen_wlock(struct inet_hashinfo *hashinfo) |
103 | __acquires(hashinfo->lhash_lock) | 119 | __acquires(hashinfo->lhash_lock) |
104 | { | 120 | { |
105 | write_lock(&hashinfo->lhash_lock); | 121 | write_lock(&hashinfo->lhash_lock); |
106 | 122 | ||
107 | if (atomic_read(&hashinfo->lhash_users)) { | 123 | if (atomic_read(&hashinfo->lhash_users)) { |
108 | DEFINE_WAIT(wait); | 124 | DEFINE_WAIT(wait); |
109 | 125 | ||
110 | for (;;) { | 126 | for (;;) { |
111 | prepare_to_wait_exclusive(&hashinfo->lhash_wait, | 127 | prepare_to_wait_exclusive(&hashinfo->lhash_wait, |
112 | &wait, TASK_UNINTERRUPTIBLE); | 128 | &wait, TASK_UNINTERRUPTIBLE); |
113 | if (!atomic_read(&hashinfo->lhash_users)) | 129 | if (!atomic_read(&hashinfo->lhash_users)) |
114 | break; | 130 | break; |
115 | write_unlock_bh(&hashinfo->lhash_lock); | 131 | write_unlock_bh(&hashinfo->lhash_lock); |
116 | schedule(); | 132 | schedule(); |
117 | write_lock_bh(&hashinfo->lhash_lock); | 133 | write_lock_bh(&hashinfo->lhash_lock); |
118 | } | 134 | } |
119 | 135 | ||
120 | finish_wait(&hashinfo->lhash_wait, &wait); | 136 | finish_wait(&hashinfo->lhash_wait, &wait); |
121 | } | 137 | } |
122 | } | 138 | } |
123 | 139 | ||
124 | /* | 140 | /* |
125 | * Don't inline this cruft. Here are some nice properties to exploit here. The | 141 | * Don't inline this cruft. Here are some nice properties to exploit here. The |
126 | * BSD API does not allow a listening sock to specify the remote port nor the | 142 | * BSD API does not allow a listening sock to specify the remote port nor the |
127 | * remote address for the connection. So always assume those are both | 143 | * remote address for the connection. So always assume those are both |
128 | * wildcarded during the search since they can never be otherwise. | 144 | * wildcarded during the search since they can never be otherwise. |
129 | */ | 145 | */ |
130 | static struct sock *inet_lookup_listener_slow(struct net *net, | 146 | static struct sock *inet_lookup_listener_slow(struct net *net, |
131 | const struct hlist_head *head, | 147 | const struct hlist_head *head, |
132 | const __be32 daddr, | 148 | const __be32 daddr, |
133 | const unsigned short hnum, | 149 | const unsigned short hnum, |
134 | const int dif) | 150 | const int dif) |
135 | { | 151 | { |
136 | struct sock *result = NULL, *sk; | 152 | struct sock *result = NULL, *sk; |
137 | const struct hlist_node *node; | 153 | const struct hlist_node *node; |
138 | int hiscore = -1; | 154 | int hiscore = -1; |
139 | 155 | ||
140 | sk_for_each(sk, node, head) { | 156 | sk_for_each(sk, node, head) { |
141 | const struct inet_sock *inet = inet_sk(sk); | 157 | const struct inet_sock *inet = inet_sk(sk); |
142 | 158 | ||
143 | if (net_eq(sock_net(sk), net) && inet->num == hnum && | 159 | if (net_eq(sock_net(sk), net) && inet->num == hnum && |
144 | !ipv6_only_sock(sk)) { | 160 | !ipv6_only_sock(sk)) { |
145 | const __be32 rcv_saddr = inet->rcv_saddr; | 161 | const __be32 rcv_saddr = inet->rcv_saddr; |
146 | int score = sk->sk_family == PF_INET ? 1 : 0; | 162 | int score = sk->sk_family == PF_INET ? 1 : 0; |
147 | 163 | ||
148 | if (rcv_saddr) { | 164 | if (rcv_saddr) { |
149 | if (rcv_saddr != daddr) | 165 | if (rcv_saddr != daddr) |
150 | continue; | 166 | continue; |
151 | score += 2; | 167 | score += 2; |
152 | } | 168 | } |
153 | if (sk->sk_bound_dev_if) { | 169 | if (sk->sk_bound_dev_if) { |
154 | if (sk->sk_bound_dev_if != dif) | 170 | if (sk->sk_bound_dev_if != dif) |
155 | continue; | 171 | continue; |
156 | score += 2; | 172 | score += 2; |
157 | } | 173 | } |
158 | if (score == 5) | 174 | if (score == 5) |
159 | return sk; | 175 | return sk; |
160 | if (score > hiscore) { | 176 | if (score > hiscore) { |
161 | hiscore = score; | 177 | hiscore = score; |
162 | result = sk; | 178 | result = sk; |
163 | } | 179 | } |
164 | } | 180 | } |
165 | } | 181 | } |
166 | return result; | 182 | return result; |
167 | } | 183 | } |
168 | 184 | ||
169 | /* Optimize the common listener case. */ | 185 | /* Optimize the common listener case. */ |
170 | struct sock *__inet_lookup_listener(struct net *net, | 186 | struct sock *__inet_lookup_listener(struct net *net, |
171 | struct inet_hashinfo *hashinfo, | 187 | struct inet_hashinfo *hashinfo, |
172 | const __be32 daddr, const unsigned short hnum, | 188 | const __be32 daddr, const unsigned short hnum, |
173 | const int dif) | 189 | const int dif) |
174 | { | 190 | { |
175 | struct sock *sk = NULL; | 191 | struct sock *sk = NULL; |
176 | const struct hlist_head *head; | 192 | const struct hlist_head *head; |
177 | 193 | ||
178 | read_lock(&hashinfo->lhash_lock); | 194 | read_lock(&hashinfo->lhash_lock); |
179 | head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; | 195 | head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; |
180 | if (!hlist_empty(head)) { | 196 | if (!hlist_empty(head)) { |
181 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); | 197 | const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); |
182 | 198 | ||
183 | if (inet->num == hnum && !sk->sk_node.next && | 199 | if (inet->num == hnum && !sk->sk_node.next && |
184 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | 200 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && |
185 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | 201 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && |
186 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) | 202 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) |
187 | goto sherry_cache; | 203 | goto sherry_cache; |
188 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); | 204 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); |
189 | } | 205 | } |
190 | if (sk) { | 206 | if (sk) { |
191 | sherry_cache: | 207 | sherry_cache: |
192 | sock_hold(sk); | 208 | sock_hold(sk); |
193 | } | 209 | } |
194 | read_unlock(&hashinfo->lhash_lock); | 210 | read_unlock(&hashinfo->lhash_lock); |
195 | return sk; | 211 | return sk; |
196 | } | 212 | } |
197 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 213 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
198 | 214 | ||
199 | struct sock * __inet_lookup_established(struct net *net, | 215 | struct sock * __inet_lookup_established(struct net *net, |
200 | struct inet_hashinfo *hashinfo, | 216 | struct inet_hashinfo *hashinfo, |
201 | const __be32 saddr, const __be16 sport, | 217 | const __be32 saddr, const __be16 sport, |
202 | const __be32 daddr, const u16 hnum, | 218 | const __be32 daddr, const u16 hnum, |
203 | const int dif) | 219 | const int dif) |
204 | { | 220 | { |
205 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 221 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
206 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); | 222 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
207 | struct sock *sk; | 223 | struct sock *sk; |
208 | const struct hlist_node *node; | 224 | const struct hlist_node *node; |
209 | /* Optimize here for direct hit, only listening connections can | 225 | /* Optimize here for direct hit, only listening connections can |
210 | * have wildcards anyways. | 226 | * have wildcards anyways. |
211 | */ | 227 | */ |
212 | unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); | 228 | unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); |
213 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); | 229 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); |
214 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); | 230 | rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); |
215 | 231 | ||
216 | prefetch(head->chain.first); | 232 | prefetch(head->chain.first); |
217 | read_lock(lock); | 233 | read_lock(lock); |
218 | sk_for_each(sk, node, &head->chain) { | 234 | sk_for_each(sk, node, &head->chain) { |
219 | if (INET_MATCH(sk, net, hash, acookie, | 235 | if (INET_MATCH(sk, net, hash, acookie, |
220 | saddr, daddr, ports, dif)) | 236 | saddr, daddr, ports, dif)) |
221 | goto hit; /* You sunk my battleship! */ | 237 | goto hit; /* You sunk my battleship! */ |
222 | } | 238 | } |
223 | 239 | ||
224 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 240 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
225 | sk_for_each(sk, node, &head->twchain) { | 241 | sk_for_each(sk, node, &head->twchain) { |
226 | if (INET_TW_MATCH(sk, net, hash, acookie, | 242 | if (INET_TW_MATCH(sk, net, hash, acookie, |
227 | saddr, daddr, ports, dif)) | 243 | saddr, daddr, ports, dif)) |
228 | goto hit; | 244 | goto hit; |
229 | } | 245 | } |
230 | sk = NULL; | 246 | sk = NULL; |
231 | out: | 247 | out: |
232 | read_unlock(lock); | 248 | read_unlock(lock); |
233 | return sk; | 249 | return sk; |
234 | hit: | 250 | hit: |
235 | sock_hold(sk); | 251 | sock_hold(sk); |
236 | goto out; | 252 | goto out; |
237 | } | 253 | } |
238 | EXPORT_SYMBOL_GPL(__inet_lookup_established); | 254 | EXPORT_SYMBOL_GPL(__inet_lookup_established); |
239 | 255 | ||
240 | /* called with local bh disabled */ | 256 | /* called with local bh disabled */ |
241 | static int __inet_check_established(struct inet_timewait_death_row *death_row, | 257 | static int __inet_check_established(struct inet_timewait_death_row *death_row, |
242 | struct sock *sk, __u16 lport, | 258 | struct sock *sk, __u16 lport, |
243 | struct inet_timewait_sock **twp) | 259 | struct inet_timewait_sock **twp) |
244 | { | 260 | { |
245 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 261 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
246 | struct inet_sock *inet = inet_sk(sk); | 262 | struct inet_sock *inet = inet_sk(sk); |
247 | __be32 daddr = inet->rcv_saddr; | 263 | __be32 daddr = inet->rcv_saddr; |
248 | __be32 saddr = inet->daddr; | 264 | __be32 saddr = inet->daddr; |
249 | int dif = sk->sk_bound_dev_if; | 265 | int dif = sk->sk_bound_dev_if; |
250 | INET_ADDR_COOKIE(acookie, saddr, daddr) | 266 | INET_ADDR_COOKIE(acookie, saddr, daddr) |
251 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); | 267 | const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); |
252 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); | 268 | unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); |
253 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); | 269 | struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); |
254 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); | 270 | rwlock_t *lock = inet_ehash_lockp(hinfo, hash); |
255 | struct sock *sk2; | 271 | struct sock *sk2; |
256 | const struct hlist_node *node; | 272 | const struct hlist_node *node; |
257 | struct inet_timewait_sock *tw; | 273 | struct inet_timewait_sock *tw; |
258 | struct net *net = sock_net(sk); | 274 | struct net *net = sock_net(sk); |
259 | 275 | ||
260 | prefetch(head->chain.first); | 276 | prefetch(head->chain.first); |
261 | write_lock(lock); | 277 | write_lock(lock); |
262 | 278 | ||
263 | /* Check TIME-WAIT sockets first. */ | 279 | /* Check TIME-WAIT sockets first. */ |
264 | sk_for_each(sk2, node, &head->twchain) { | 280 | sk_for_each(sk2, node, &head->twchain) { |
265 | tw = inet_twsk(sk2); | 281 | tw = inet_twsk(sk2); |
266 | 282 | ||
267 | if (INET_TW_MATCH(sk2, net, hash, acookie, | 283 | if (INET_TW_MATCH(sk2, net, hash, acookie, |
268 | saddr, daddr, ports, dif)) { | 284 | saddr, daddr, ports, dif)) { |
269 | if (twsk_unique(sk, sk2, twp)) | 285 | if (twsk_unique(sk, sk2, twp)) |
270 | goto unique; | 286 | goto unique; |
271 | else | 287 | else |
272 | goto not_unique; | 288 | goto not_unique; |
273 | } | 289 | } |
274 | } | 290 | } |
275 | tw = NULL; | 291 | tw = NULL; |
276 | 292 | ||
277 | /* And established part... */ | 293 | /* And established part... */ |
278 | sk_for_each(sk2, node, &head->chain) { | 294 | sk_for_each(sk2, node, &head->chain) { |
279 | if (INET_MATCH(sk2, net, hash, acookie, | 295 | if (INET_MATCH(sk2, net, hash, acookie, |
280 | saddr, daddr, ports, dif)) | 296 | saddr, daddr, ports, dif)) |
281 | goto not_unique; | 297 | goto not_unique; |
282 | } | 298 | } |
283 | 299 | ||
284 | unique: | 300 | unique: |
285 | /* Must record num and sport now. Otherwise we will see | 301 | /* Must record num and sport now. Otherwise we will see |
286 | * in hash table socket with a funny identity. */ | 302 | * in hash table socket with a funny identity. */ |
287 | inet->num = lport; | 303 | inet->num = lport; |
288 | inet->sport = htons(lport); | 304 | inet->sport = htons(lport); |
289 | sk->sk_hash = hash; | 305 | sk->sk_hash = hash; |
290 | BUG_TRAP(sk_unhashed(sk)); | 306 | BUG_TRAP(sk_unhashed(sk)); |
291 | __sk_add_node(sk, &head->chain); | 307 | __sk_add_node(sk, &head->chain); |
292 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 308 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
293 | write_unlock(lock); | 309 | write_unlock(lock); |
294 | 310 | ||
295 | if (twp) { | 311 | if (twp) { |
296 | *twp = tw; | 312 | *twp = tw; |
297 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 313 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
298 | } else if (tw) { | 314 | } else if (tw) { |
299 | /* Silly. Should hash-dance instead... */ | 315 | /* Silly. Should hash-dance instead... */ |
300 | inet_twsk_deschedule(tw, death_row); | 316 | inet_twsk_deschedule(tw, death_row); |
301 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 317 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
302 | 318 | ||
303 | inet_twsk_put(tw); | 319 | inet_twsk_put(tw); |
304 | } | 320 | } |
305 | 321 | ||
306 | return 0; | 322 | return 0; |
307 | 323 | ||
308 | not_unique: | 324 | not_unique: |
309 | write_unlock(lock); | 325 | write_unlock(lock); |
310 | return -EADDRNOTAVAIL; | 326 | return -EADDRNOTAVAIL; |
311 | } | 327 | } |
312 | 328 | ||
313 | static inline u32 inet_sk_port_offset(const struct sock *sk) | 329 | static inline u32 inet_sk_port_offset(const struct sock *sk) |
314 | { | 330 | { |
315 | const struct inet_sock *inet = inet_sk(sk); | 331 | const struct inet_sock *inet = inet_sk(sk); |
316 | return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, | 332 | return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, |
317 | inet->dport); | 333 | inet->dport); |
318 | } | 334 | } |
319 | 335 | ||
320 | void __inet_hash_nolisten(struct sock *sk) | 336 | void __inet_hash_nolisten(struct sock *sk) |
321 | { | 337 | { |
322 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 338 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
323 | struct hlist_head *list; | 339 | struct hlist_head *list; |
324 | rwlock_t *lock; | 340 | rwlock_t *lock; |
325 | struct inet_ehash_bucket *head; | 341 | struct inet_ehash_bucket *head; |
326 | 342 | ||
327 | BUG_TRAP(sk_unhashed(sk)); | 343 | BUG_TRAP(sk_unhashed(sk)); |
328 | 344 | ||
329 | sk->sk_hash = inet_sk_ehashfn(sk); | 345 | sk->sk_hash = inet_sk_ehashfn(sk); |
330 | head = inet_ehash_bucket(hashinfo, sk->sk_hash); | 346 | head = inet_ehash_bucket(hashinfo, sk->sk_hash); |
331 | list = &head->chain; | 347 | list = &head->chain; |
332 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | 348 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); |
333 | 349 | ||
334 | write_lock(lock); | 350 | write_lock(lock); |
335 | __sk_add_node(sk, list); | 351 | __sk_add_node(sk, list); |
336 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 352 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
337 | write_unlock(lock); | 353 | write_unlock(lock); |
338 | } | 354 | } |
339 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 355 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
340 | 356 | ||
341 | static void __inet_hash(struct sock *sk) | 357 | static void __inet_hash(struct sock *sk) |
342 | { | 358 | { |
343 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 359 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
344 | struct hlist_head *list; | 360 | struct hlist_head *list; |
345 | rwlock_t *lock; | 361 | rwlock_t *lock; |
346 | 362 | ||
347 | if (sk->sk_state != TCP_LISTEN) { | 363 | if (sk->sk_state != TCP_LISTEN) { |
348 | __inet_hash_nolisten(sk); | 364 | __inet_hash_nolisten(sk); |
349 | return; | 365 | return; |
350 | } | 366 | } |
351 | 367 | ||
352 | BUG_TRAP(sk_unhashed(sk)); | 368 | BUG_TRAP(sk_unhashed(sk)); |
353 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; | 369 | list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; |
354 | lock = &hashinfo->lhash_lock; | 370 | lock = &hashinfo->lhash_lock; |
355 | 371 | ||
356 | inet_listen_wlock(hashinfo); | 372 | inet_listen_wlock(hashinfo); |
357 | __sk_add_node(sk, list); | 373 | __sk_add_node(sk, list); |
358 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 374 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
359 | write_unlock(lock); | 375 | write_unlock(lock); |
360 | wake_up(&hashinfo->lhash_wait); | 376 | wake_up(&hashinfo->lhash_wait); |
361 | } | 377 | } |
362 | 378 | ||
363 | void inet_hash(struct sock *sk) | 379 | void inet_hash(struct sock *sk) |
364 | { | 380 | { |
365 | if (sk->sk_state != TCP_CLOSE) { | 381 | if (sk->sk_state != TCP_CLOSE) { |
366 | local_bh_disable(); | 382 | local_bh_disable(); |
367 | __inet_hash(sk); | 383 | __inet_hash(sk); |
368 | local_bh_enable(); | 384 | local_bh_enable(); |
369 | } | 385 | } |
370 | } | 386 | } |
371 | EXPORT_SYMBOL_GPL(inet_hash); | 387 | EXPORT_SYMBOL_GPL(inet_hash); |
372 | 388 | ||
void inet_unhash(struct sock *sk)
{
	rwlock_t *lock;
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;

	if (sk_unhashed(sk))
		goto out;

	if (sk->sk_state == TCP_LISTEN) {
		local_bh_disable();
		inet_listen_wlock(hashinfo);
		lock = &hashinfo->lhash_lock;
	} else {
		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
		write_lock_bh(lock);
	}

	if (__sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(lock);
out:
	if (sk->sk_state == TCP_LISTEN)
		wake_up(&hashinfo->lhash_wait);
}
EXPORT_SYMBOL_GPL(inet_unhash);

int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **),
		void (*hash)(struct sock *sk))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;
	struct net *net = sock_net(sk);

	if (!snum) {
		int i, remaining, low, high, port;
		static u32 hint;
		u32 offset = hint + port_offset;
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		local_bh_disable();
		for (i = 1; i <= remaining; i++) {
			port = low + (i + offset) % remaining;
			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->ib_net == net && tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!check_established(death_row, sk,
							       port, &tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					net, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = check_established(death_row, sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

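A note on the search arithmetic above: with i running from 1 to remaining and a fixed starting offset, (i + offset) % remaining takes every residue exactly once, so each port in [low, high] is probed exactly once before the function gives up with -EADDRNOTAVAIL. A minimal, self-contained userspace illustration of the same arithmetic (all values invented, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		int low = 32768, high = 32775;	/* tiny range for illustration */
		int remaining = (high - low) + 1;
		unsigned int offset = 12345u;	/* stands in for hint + port_offset */

		/* Each iteration lands on a distinct residue mod 'remaining',
		 * so every port in [low, high] is visited exactly once.
		 */
		for (int i = 1; i <= remaining; i++) {
			int port = low + (int)((i + offset) % remaining);
			printf("try port %d\n", port);
		}
		return 0;
	}
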
/*
 * Bind a port for a connect operation and hash it.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
			__inet_check_established, __inet_hash_nolisten);
}

EXPORT_SYMBOL_GPL(inet_hash_connect);

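For context, the intended caller is the transport's connect() path. A hedged, abridged sketch of the call site as remembered from tcp_v4_connect() of this era (the failure label is an assumption):

	/* Sketch (abridged): the socket enters SYN_SENT, then an ephemeral
	 * port is bound and the socket hashed in one step.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;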