Blame view

net/ipv4/inet_hashtables.c 21.2 KB
2874c5fd2   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-or-later
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
2
3
4
5
6
7
8
9
  /*
   * INET		An implementation of the TCP/IP protocol suite for the LINUX
   *		operating system.  INET is implemented using the BSD Socket
   *		interface as the means of communication with the user level.
   *
   *		Generic INET transport hashtables
   *
   * Authors:	Lotsa people, from code originally in tcp
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
10
   */
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
11
  #include <linux/module.h>
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
12
  #include <linux/random.h>
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
13
  #include <linux/sched.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
14
  #include <linux/slab.h>
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
15
  #include <linux/wait.h>
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
16
  #include <linux/vmalloc.h>
57c8a661d   Mike Rapoport   mm: remove includ...
17
  #include <linux/memblock.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
18

c125e80b8   Craig Gallek   soreuseport: fast...
19
  #include <net/addrconf.h>
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
20
  #include <net/inet_connection_sock.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
21
  #include <net/inet_hashtables.h>
6e5714eaf   David S. Miller   net: Compute prot...
22
  #include <net/secure_seq.h>
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
23
  #include <net/ip.h>
a04a480d4   David Ahern   net: Require exac...
24
  #include <net/tcp.h>
c125e80b8   Craig Gallek   soreuseport: fast...
25
  #include <net/sock_reuseport.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
26

6eada0110   Eric Dumazet   netns: constify n...
27
28
29
  static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
  			const __u16 lport, const __be32 faddr,
  			const __be16 fport)
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
30
  {
1bbdceef1   Hannes Frederic Sowa   inet: convert ine...
31
32
33
  	static u32 inet_ehash_secret __read_mostly;
  
  	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
34
35
36
  	return __inet_ehashfn(laddr, lport, faddr, fport,
  			      inet_ehash_secret + net_hash_mix(net));
  }
d1e559d0b   Eric Dumazet   inet: add IPv6 su...
37
38
39
  /* This function handles inet_sock, but also timewait and request sockets
   * for IPv4/IPv6.
   */
784c372a8   Eric Dumazet   net: make sk_ehas...
40
  static u32 sk_ehashfn(const struct sock *sk)
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
41
  {
d1e559d0b   Eric Dumazet   inet: add IPv6 su...
42
43
44
45
46
47
48
  #if IS_ENABLED(CONFIG_IPV6)
  	if (sk->sk_family == AF_INET6 &&
  	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
  		return inet6_ehashfn(sock_net(sk),
  				     &sk->sk_v6_rcv_saddr, sk->sk_num,
  				     &sk->sk_v6_daddr, sk->sk_dport);
  #endif
5b441f76f   Eric Dumazet   net: introduce sk...
49
50
51
  	return inet_ehashfn(sock_net(sk),
  			    sk->sk_rcv_saddr, sk->sk_num,
  			    sk->sk_daddr, sk->sk_dport);
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
52
  }
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
53
54
55
56
  /*
   * Allocate and initialize a new local port bind bucket.
   * The bindhash mutex for snum's hash chain must be held here.
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
57
  struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
941b1d22c   Pavel Emelyanov   [NETNS]: Make bin...
58
  						 struct net *net,
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
59
  						 struct inet_bind_hashbucket *head,
3c82a21f4   Robert Shearman   net: allow bindin...
60
61
  						 const unsigned short snum,
  						 int l3mdev)
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
62
  {
54e6ecb23   Christoph Lameter   [PATCH] slab: rem...
63
  	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
64

00db41243   Ian Morris   ipv4: coding styl...
65
  	if (tb) {
efd7ef1c1   Eric W. Biederman   net: Kill hold_ne...
66
  		write_pnet(&tb->ib_net, net);
3c82a21f4   Robert Shearman   net: allow bindin...
67
  		tb->l3mdev    = l3mdev;
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
68
69
  		tb->port      = snum;
  		tb->fastreuse = 0;
da5e36308   Tom Herbert   soreuseport: TCP/...
70
  		tb->fastreuseport = 0;
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
71
72
73
74
75
  		INIT_HLIST_HEAD(&tb->owners);
  		hlist_add_head(&tb->node, &head->chain);
  	}
  	return tb;
  }
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
76
77
78
  /*
   * Caller must hold hashbucket lock for this tb with local BH disabled
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
79
  void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
80
81
82
83
84
85
  {
  	if (hlist_empty(&tb->owners)) {
  		__hlist_del(&tb->node);
  		kmem_cache_free(cachep, tb);
  	}
  }
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
86
87
88
89
  
  void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
  		    const unsigned short snum)
  {
c720c7e83   Eric Dumazet   inet: rename some...
90
  	inet_sk(sk)->inet_num = snum;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
91
  	sk_add_bind_node(sk, &tb->owners);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
92
  	inet_csk(sk)->icsk_bind_hash = tb;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
93
  }
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
94
95
96
  /*
   * Get rid of any references to a local port held by the given sock.
   */
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
97
  static void __inet_put_port(struct sock *sk)
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
98
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
99
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
c720c7e83   Eric Dumazet   inet: rename some...
100
  	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
7f635ab71   Pavel Emelyanov   inet: add struct ...
101
  			hashinfo->bhash_size);
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
102
103
104
105
  	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
  	struct inet_bind_bucket *tb;
  
  	spin_lock(&head->lock);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
106
  	tb = inet_csk(sk)->icsk_bind_hash;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
107
  	__sk_del_bind_node(sk);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
108
  	inet_csk(sk)->icsk_bind_hash = NULL;
c720c7e83   Eric Dumazet   inet: rename some...
109
  	inet_sk(sk)->inet_num = 0;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
110
111
112
  	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
  	spin_unlock(&head->lock);
  }
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
113
  /* Release the local port held by @sk.  Bottom halves are disabled for the
   * duration of the call into __inet_put_port().
   */
  void inet_put_port(struct sock *sk)
  {
  	local_bh_disable();

  	__inet_put_port(sk);

  	local_bh_enable();
  }
  EXPORT_SYMBOL(inet_put_port);
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
120

1ce31c9e0   Eric Dumazet   inet: constify __...
121
  int __inet_inherit_port(const struct sock *sk, struct sock *child)
53083773d   Pavel Emelyanov   [INET]: Uninline ...
122
123
  {
  	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
093d28232   Balazs Scheidler   tproxy: fix hash ...
124
125
  	unsigned short port = inet_sk(child)->inet_num;
  	const int bhash = inet_bhashfn(sock_net(sk), port,
7f635ab71   Pavel Emelyanov   inet: add struct ...
126
  			table->bhash_size);
53083773d   Pavel Emelyanov   [INET]: Uninline ...
127
128
  	struct inet_bind_hashbucket *head = &table->bhash[bhash];
  	struct inet_bind_bucket *tb;
3c82a21f4   Robert Shearman   net: allow bindin...
129
  	int l3mdev;
53083773d   Pavel Emelyanov   [INET]: Uninline ...
130
131
132
  
  	spin_lock(&head->lock);
  	tb = inet_csk(sk)->icsk_bind_hash;
c2f34a65a   Eric Dumazet   tcp/dccp: fix pot...
133
134
135
136
  	if (unlikely(!tb)) {
  		spin_unlock(&head->lock);
  		return -ENOENT;
  	}
093d28232   Balazs Scheidler   tproxy: fix hash ...
137
  	if (tb->port != port) {
3c82a21f4   Robert Shearman   net: allow bindin...
138
  		l3mdev = inet_sk_bound_l3mdev(sk);
093d28232   Balazs Scheidler   tproxy: fix hash ...
139
140
141
142
143
  		/* NOTE: using tproxy and redirecting skbs to a proxy
  		 * on a different listener port breaks the assumption
  		 * that the listener socket's icsk_bind_hash is the same
  		 * as that of the child socket. We have to look up or
  		 * create a new bind bucket for the child here. */
b67bfe0d4   Sasha Levin   hlist: drop the n...
144
  		inet_bind_bucket_for_each(tb, &head->chain) {
093d28232   Balazs Scheidler   tproxy: fix hash ...
145
  			if (net_eq(ib_net(tb), sock_net(sk)) &&
3c82a21f4   Robert Shearman   net: allow bindin...
146
  			    tb->l3mdev == l3mdev && tb->port == port)
093d28232   Balazs Scheidler   tproxy: fix hash ...
147
148
  				break;
  		}
b67bfe0d4   Sasha Levin   hlist: drop the n...
149
  		if (!tb) {
093d28232   Balazs Scheidler   tproxy: fix hash ...
150
  			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
3c82a21f4   Robert Shearman   net: allow bindin...
151
152
  						     sock_net(sk), head, port,
  						     l3mdev);
093d28232   Balazs Scheidler   tproxy: fix hash ...
153
154
155
156
157
158
  			if (!tb) {
  				spin_unlock(&head->lock);
  				return -ENOMEM;
  			}
  		}
  	}
b4ff3c90e   Nagendra Tomar   inet: Fix __inet_...
159
  	inet_bind_hash(child, tb, port);
53083773d   Pavel Emelyanov   [INET]: Uninline ...
160
  	spin_unlock(&head->lock);
093d28232   Balazs Scheidler   tproxy: fix hash ...
161
162
  
  	return 0;
53083773d   Pavel Emelyanov   [INET]: Uninline ...
163
  }
53083773d   Pavel Emelyanov   [INET]: Uninline ...
164
  EXPORT_SYMBOL_GPL(__inet_inherit_port);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  static struct inet_listen_hashbucket *
  inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
  {
  	u32 hash;
  
  #if IS_ENABLED(CONFIG_IPV6)
  	if (sk->sk_family == AF_INET6)
  		hash = ipv6_portaddr_hash(sock_net(sk),
  					  &sk->sk_v6_rcv_saddr,
  					  inet_sk(sk)->inet_num);
  	else
  #endif
  		hash = ipv4_portaddr_hash(sock_net(sk),
  					  inet_sk(sk)->inet_rcv_saddr,
  					  inet_sk(sk)->inet_num);
  	return inet_lhash2_bucket(h, hash);
  }
  
  /* Add a listener to the port+address listening hash (lhash2), if the
   * table has one.  AF_INET6 reuseport sockets are appended at the tail of
   * the bucket; all other sockets are inserted at the head.
   */
  static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
  {
  	struct hlist_node *entry = &inet_csk(sk)->icsk_listen_portaddr_node;
  	struct inet_listen_hashbucket *bucket;

  	if (!h->lhash2)
  		return;

  	bucket = inet_lhash2_bucket_sk(h, sk);

  	spin_lock(&bucket->lock);

  	if (sk->sk_reuseport && sk->sk_family == AF_INET6)
  		hlist_add_tail_rcu(entry, &bucket->head);
  	else
  		hlist_add_head_rcu(entry, &bucket->head);
  	bucket->count++;

  	spin_unlock(&bucket->lock);
  }
  
  /* Remove a listener from the port+address listening hash (lhash2).
   *
   * Nothing is done when the table has no lhash2; a socket that is not
   * currently linked triggers a one-time warning and is otherwise ignored.
   */
  static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
  {
  	struct hlist_node *entry = &inet_csk(sk)->icsk_listen_portaddr_node;
  	struct inet_listen_hashbucket *bucket;

  	if (!h->lhash2)
  		return;

  	if (WARN_ON_ONCE(hlist_unhashed(entry)))
  		return;

  	bucket = inet_lhash2_bucket_sk(h, sk);

  	spin_lock(&bucket->lock);

  	hlist_del_init_rcu(entry);
  	bucket->count--;

  	spin_unlock(&bucket->lock);
  }
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
218
219
  /* Score a listening socket against an incoming destination.
   *
   * Returns -1 when @sk cannot take the packet: wrong namespace, wrong
   * port, IPv6-only socket, receive address different from @daddr, or a
   * bound device that inet_sk_bound_dev_eq() rejects for @dif/@sdif.
   *
   * Otherwise the base score is 2 for a PF_INET socket and 1 for any other
   * family, plus 1 if the socket's sk_incoming_cpu is the current CPU.
   *
   * @exact_dif is accepted but not consulted by this function.
   */
  static inline int compute_score(struct sock *sk, struct net *net,
  				const unsigned short hnum, const __be32 daddr,
  				const int dif, const int sdif, bool exact_dif)
  {
  	int score;

  	if (!net_eq(sock_net(sk), net) || sk->sk_num != hnum ||
  	    ipv6_only_sock(sk))
  		return -1;

  	if (sk->sk_rcv_saddr != daddr)
  		return -1;

  	if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
  		return -1;

  	if (sk->sk_family == PF_INET)
  		score = 2;
  	else
  		score = 1;

  	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
  		score++;

  	return score;
  }
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
238
  /*
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
239
240
   * There are some nice properties to exploit here. The BSD API
   * does not allow a listening sock to specify the remote port nor the
33b622319   Arnaldo Carvalho de Melo   [INET]: Generalis...
241
242
243
   * remote address for the connection. So always assume those are both
   * wildcarded during the search since they can never be otherwise.
   */
e48c414ee   Arnaldo Carvalho de Melo   [INET]: Generalis...
244

3b24d854c   Eric Dumazet   tcp/dccp: do not ...
245
  /* called with rcu_read_lock() : No refcount taken on the socket */
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  static struct sock *inet_lhash2_lookup(struct net *net,
  				struct inet_listen_hashbucket *ilb2,
  				struct sk_buff *skb, int doff,
  				const __be32 saddr, __be16 sport,
  				const __be32 daddr, const unsigned short hnum,
  				const int dif, const int sdif)
  {
  	bool exact_dif = inet_exact_dif_match(net, skb);
  	struct inet_connection_sock *icsk;
  	struct sock *sk, *result = NULL;
  	int score, hiscore = 0;
  	u32 phash = 0;
  
  	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
  		sk = (struct sock *)icsk;
  		score = compute_score(sk, net, hnum, daddr,
  				      dif, sdif, exact_dif);
  		if (score > hiscore) {
  			if (sk->sk_reuseport) {
  				phash = inet_ehashfn(net, daddr, hnum,
  						     saddr, sport);
  				result = reuseport_select_sock(sk, phash,
  							       skb, doff);
  				if (result)
  					return result;
  			}
  			result = sk;
  			hiscore = score;
  		}
  	}
  
  	return result;
  }
c67499c0e   Pavel Emelyanov   [NETNS]: Tcp-v4 s...
279
280
  struct sock *__inet_lookup_listener(struct net *net,
  				    struct inet_hashinfo *hashinfo,
a583636a8   Craig Gallek   inet: refactor in...
281
  				    struct sk_buff *skb, int doff,
da5e36308   Tom Herbert   soreuseport: TCP/...
282
  				    const __be32 saddr, __be16 sport,
fb99c848e   Al Viro   [IPV4]: annotate ...
283
  				    const __be32 daddr, const unsigned short hnum,
3fa6f616a   David Ahern   net: ipv4: add se...
284
  				    const int dif, const int sdif)
99a92ff50   Herbert Xu   [IPV4]: Uninline ...
285
  {
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
286
  	struct inet_listen_hashbucket *ilb2;
d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
287
  	struct sock *result = NULL;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
288
  	unsigned int hash2;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
289
290
291
  
  	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
  	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
292
293
294
295
296
  
  	result = inet_lhash2_lookup(net, ilb2, skb, doff,
  				    saddr, sport, daddr, hnum,
  				    dif, sdif);
  	if (result)
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
297
  		goto done;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
298
299
  
  	/* Lookup lhash2 with INADDR_ANY */
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
300
301
  	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
  	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
302

8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
303
  	result = inet_lhash2_lookup(net, ilb2, skb, doff,
d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
304
  				    saddr, sport, htonl(INADDR_ANY), hnum,
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
305
  				    dif, sdif);
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
306
  done:
88e235b80   Enrico Weigelt   net: ipv4: drop u...
307
  	if (IS_ERR(result))
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
308
  		return NULL;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
309
  	return result;
99a92ff50   Herbert Xu   [IPV4]: Uninline ...
310
  }
8f491069b   Herbert Xu   [IPV4]: Use netwo...
311
  EXPORT_SYMBOL_GPL(__inet_lookup_listener);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
312

05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
313
314
315
  /* All sockets share common refcount, but have different destructors */
  void sock_gen_put(struct sock *sk)
  {
41c6d650f   Reshetova, Elena   net: convert sock...
316
  	if (!refcount_dec_and_test(&sk->sk_refcnt))
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
317
318
319
320
  		return;
  
  	if (sk->sk_state == TCP_TIME_WAIT)
  		inet_twsk_free(inet_twsk(sk));
41b822c59   Eric Dumazet   inet: prepare soc...
321
322
  	else if (sk->sk_state == TCP_NEW_SYN_RECV)
  		reqsk_free(inet_reqsk(sk));
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
323
324
325
326
  	else
  		sk_free(sk);
  }
  EXPORT_SYMBOL_GPL(sock_gen_put);
2c13270b4   Eric Dumazet   inet: factorize s...
327
328
329
330
331
  /* Release the socket reference carried by @skb via sock_gen_put(). */
  void sock_edemux(struct sk_buff *skb)
  {
  	struct sock *sk = skb->sk;

  	sock_gen_put(sk);
  }
  EXPORT_SYMBOL(sock_edemux);
5e73ea1a3   Daniel Baluta   ipv4: fix checkpa...
332
  struct sock *__inet_lookup_established(struct net *net,
c67499c0e   Pavel Emelyanov   [NETNS]: Tcp-v4 s...
333
  				  struct inet_hashinfo *hashinfo,
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
334
335
  				  const __be32 saddr, const __be16 sport,
  				  const __be32 daddr, const u16 hnum,
3fa6f616a   David Ahern   net: ipv4: add se...
336
  				  const int dif, const int sdif)
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
337
  {
c72283174   Joe Perches   net: Use a more s...
338
  	INET_ADDR_COOKIE(acookie, saddr, daddr);
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
339
340
  	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
  	struct sock *sk;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
341
  	const struct hlist_nulls_node *node;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
342
343
344
  	/* Optimize here for direct hit, only listening connections can
  	 * have wildcards anyways.
  	 */
9f26b3add   Pavel Emelyanov   inet: add struct ...
345
  	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
f373b53b5   Eric Dumazet   tcp: replace ehas...
346
  	unsigned int slot = hash & hashinfo->ehash_mask;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
347
  	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
348

3ab5aee7f   Eric Dumazet   net: Convert TCP ...
349
350
  begin:
  	sk_nulls_for_each_rcu(sk, node, &head->chain) {
ce43b03e8   Eric Dumazet   net: move inet_dp...
351
352
353
  		if (sk->sk_hash != hash)
  			continue;
  		if (likely(INET_MATCH(sk, net, acookie,
3fa6f616a   David Ahern   net: ipv4: add se...
354
  				      saddr, daddr, ports, dif, sdif))) {
41c6d650f   Reshetova, Elena   net: convert sock...
355
  			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
356
  				goto out;
ce43b03e8   Eric Dumazet   net: move inet_dp...
357
  			if (unlikely(!INET_MATCH(sk, net, acookie,
3fa6f616a   David Ahern   net: ipv4: add se...
358
359
  						 saddr, daddr, ports,
  						 dif, sdif))) {
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
360
  				sock_gen_put(sk);
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
361
362
  				goto begin;
  			}
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
363
  			goto found;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
364
  		}
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
365
  	}
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
366
367
368
369
370
371
372
  	/*
  	 * if the nulls value we got at the end of this lookup is
  	 * not the expected one, we must restart lookup.
  	 * We probably met an item that was moved to another chain.
  	 */
  	if (get_nulls_value(node) != slot)
  		goto begin;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
373
  out:
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
374
375
  	sk = NULL;
  found:
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
376
  	return sk;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
377
378
  }
  EXPORT_SYMBOL_GPL(__inet_lookup_established);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
379
380
381
382
383
384
385
  /* Check that the 4-tuple @sk would get with local port @lport is not
   * already present in the established hash and, if it is free, insert
   * @sk under the bucket lock.
   *
   * A matching TIME_WAIT socket that twsk_unique() accepts does not count
   * as a conflict: it is unhashed in favour of @sk and then either handed
   * back through @twp or, when @twp is NULL, descheduled and released.
   *
   * Returns 0 on success, -EADDRNOTAVAIL if the tuple is already in use.
   *
   * called with local bh disabled
   */
  static int __inet_check_established(struct inet_timewait_death_row *death_row,
  				    struct sock *sk, __u16 lport,
  				    struct inet_timewait_sock **twp)
  {
  	struct inet_hashinfo *hinfo = death_row->hashinfo;
  	struct inet_sock *inet = inet_sk(sk);
  	const __be32 daddr = inet->inet_rcv_saddr;
  	const __be32 saddr = inet->inet_daddr;
  	const int dif = sk->sk_bound_dev_if;
  	struct net *net = sock_net(sk);
  	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
  	INET_ADDR_COOKIE(acookie, saddr, daddr);
  	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
  	const unsigned int hash = inet_ehashfn(net, daddr, lport,
  					       saddr, inet->inet_dport);
  	struct inet_ehash_bucket *bucket = inet_ehash_bucket(hinfo, hash);
  	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
  	struct inet_timewait_sock *tw = NULL;
  	const struct hlist_nulls_node *pos;
  	struct sock *other;

  	spin_lock(lock);

  	sk_nulls_for_each(other, pos, &bucket->chain) {
  		if (other->sk_hash != hash)
  			continue;

  		if (!likely(INET_MATCH(other, net, acookie,
  				       saddr, daddr, ports, dif, sdif)))
  			continue;

  		/* Same tuple: only a reusable TIME_WAIT entry is tolerated */
  		if (other->sk_state != TCP_TIME_WAIT)
  			goto not_unique;

  		tw = inet_twsk(other);
  		if (!twsk_unique(sk, other, twp))
  			goto not_unique;
  		break;
  	}

  	/* Must record num and sport now. Otherwise we will see
  	 * in hash table socket with a funny identity.
  	 */
  	inet->inet_num = lport;
  	inet->inet_sport = htons(lport);
  	sk->sk_hash = hash;
  	WARN_ON(!sk_unhashed(sk));
  	__sk_nulls_add_node_rcu(sk, &bucket->chain);
  	if (tw) {
  		sk_nulls_del_node_init_rcu((struct sock *)tw);
  		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
  	}
  	spin_unlock(lock);
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

  	if (twp) {
  		*twp = tw;
  		return 0;
  	}

  	/* Silly. Should hash-dance instead... */
  	if (tw)
  		inet_twsk_deschedule_put(tw);

  	return 0;

  not_unique:
  	spin_unlock(lock);
  	return -EADDRNOTAVAIL;
  }
e2baad9e4   Eric Dumazet   tcp: connect() fr...
444
  static u32 inet_sk_port_offset(const struct sock *sk)
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
445
446
  {
  	const struct inet_sock *inet = inet_sk(sk);
e2baad9e4   Eric Dumazet   tcp: connect() fr...
447

c720c7e83   Eric Dumazet   inet: rename some...
448
449
450
  	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
  					  inet->inet_daddr,
  					  inet->inet_dport);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
451
  }
079096f10   Eric Dumazet   tcp/dccp: install...
452
453
454
  /* insert a socket into ehash, and eventually remove another one
   * (The another one can be a SYN_RECV or TIMEWAIT
   */
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
455
  bool inet_ehash_insert(struct sock *sk, struct sock *osk)
152da81de   Pavel Emelyanov   [INET]: Uninline ...
456
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
457
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
458
  	struct hlist_nulls_head *list;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
459
  	struct inet_ehash_bucket *head;
5b441f76f   Eric Dumazet   net: introduce sk...
460
  	spinlock_t *lock;
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
461
  	bool ret = true;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
462

079096f10   Eric Dumazet   tcp/dccp: install...
463
  	WARN_ON_ONCE(!sk_unhashed(sk));
152da81de   Pavel Emelyanov   [INET]: Uninline ...
464

5b441f76f   Eric Dumazet   net: introduce sk...
465
  	sk->sk_hash = sk_ehashfn(sk);
152da81de   Pavel Emelyanov   [INET]: Uninline ...
466
467
468
  	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
  	list = &head->chain;
  	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
9db66bdcc   Eric Dumazet   net: convert TCP/...
469
  	spin_lock(lock);
fc01538f9   Eric Dumazet   inet: simplify ti...
470
  	if (osk) {
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
471
472
  		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
  		ret = sk_nulls_del_node_init_rcu(osk);
9327f7053   Eric Dumazet   tcp: Fix a connec...
473
  	}
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
474
475
  	if (ret)
  		__sk_nulls_add_node_rcu(sk, list);
9db66bdcc   Eric Dumazet   net: convert TCP/...
476
  	spin_unlock(lock);
079096f10   Eric Dumazet   tcp/dccp: install...
477
478
  	return ret;
  }
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
479
  bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
079096f10   Eric Dumazet   tcp/dccp: install...
480
  {
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
481
482
483
484
485
486
  	bool ok = inet_ehash_insert(sk, osk);
  
  	if (ok) {
  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  	} else {
  		percpu_counter_inc(sk->sk_prot->orphan_count);
563e0bb0d   Yafang Shao   net: tracepoint: ...
487
  		inet_sk_set_state(sk, TCP_CLOSE);
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
488
489
490
491
  		sock_set_flag(sk, SOCK_DEAD);
  		inet_csk_destroy_sock(sk);
  	}
  	return ok;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
492
  }
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
493
  EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
152da81de   Pavel Emelyanov   [INET]: Uninline ...
494

c125e80b8   Craig Gallek   soreuseport: fast...
495
  static int inet_reuseport_add_sock(struct sock *sk,
fe38d2a1c   Josef Bacik   inet: collapse ip...
496
  				   struct inet_listen_hashbucket *ilb)
c125e80b8   Craig Gallek   soreuseport: fast...
497
  {
90e5d0db2   Craig Gallek   soreuseport: Fix ...
498
  	struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
499
  	const struct hlist_nulls_node *node;
c125e80b8   Craig Gallek   soreuseport: fast...
500
  	struct sock *sk2;
c125e80b8   Craig Gallek   soreuseport: fast...
501
  	kuid_t uid = sock_i_uid(sk);
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
502
  	sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
c125e80b8   Craig Gallek   soreuseport: fast...
503
504
505
506
  		if (sk2 != sk &&
  		    sk2->sk_family == sk->sk_family &&
  		    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
  		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
90e5d0db2   Craig Gallek   soreuseport: Fix ...
507
  		    inet_csk(sk2)->icsk_bind_hash == tb &&
c125e80b8   Craig Gallek   soreuseport: fast...
508
  		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
fe38d2a1c   Josef Bacik   inet: collapse ip...
509
  		    inet_rcv_saddr_equal(sk, sk2, false))
2dbb9b9e6   Martin KaFai Lau   bpf: Introduce BP...
510
511
  			return reuseport_add_sock(sk, sk2,
  						  inet_rcv_saddr_any(sk));
c125e80b8   Craig Gallek   soreuseport: fast...
512
  	}
2dbb9b9e6   Martin KaFai Lau   bpf: Introduce BP...
513
  	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
c125e80b8   Craig Gallek   soreuseport: fast...
514
  }
fe38d2a1c   Josef Bacik   inet: collapse ip...
515
  int __inet_hash(struct sock *sk, struct sock *osk)
152da81de   Pavel Emelyanov   [INET]: Uninline ...
516
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
517
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
5caea4ea7   Eric Dumazet   net: listening_ha...
518
  	struct inet_listen_hashbucket *ilb;
c125e80b8   Craig Gallek   soreuseport: fast...
519
  	int err = 0;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
520

5e0724d02   Eric Dumazet   tcp/dccp: fix has...
521
522
  	if (sk->sk_state != TCP_LISTEN) {
  		inet_ehash_nolisten(sk, osk);
c125e80b8   Craig Gallek   soreuseport: fast...
523
  		return 0;
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
524
  	}
547b792ca   Ilpo Järvinen   net: convert BUG_...
525
  	WARN_ON(!sk_unhashed(sk));
5caea4ea7   Eric Dumazet   net: listening_ha...
526
  	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
152da81de   Pavel Emelyanov   [INET]: Uninline ...
527

5caea4ea7   Eric Dumazet   net: listening_ha...
528
  	spin_lock(&ilb->lock);
c125e80b8   Craig Gallek   soreuseport: fast...
529
  	if (sk->sk_reuseport) {
fe38d2a1c   Josef Bacik   inet: collapse ip...
530
  		err = inet_reuseport_add_sock(sk, ilb);
c125e80b8   Craig Gallek   soreuseport: fast...
531
532
533
  		if (err)
  			goto unlock;
  	}
d296ba60d   Craig Gallek   soreuseport: Reso...
534
535
  	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
  		sk->sk_family == AF_INET6)
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
536
  		__sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
d296ba60d   Craig Gallek   soreuseport: Reso...
537
  	else
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
538
  		__sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
539
  	inet_hash2(hashinfo, sk);
76d013b20   Martin KaFai Lau   inet: Add a count...
540
  	ilb->count++;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
541
  	sock_set_flag(sk, SOCK_RCU_FREE);
c29a0bc4d   Pavel Emelyanov   [SOCK][NETNS]: Ad...
542
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
c125e80b8   Craig Gallek   soreuseport: fast...
543
  unlock:
5caea4ea7   Eric Dumazet   net: listening_ha...
544
  	spin_unlock(&ilb->lock);
c125e80b8   Craig Gallek   soreuseport: fast...
545
546
  
  	return err;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
547
  }
77a6a471b   Eric Dumazet   ipv6: get rid of ...
548
  EXPORT_SYMBOL(__inet_hash);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
549

086c653f5   Craig Gallek   sock: struct prot...
550
  int inet_hash(struct sock *sk)
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
551
  {
c125e80b8   Craig Gallek   soreuseport: fast...
552
  	int err = 0;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
553
554
  	if (sk->sk_state != TCP_CLOSE) {
  		local_bh_disable();
fe38d2a1c   Josef Bacik   inet: collapse ip...
555
  		err = __inet_hash(sk, NULL);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
556
557
  		local_bh_enable();
  	}
086c653f5   Craig Gallek   sock: struct prot...
558

c125e80b8   Craig Gallek   soreuseport: fast...
559
  	return err;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
560
561
562
563
564
  }
  EXPORT_SYMBOL_GPL(inet_hash);
  
  void inet_unhash(struct sock *sk)
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
565
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
0ba987181   Geert Uytterhoeven   inet: Avoid uniti...
566
  	struct inet_listen_hashbucket *ilb = NULL;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
567
  	spinlock_t *lock;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
568
569
  
  	if (sk_unhashed(sk))
5caea4ea7   Eric Dumazet   net: listening_ha...
570
  		return;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
571

3b24d854c   Eric Dumazet   tcp/dccp: do not ...
572
  	if (sk->sk_state == TCP_LISTEN) {
76d013b20   Martin KaFai Lau   inet: Add a count...
573
574
  		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
  		lock = &ilb->lock;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
575
  	} else {
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
576
  		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
577
  	}
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
578
  	spin_lock_bh(lock);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
579
580
  	if (sk_unhashed(sk))
  		goto unlock;
c125e80b8   Craig Gallek   soreuseport: fast...
581
582
  	if (rcu_access_pointer(sk->sk_reuseport_cb))
  		reuseport_detach_sock(sk);
0ba987181   Geert Uytterhoeven   inet: Avoid uniti...
583
  	if (ilb) {
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
584
  		inet_unhash2(hashinfo, sk);
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
585
  		ilb->count--;
76d013b20   Martin KaFai Lau   inet: Add a count...
586
  	}
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
587
  	__sk_nulls_del_node_init_rcu(sk);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
588
589
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  unlock:
920de804b   Eric Dumazet   net: Make sure BH...
590
  	spin_unlock_bh(lock);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
591
592
  }
  EXPORT_SYMBOL_GPL(inet_unhash);
152da81de   Pavel Emelyanov   [INET]: Uninline ...
593

5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
594
  /*
   * Give @sk a local port for an outgoing connection (unless it already has
   * one) and hash it through inet_ehash_nolisten().
   *
   * @death_row:         provides the inet_hashinfo (death_row->hashinfo) and is
   *                     passed on to @check_established.
   * @sk:                the connecting socket.
   * @port_offset:       added to the function-static 'hint' to pick the point
   *                     in the local port range where the scan starts.
   * @check_established: callback called as
   *                     check_established(death_row, sk, port, &tw); a zero
   *                     return is treated as "this port can be used".
   *
   * Returns 0 on success, -ENOMEM if a bind bucket cannot be created,
   * -EADDRNOTAVAIL if both parity passes over the port range fail, or the
   * callback's return value when the socket already had a port but the
   * quick ownership test did not succeed.
   */
  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
5d8c0aa94   Pavel Emelyanov   [INET]: Fix accid...
595
  		struct sock *sk, u32 port_offset,
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
596
  		int (*check_established)(struct inet_timewait_death_row *,
b4d6444ea   Eric Dumazet   inet: get rid of ...
597
  			struct sock *, __u16, struct inet_timewait_sock **))
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
598
599
  {
  	struct inet_hashinfo *hinfo = death_row->hashinfo;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
600
  	struct inet_timewait_sock *tw = NULL;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
601
  	struct inet_bind_hashbucket *head;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
602
  	int port = inet_sk(sk)->inet_num;
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
603
  	struct net *net = sock_net(sk);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
604
605
606
607
  	struct inet_bind_bucket *tb;
  	u32 remaining, offset;
  	int ret, i, low, high;
  	/* Function-static: one value shared by every call of this function. */
  	static u32 hint;
3c82a21f4   Robert Shearman   net: allow bindin...
608
  	int l3mdev;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
  
  	/* inet_num is already non-zero: no port scan is performed. */
  	if (port) {
  		head = &hinfo->bhash[inet_bhashfn(net, port,
  						  hinfo->bhash_size)];
  		tb = inet_csk(sk)->icsk_bind_hash;
  		spin_lock_bh(&head->lock);
  		/* sk is at the head of the bucket's owner list and has no next
  		 * bind node - presumably it is the only owner (TODO confirm).
  		 */
  		if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
  			inet_ehash_nolisten(sk, NULL);
  			spin_unlock_bh(&head->lock);
  			return 0;
  		}
  		/* Plain unlock here: the BH re-enable that pairs with
  		 * spin_lock_bh() above is the local_bh_enable() issued after
  		 * check_established() has run.
  		 */
  		spin_unlock(&head->lock);
  		/* No definite answer... Walk to established hash table */
  		ret = check_established(death_row, sk, port, NULL);
  		local_bh_enable();
  		return ret;
  	}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
626

3c82a21f4   Robert Shearman   net: allow bindin...
627
  	/* Compared against tb->l3mdev when looking for an existing bucket. */
  	l3mdev = inet_sk_bound_l3mdev(sk);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
628
629
630
631
632
  	inet_get_local_port_range(net, &low, &high);
  	high++; /* [32768, 60999] -> [32768, 61000[ */
  	remaining = high - low;
  	/* Round the count down to an even number (when it is > 1); the scan
  	 * below advances two ports at a time.
  	 */
  	if (likely(remaining > 1))
  		remaining &= ~1U;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
633

1580ab63f   Eric Dumazet   tcp/dccp: better ...
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
  	/* Start position is derived from the static hint plus @port_offset. */
  	offset = (hint + port_offset) % remaining;
  	/* In first pass we try ports of @low parity.
  	 * inet_csk_get_port() does the opposite choice.
  	 */
  	offset &= ~1U;
  other_parity_scan:
  	port = low + offset;
  	for (i = 0; i < remaining; i += 2, port += 2) {
  		/* Wrap back towards the low end of the range. */
  		if (unlikely(port >= high))
  			port -= remaining;
  		if (inet_is_local_reserved_port(net, port))
  			continue;
  		head = &hinfo->bhash[inet_bhashfn(net, port,
  						  hinfo->bhash_size)];
  		spin_lock_bh(&head->lock);
227b60f51   Stephen Hemminger   [INET]: local por...
649

1580ab63f   Eric Dumazet   tcp/dccp: better ...
650
651
  		/* Does not bother with rcv_saddr checks, because
  		 * the established check is already unique enough.
07f4c9006   Eric Dumazet   tcp/dccp: try to ...
652
  		 */
1580ab63f   Eric Dumazet   tcp/dccp: better ...
653
  		inet_bind_bucket_for_each(tb, &head->chain) {
3c82a21f4   Robert Shearman   net: allow bindin...
654
655
  			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
  			    tb->port == port) {
1580ab63f   Eric Dumazet   tcp/dccp: better ...
656
657
  				/* Buckets with a non-negative fastreuse or
  				 * fastreuseport are skipped; buckets created
  				 * further down in this function are set to -1.
  				 */
  				if (tb->fastreuse >= 0 ||
  				    tb->fastreuseport >= 0)
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
658
  					goto next_port;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
659
660
661
662
663
  				WARN_ON(hlist_empty(&tb->owners));
  				/* Zero from the callback means the port is
  				 * accepted; tw may have been filled in by it.
  				 */
  				if (!check_established(death_row, sk,
  						       port, &tw))
  					goto ok;
  				goto next_port;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
664
  			}
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
665
  		}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
666

1580ab63f   Eric Dumazet   tcp/dccp: better ...
667
  		/* No matching bucket was found on the chain: create one. */
  		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
3c82a21f4   Robert Shearman   net: allow bindin...
668
  					     net, head, port, l3mdev);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
669
670
671
  		if (!tb) {
  			spin_unlock_bh(&head->lock);
  			return -ENOMEM;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
672
  		}
1580ab63f   Eric Dumazet   tcp/dccp: better ...
673
674
675
676
677
678
679
  		tb->fastreuse = -1;
  		tb->fastreuseport = -1;
  		goto ok;
  next_port:
  		spin_unlock_bh(&head->lock);
  		cond_resched();
  	}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
680

1580ab63f   Eric Dumazet   tcp/dccp: better ...
681
682
683
  	/* offset was made even before the first pass, so it is odd after one
  	 * increment (scan the other parity) and even after the second
  	 * (fall through and give up).
  	 */
  	offset++;
  	if ((offset & 1) && remaining > 1)
  		goto other_parity_scan;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
684

1580ab63f   Eric Dumazet   tcp/dccp: better ...
685
  	return -EADDRNOTAVAIL;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
686

1580ab63f   Eric Dumazet   tcp/dccp: better ...
687
688
689
690
691
692
693
694
  ok:
  	/* i grows by 2 per loop iteration, so this advances the shared hint
  	 * by twice the number of iterations used in the successful pass.
  	 */
  	hint += i + 2;
  
  	/* Head lock still held and bh's disabled */
  	inet_bind_hash(sk, tb, port);
  	/* Source port is set and the socket hashed only if sk is unhashed. */
  	if (sk_unhashed(sk)) {
  		inet_sk(sk)->inet_sport = htons(port);
  		inet_ehash_nolisten(sk, (struct sock *)tw);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
695
  	}
1580ab63f   Eric Dumazet   tcp/dccp: better ...
696
697
698
699
700
701
702
  	if (tw)
  		inet_twsk_bind_unhash(tw, hinfo);
  	/* The bucket lock is released first; BHs are re-enabled only after
  	 * the timewait socket (if any) has been descheduled.
  	 */
  	spin_unlock(&head->lock);
  	if (tw)
  		inet_twsk_deschedule_put(tw);
  	local_bh_enable();
  	return 0;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
703
  }
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
704
705
706
707
708
709
710
  
  /*
   * Bind a port for a connect operation and hash it.
   */
  int inet_hash_connect(struct inet_timewait_death_row *death_row,
  		      struct sock *sk)
  {
e2baad9e4   Eric Dumazet   tcp: connect() fr...
711
712
713
714
715
  	u32 port_offset = 0;
  
  	if (!inet_sk(sk)->inet_num)
  		port_offset = inet_sk_port_offset(sk);
  	return __inet_hash_connect(death_row, sk, port_offset,
b4d6444ea   Eric Dumazet   inet: get rid of ...
716
  				   __inet_check_established);
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
717
  }
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
718
  EXPORT_SYMBOL_GPL(inet_hash_connect);
5caea4ea7   Eric Dumazet   net: listening_ha...
719
720
721
722
  
  void inet_hashinfo_init(struct inet_hashinfo *h)
  {
  	int i;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
723
  	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
5caea4ea7   Eric Dumazet   net: listening_ha...
724
  		spin_lock_init(&h->listening_hash[i].lock);
0a0ee9f2d   Eric Dumazet   tcp/dccp: fix pos...
725
726
  		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
  				      i + LISTENING_NULLS_BASE);
76d013b20   Martin KaFai Lau   inet: Add a count...
727
  		h->listening_hash[i].count = 0;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
728
  	}
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
729
730
  
  	h->lhash2 = NULL;
5caea4ea7   Eric Dumazet   net: listening_ha...
731
  }
5caea4ea7   Eric Dumazet   net: listening_ha...
732
  EXPORT_SYMBOL_GPL(inet_hashinfo_init);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
733

c92c81df9   Peter Oskolkov   net: dccp: fix ke...
734
735
736
737
738
739
740
741
742
743
  /*
   * Initialise every bucket of h->lhash2, indices 0 through h->lhash2_mask
   * inclusive: zero count, empty list head, initialised lock.
   */
  static void init_hashinfo_lhash2(struct inet_hashinfo *h)
  {
  	int i = 0;

  	while (i <= h->lhash2_mask) {
  		h->lhash2[i].count = 0;
  		INIT_HLIST_HEAD(&h->lhash2[i].head);
  		spin_lock_init(&h->lhash2[i].lock);
  		i++;
  	}
  }
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
744
745
746
747
748
  /*
   * Init-time setup of the lhash2 table of @h.
   *
   * The table comes from alloc_large_system_hash(), which is given @name,
   * the per-bucket size, @numentries, @scale, @low_limit and @high_limit
   * and the address of h->lhash2_mask. All buckets are then initialised by
   * init_hashinfo_lhash2().
   */
  void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
  				unsigned long numentries, int scale,
  				unsigned long low_limit,
  				unsigned long high_limit)
  {
  	h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2),
  					    numentries, scale, 0, NULL,
  					    &h->lhash2_mask,
  					    low_limit, high_limit);

  	init_hashinfo_lhash2(h);
  }
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
760

c92c81df9   Peter Oskolkov   net: dccp: fix ke...
761
762
763
764
765
766
767
768
769
770
771
772
  /*
   * Allocate and initialise an lhash2 table of INET_LHTABLE_SIZE buckets
   * for @h using kmalloc_array().
   *
   * Returns 0 on success or -ENOMEM if the allocation fails.
   */
  int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
  {
  	h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2),
  				  GFP_KERNEL);
  	if (h->lhash2) {
  		h->lhash2_mask = INET_LHTABLE_SIZE - 1;
  		/* INET_LHTABLE_SIZE must be a power of 2 */
  		BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);

  		init_hashinfo_lhash2(h);
  		return 0;
  	}

  	return -ENOMEM;
  }
  EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
775

095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
776
777
  int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
  {
89e478a2a   Eric Dumazet   tcp: suppress a d...
778
  	unsigned int locksz = sizeof(spinlock_t);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
779
  	unsigned int i, nblocks = 1;
89e478a2a   Eric Dumazet   tcp: suppress a d...
780
  	if (locksz != 0) {
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
781
  		/* allocate 2 cache lines or at least one spinlock per cpu */
89e478a2a   Eric Dumazet   tcp: suppress a d...
782
  		nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
783
784
785
786
  		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
  
  		/* no more locks than number of hash buckets */
  		nblocks = min(nblocks, hashinfo->ehash_mask + 1);
752ade68c   Michal Hocko   treewide: use kv[...
787
  		hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
788
789
790
791
792
793
794
795
796
797
  		if (!hashinfo->ehash_locks)
  			return -ENOMEM;
  
  		for (i = 0; i < nblocks; i++)
  			spin_lock_init(&hashinfo->ehash_locks[i]);
  	}
  	hashinfo->ehash_locks_mask = nblocks - 1;
  	return 0;
  }
  EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);