Blame view

net/ipv4/inet_hashtables.c 23.6 KB
2874c5fd2   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-or-later
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
2
3
4
5
6
7
8
9
  /*
   * INET		An implementation of the TCP/IP protocol suite for the LINUX
   *		operating system.  INET is implemented using the BSD Socket
   *		interface as the means of communication with the user level.
   *
   *		Generic INET transport hashtables
   *
   * Authors:	Lotsa people, from code originally in tcp
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
10
   */
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
11
  #include <linux/module.h>
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
12
  #include <linux/random.h>
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
13
  #include <linux/sched.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
14
  #include <linux/slab.h>
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
15
  #include <linux/wait.h>
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
16
  #include <linux/vmalloc.h>
57c8a661d   Mike Rapoport   mm: remove includ...
17
  #include <linux/memblock.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
18

c125e80b8   Craig Gallek   soreuseport: fast...
19
  #include <net/addrconf.h>
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
20
  #include <net/inet_connection_sock.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
21
  #include <net/inet_hashtables.h>
01770a166   Ricardo Dias   tcp: fix race con...
22
23
24
  #if IS_ENABLED(CONFIG_IPV6)
  #include <net/inet6_hashtables.h>
  #endif
6e5714eaf   David S. Miller   net: Compute prot...
25
  #include <net/secure_seq.h>
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
26
  #include <net/ip.h>
a04a480d4   David Ahern   net: Require exac...
27
  #include <net/tcp.h>
c125e80b8   Craig Gallek   soreuseport: fast...
28
  #include <net/sock_reuseport.h>
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
29

6eada0110   Eric Dumazet   netns: constify n...
30
31
32
  static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
  			const __u16 lport, const __be32 faddr,
  			const __be16 fport)
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
33
  {
1bbdceef1   Hannes Frederic Sowa   inet: convert ine...
34
35
36
  	static u32 inet_ehash_secret __read_mostly;
  
  	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
37
38
39
  	return __inet_ehashfn(laddr, lport, faddr, fport,
  			      inet_ehash_secret + net_hash_mix(net));
  }
d1e559d0b   Eric Dumazet   inet: add IPv6 su...
40
41
42
  /* This function handles inet_sock, but also timewait and request sockets
   * for IPv4/IPv6.
   */
784c372a8   Eric Dumazet   net: make sk_ehas...
43
  static u32 sk_ehashfn(const struct sock *sk)
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
44
  {
d1e559d0b   Eric Dumazet   inet: add IPv6 su...
45
46
47
48
49
50
51
  #if IS_ENABLED(CONFIG_IPV6)
  	if (sk->sk_family == AF_INET6 &&
  	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
  		return inet6_ehashfn(sock_net(sk),
  				     &sk->sk_v6_rcv_saddr, sk->sk_num,
  				     &sk->sk_v6_daddr, sk->sk_dport);
  #endif
5b441f76f   Eric Dumazet   net: introduce sk...
52
53
54
  	return inet_ehashfn(sock_net(sk),
  			    sk->sk_rcv_saddr, sk->sk_num,
  			    sk->sk_daddr, sk->sk_dport);
65cd8033f   Hannes Frederic Sowa   ipv4: split inet_...
55
  }
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
56
57
58
59
  /*
   * Allocate and initialize a new local port bind bucket.
   * The bindhash mutex for snum's hash chain must be held here.
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
60
  struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
941b1d22c   Pavel Emelyanov   [NETNS]: Make bin...
61
  						 struct net *net,
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
62
  						 struct inet_bind_hashbucket *head,
3c82a21f4   Robert Shearman   net: allow bindin...
63
64
  						 const unsigned short snum,
  						 int l3mdev)
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
65
  {
54e6ecb23   Christoph Lameter   [PATCH] slab: rem...
66
  	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
67

00db41243   Ian Morris   ipv4: coding styl...
68
  	if (tb) {
efd7ef1c1   Eric W. Biederman   net: Kill hold_ne...
69
  		write_pnet(&tb->ib_net, net);
3c82a21f4   Robert Shearman   net: allow bindin...
70
  		tb->l3mdev    = l3mdev;
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
71
72
  		tb->port      = snum;
  		tb->fastreuse = 0;
da5e36308   Tom Herbert   soreuseport: TCP/...
73
  		tb->fastreuseport = 0;
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
74
75
76
77
78
  		INIT_HLIST_HEAD(&tb->owners);
  		hlist_add_head(&tb->node, &head->chain);
  	}
  	return tb;
  }
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
79
80
81
  /*
   * Caller must hold hashbucket lock for this tb with local BH disabled
   */
e18b890bb   Christoph Lameter   [PATCH] slab: rem...
82
  void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
77d8bf9c6   Arnaldo Carvalho de Melo   [INET]: Move the ...
83
84
85
86
87
88
  {
  	if (hlist_empty(&tb->owners)) {
  		__hlist_del(&tb->node);
  		kmem_cache_free(cachep, tb);
  	}
  }
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
89
90
91
92
  
  void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
  		    const unsigned short snum)
  {
c720c7e83   Eric Dumazet   inet: rename some...
93
  	inet_sk(sk)->inet_num = snum;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
94
  	sk_add_bind_node(sk, &tb->owners);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
95
  	inet_csk(sk)->icsk_bind_hash = tb;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
96
  }
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
97
98
99
  /*
   * Get rid of any references to a local port held by the given sock.
   */
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
100
  static void __inet_put_port(struct sock *sk)
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
101
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
102
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
c720c7e83   Eric Dumazet   inet: rename some...
103
  	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
7f635ab71   Pavel Emelyanov   inet: add struct ...
104
  			hashinfo->bhash_size);
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
105
106
107
108
  	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
  	struct inet_bind_bucket *tb;
  
  	spin_lock(&head->lock);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
109
  	tb = inet_csk(sk)->icsk_bind_hash;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
110
  	__sk_del_bind_node(sk);
463c84b97   Arnaldo Carvalho de Melo   [NET]: Introduce ...
111
  	inet_csk(sk)->icsk_bind_hash = NULL;
c720c7e83   Eric Dumazet   inet: rename some...
112
  	inet_sk(sk)->inet_num = 0;
2d8c4ce51   Arnaldo Carvalho de Melo   [INET]: Generalis...
113
114
115
  	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
  	spin_unlock(&head->lock);
  }
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
116
  /* Release the local port held by @sk, with bottom halves disabled for
   * the duration of the bind hash update.
   */
  void inet_put_port(struct sock *sk)
  {
  	local_bh_disable();
  
  	__inet_put_port(sk);
  
  	local_bh_enable();
  }
  EXPORT_SYMBOL(inet_put_port);
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
123

1ce31c9e0   Eric Dumazet   inet: constify __...
124
  int __inet_inherit_port(const struct sock *sk, struct sock *child)
53083773d   Pavel Emelyanov   [INET]: Uninline ...
125
126
  {
  	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
093d28232   Balazs Scheidler   tproxy: fix hash ...
127
128
  	unsigned short port = inet_sk(child)->inet_num;
  	const int bhash = inet_bhashfn(sock_net(sk), port,
7f635ab71   Pavel Emelyanov   inet: add struct ...
129
  			table->bhash_size);
53083773d   Pavel Emelyanov   [INET]: Uninline ...
130
131
  	struct inet_bind_hashbucket *head = &table->bhash[bhash];
  	struct inet_bind_bucket *tb;
3c82a21f4   Robert Shearman   net: allow bindin...
132
  	int l3mdev;
53083773d   Pavel Emelyanov   [INET]: Uninline ...
133
134
135
  
  	spin_lock(&head->lock);
  	tb = inet_csk(sk)->icsk_bind_hash;
c2f34a65a   Eric Dumazet   tcp/dccp: fix pot...
136
137
138
139
  	if (unlikely(!tb)) {
  		spin_unlock(&head->lock);
  		return -ENOENT;
  	}
093d28232   Balazs Scheidler   tproxy: fix hash ...
140
  	if (tb->port != port) {
3c82a21f4   Robert Shearman   net: allow bindin...
141
  		l3mdev = inet_sk_bound_l3mdev(sk);
093d28232   Balazs Scheidler   tproxy: fix hash ...
142
143
144
145
146
  		/* NOTE: using tproxy and redirecting skbs to a proxy
  		 * on a different listener port breaks the assumption
  		 * that the listener socket's icsk_bind_hash is the same
  		 * as that of the child socket. We have to look up or
  		 * create a new bind bucket for the child here. */
b67bfe0d4   Sasha Levin   hlist: drop the n...
147
  		inet_bind_bucket_for_each(tb, &head->chain) {
093d28232   Balazs Scheidler   tproxy: fix hash ...
148
  			if (net_eq(ib_net(tb), sock_net(sk)) &&
3c82a21f4   Robert Shearman   net: allow bindin...
149
  			    tb->l3mdev == l3mdev && tb->port == port)
093d28232   Balazs Scheidler   tproxy: fix hash ...
150
151
  				break;
  		}
b67bfe0d4   Sasha Levin   hlist: drop the n...
152
  		if (!tb) {
093d28232   Balazs Scheidler   tproxy: fix hash ...
153
  			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
3c82a21f4   Robert Shearman   net: allow bindin...
154
155
  						     sock_net(sk), head, port,
  						     l3mdev);
093d28232   Balazs Scheidler   tproxy: fix hash ...
156
157
158
159
160
  			if (!tb) {
  				spin_unlock(&head->lock);
  				return -ENOMEM;
  			}
  		}
d76f3351c   Tim Froidcoeur   net: initialize f...
161
  		inet_csk_update_fastreuse(tb, child);
093d28232   Balazs Scheidler   tproxy: fix hash ...
162
  	}
b4ff3c90e   Nagendra Tomar   inet: Fix __inet_...
163
  	inet_bind_hash(child, tb, port);
53083773d   Pavel Emelyanov   [INET]: Uninline ...
164
  	spin_unlock(&head->lock);
093d28232   Balazs Scheidler   tproxy: fix hash ...
165
166
  
  	return 0;
53083773d   Pavel Emelyanov   [INET]: Uninline ...
167
  }
53083773d   Pavel Emelyanov   [INET]: Uninline ...
168
  EXPORT_SYMBOL_GPL(__inet_inherit_port);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
  /* Return the lhash2 bucket for @sk, selected by hashing the socket's
   * local address and port (IPv6 address for AF_INET6 sockets, IPv4
   * address otherwise).
   */
  static struct inet_listen_hashbucket *
  inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
  {
  #if IS_ENABLED(CONFIG_IPV6)
  	if (sk->sk_family == AF_INET6)
  		return inet_lhash2_bucket(h,
  					  ipv6_portaddr_hash(sock_net(sk),
  							     &sk->sk_v6_rcv_saddr,
  							     inet_sk(sk)->inet_num));
  #endif
  	return inet_lhash2_bucket(h,
  				  ipv4_portaddr_hash(sock_net(sk),
  						     inet_sk(sk)->inet_rcv_saddr,
  						     inet_sk(sk)->inet_num));
  }
  
  /* Link @sk into its lhash2 bucket under the bucket lock and bump the
   * bucket count.  Does nothing when @h has no lhash2 table.
   */
  static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
  {
  	struct inet_listen_hashbucket *bucket;
  	struct hlist_node *node;
  
  	if (!h->lhash2)
  		return;
  
  	bucket = inet_lhash2_bucket_sk(h, sk);
  	node = &inet_csk(sk)->icsk_listen_portaddr_node;
  
  	spin_lock(&bucket->lock);
  	/* IPv6 reuseport sockets go to the tail, everything else to the head. */
  	if (sk->sk_reuseport && sk->sk_family == AF_INET6)
  		hlist_add_tail_rcu(node, &bucket->head);
  	else
  		hlist_add_head_rcu(node, &bucket->head);
  	bucket->count++;
  	spin_unlock(&bucket->lock);
  }
  
  /* Remove @sk from its lhash2 bucket under the bucket lock and decrement
   * the bucket count.  Does nothing when @h has no lhash2 table; warns
   * once and returns if the socket's portaddr node is not hashed.
   */
  static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
  {
  	struct inet_listen_hashbucket *bucket;
  	struct hlist_node *node;
  
  	if (!h->lhash2)
  		return;
  
  	node = &inet_csk(sk)->icsk_listen_portaddr_node;
  	if (WARN_ON_ONCE(hlist_unhashed(node)))
  		return;
  
  	bucket = inet_lhash2_bucket_sk(h, sk);
  
  	spin_lock(&bucket->lock);
  	hlist_del_init_rcu(node);
  	bucket->count--;
  	spin_unlock(&bucket->lock);
  }
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
222
223
  static inline int compute_score(struct sock *sk, struct net *net,
  				const unsigned short hnum, const __be32 daddr,
34e1ec319   Miaohe Lin   net: ipv4: remove...
224
  				const int dif, const int sdif)
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
225
226
  {
  	int score = -1;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
227

d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
228
  	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
229
  			!ipv6_only_sock(sk)) {
d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
230
231
232
233
  		if (sk->sk_rcv_saddr != daddr)
  			return -1;
  
  		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
e78190581   Mike Manning   net: ensure unbou...
234
  			return -1;
3fa6f616a   David Ahern   net: ipv4: add se...
235

d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
236
  		score = sk->sk_family == PF_INET ? 2 : 1;
7170a9777   Eric Dumazet   net: annotate acc...
237
  		if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
70da268b5   Eric Dumazet   net: SO_INCOMING_...
238
  			score++;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
239
240
241
  	}
  	return score;
  }
80b373f74   Jakub Sitnicki   inet: Extract hel...
242
243
244
245
246
247
248
249
250
251
252
253
254
255
  /* If @sk has sk_reuseport set, ask reuseport_select_sock() for a socket,
   * passing the ehash of the packet's 4-tuple.
   *
   * Returns whatever reuseport_select_sock() returns, or NULL when @sk
   * does not have sk_reuseport set.
   */
  static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
  					    struct sk_buff *skb, int doff,
  					    __be32 saddr, __be16 sport,
  					    __be32 daddr, unsigned short hnum)
  {
  	u32 phash;
  
  	if (!sk->sk_reuseport)
  		return NULL;
  
  	phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
  
  	return reuseport_select_sock(sk, phash, skb, doff);
  }
f3f05f704   Arnaldo Carvalho de Melo   [INET]: Generalis...
256
  /*
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
257
258
   * Here are some nice properties to exploit here. The BSD API
   * does not allow a listening sock to specify the remote port nor the
33b622319   Arnaldo Carvalho de Melo   [INET]: Generalis...
259
260
261
   * remote address for the connection. So always assume those are both
   * wildcarded during the search since they can never be otherwise.
   */
e48c414ee   Arnaldo Carvalho de Melo   [INET]: Generalis...
262

3b24d854c   Eric Dumazet   tcp/dccp: do not ...
263
  /* called with rcu_read_lock() : No refcount taken on the socket */
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
264
265
266
267
268
269
270
  static struct sock *inet_lhash2_lookup(struct net *net,
  				struct inet_listen_hashbucket *ilb2,
  				struct sk_buff *skb, int doff,
  				const __be32 saddr, __be16 sport,
  				const __be32 daddr, const unsigned short hnum,
  				const int dif, const int sdif)
  {
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
271
272
273
  	struct inet_connection_sock *icsk;
  	struct sock *sk, *result = NULL;
  	int score, hiscore = 0;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
274
275
276
  
  	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
  		sk = (struct sock *)icsk;
34e1ec319   Miaohe Lin   net: ipv4: remove...
277
  		score = compute_score(sk, net, hnum, daddr, dif, sdif);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
278
  		if (score > hiscore) {
80b373f74   Jakub Sitnicki   inet: Extract hel...
279
280
281
282
  			result = lookup_reuseport(net, sk, skb, doff,
  						  saddr, sport, daddr, hnum);
  			if (result)
  				return result;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
283
284
285
286
287
288
289
  			result = sk;
  			hiscore = score;
  		}
  	}
  
  	return result;
  }
1559b4aa1   Jakub Sitnicki   inet: Run SK_LOOK...
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
  /* Run the BPF sk_lookup hook for an IPv4 TCP lookup.
   *
   * Returns NULL for any hashinfo other than tcp_hashinfo.  Otherwise
   * returns the socket produced by bpf_sk_lookup_run_v4() (which may be
   * NULL or an error pointer), replaced by a reuseport selection when the
   * hook did not request "no reuseport" and the selection yields a socket.
   */
  static inline struct sock *inet_lookup_run_bpf(struct net *net,
  					       struct inet_hashinfo *hashinfo,
  					       struct sk_buff *skb, int doff,
  					       __be32 saddr, __be16 sport,
  					       __be32 daddr, u16 hnum)
  {
  	struct sock *selected, *sk;
  	bool no_reuseport;
  
  	if (hashinfo != &tcp_hashinfo)
  		return NULL; /* only TCP is supported */
  
  	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
  					    saddr, sport, daddr, hnum, &sk);
  	if (no_reuseport || IS_ERR_OR_NULL(sk))
  		return sk;
  
  	selected = lookup_reuseport(net, sk, skb, doff,
  				    saddr, sport, daddr, hnum);
  
  	return selected ? selected : sk;
  }
c67499c0e   Pavel Emelyanov   [NETNS]: Tcp-v4 s...
312
313
  struct sock *__inet_lookup_listener(struct net *net,
  				    struct inet_hashinfo *hashinfo,
a583636a8   Craig Gallek   inet: refactor in...
314
  				    struct sk_buff *skb, int doff,
da5e36308   Tom Herbert   soreuseport: TCP/...
315
  				    const __be32 saddr, __be16 sport,
fb99c848e   Al Viro   [IPV4]: annotate ...
316
  				    const __be32 daddr, const unsigned short hnum,
3fa6f616a   David Ahern   net: ipv4: add se...
317
  				    const int dif, const int sdif)
99a92ff50   Herbert Xu   [IPV4]: Uninline ...
318
  {
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
319
  	struct inet_listen_hashbucket *ilb2;
d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
320
  	struct sock *result = NULL;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
321
  	unsigned int hash2;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
322

1559b4aa1   Jakub Sitnicki   inet: Run SK_LOOK...
323
324
325
326
327
328
329
  	/* Lookup redirect from BPF */
  	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
  		result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
  					     saddr, sport, daddr, hnum);
  		if (result)
  			goto done;
  	}
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
330
331
  	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
  	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
332
333
334
335
336
  
  	result = inet_lhash2_lookup(net, ilb2, skb, doff,
  				    saddr, sport, daddr, hnum,
  				    dif, sdif);
  	if (result)
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
337
  		goto done;
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
338
339
  
  	/* Lookup lhash2 with INADDR_ANY */
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
340
341
  	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
  	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
342

8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
343
  	result = inet_lhash2_lookup(net, ilb2, skb, doff,
d9fbc7f64   Peter Oskolkov   net: tcp: prefer ...
344
  				    saddr, sport, htonl(INADDR_ANY), hnum,
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
345
  				    dif, sdif);
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
346
  done:
88e235b80   Enrico Weigelt   net: ipv4: drop u...
347
  	if (IS_ERR(result))
8217ca653   Martin KaFai Lau   bpf: Enable BPF_P...
348
  		return NULL;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
349
  	return result;
99a92ff50   Herbert Xu   [IPV4]: Uninline ...
350
  }
8f491069b   Herbert Xu   [IPV4]: Use netwo...
351
  EXPORT_SYMBOL_GPL(__inet_lookup_listener);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
352

05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
353
354
355
  /* All sockets share common refcount, but have different destructors */
  void sock_gen_put(struct sock *sk)
  {
41c6d650f   Reshetova, Elena   net: convert sock...
356
  	if (!refcount_dec_and_test(&sk->sk_refcnt))
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
357
358
359
360
  		return;
  
  	if (sk->sk_state == TCP_TIME_WAIT)
  		inet_twsk_free(inet_twsk(sk));
41b822c59   Eric Dumazet   inet: prepare soc...
361
362
  	else if (sk->sk_state == TCP_NEW_SYN_RECV)
  		reqsk_free(inet_reqsk(sk));
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
363
364
365
366
  	else
  		sk_free(sk);
  }
  EXPORT_SYMBOL_GPL(sock_gen_put);
2c13270b4   Eric Dumazet   inet: factorize s...
367
368
369
370
371
  /* Release the socket reference attached to @skb via sock_gen_put(). */
  void sock_edemux(struct sk_buff *skb)
  {
  	struct sock *sk = skb->sk;
  
  	sock_gen_put(sk);
  }
  EXPORT_SYMBOL(sock_edemux);
5e73ea1a3   Daniel Baluta   ipv4: fix checkpa...
372
  struct sock *__inet_lookup_established(struct net *net,
c67499c0e   Pavel Emelyanov   [NETNS]: Tcp-v4 s...
373
  				  struct inet_hashinfo *hashinfo,
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
374
375
  				  const __be32 saddr, const __be16 sport,
  				  const __be32 daddr, const u16 hnum,
3fa6f616a   David Ahern   net: ipv4: add se...
376
  				  const int dif, const int sdif)
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
377
  {
c72283174   Joe Perches   net: Use a more s...
378
  	INET_ADDR_COOKIE(acookie, saddr, daddr);
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
379
380
  	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
  	struct sock *sk;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
381
  	const struct hlist_nulls_node *node;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
382
383
384
  	/* Optimize here for direct hit, only listening connections can
  	 * have wildcards anyways.
  	 */
9f26b3add   Pavel Emelyanov   inet: add struct ...
385
  	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
f373b53b5   Eric Dumazet   tcp: replace ehas...
386
  	unsigned int slot = hash & hashinfo->ehash_mask;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
387
  	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
388

3ab5aee7f   Eric Dumazet   net: Convert TCP ...
389
390
  begin:
  	sk_nulls_for_each_rcu(sk, node, &head->chain) {
ce43b03e8   Eric Dumazet   net: move inet_dp...
391
392
393
  		if (sk->sk_hash != hash)
  			continue;
  		if (likely(INET_MATCH(sk, net, acookie,
3fa6f616a   David Ahern   net: ipv4: add se...
394
  				      saddr, daddr, ports, dif, sdif))) {
41c6d650f   Reshetova, Elena   net: convert sock...
395
  			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
396
  				goto out;
ce43b03e8   Eric Dumazet   net: move inet_dp...
397
  			if (unlikely(!INET_MATCH(sk, net, acookie,
3fa6f616a   David Ahern   net: ipv4: add se...
398
399
  						 saddr, daddr, ports,
  						 dif, sdif))) {
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
400
  				sock_gen_put(sk);
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
401
402
  				goto begin;
  			}
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
403
  			goto found;
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
404
  		}
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
405
  	}
3ab5aee7f   Eric Dumazet   net: Convert TCP ...
406
407
408
409
410
411
412
  	/*
  	 * if the nulls value we got at the end of this lookup is
  	 * not the expected one, we must restart lookup.
  	 * We probably met an item that was moved to another chain.
  	 */
  	if (get_nulls_value(node) != slot)
  		goto begin;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
413
  out:
05dbc7b59   Eric Dumazet   tcp/dccp: remove ...
414
415
  	sk = NULL;
  found:
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
416
  	return sk;
77a5ba55d   Pavel Emelyanov   [INET]: Uninline ...
417
418
  }
  EXPORT_SYMBOL_GPL(__inet_lookup_established);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
419
420
421
422
423
424
425
  /* Check that the 4-tuple @sk would use with local port @lport is not
   * already present in the established hash and, if it is free, insert
   * @sk there.
   *
   * A matching TIME_WAIT socket for which twsk_unique() returns non-zero
   * does not count as a conflict: it is removed from the chain and either
   * handed back through @twp or, when @twp is NULL, descheduled and put.
   *
   * Returns 0 on success, -EADDRNOTAVAIL if the tuple is already in use.
   *
   * called with local bh disabled
   */
  static int __inet_check_established(struct inet_timewait_death_row *death_row,
  				    struct sock *sk, __u16 lport,
  				    struct inet_timewait_sock **twp)
  {
  	struct inet_hashinfo *hinfo = death_row->hashinfo;
  	struct inet_sock *inet = inet_sk(sk);
  	__be32 daddr = inet->inet_rcv_saddr;
  	__be32 saddr = inet->inet_daddr;
  	int dif = sk->sk_bound_dev_if;
  	struct net *net = sock_net(sk);
  	int sdif = l3mdev_master_ifindex_by_index(net, dif);
  	INET_ADDR_COOKIE(acookie, saddr, daddr);
  	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
  	unsigned int hash = inet_ehashfn(net, daddr, lport,
  					 saddr, inet->inet_dport);
  	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
  	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
  	struct sock *sk2;
  	const struct hlist_nulls_node *node;
  	struct inet_timewait_sock *tw = NULL;
  
  	spin_lock(lock);
  
  	sk_nulls_for_each(sk2, node, &head->chain) {
  		if (sk2->sk_hash != hash)
  			continue;
  
  		if (unlikely(!INET_MATCH(sk2, net, acookie,
  					 saddr, daddr, ports, dif, sdif)))
  			continue;
  
  		if (sk2->sk_state == TCP_TIME_WAIT) {
  			tw = inet_twsk(sk2);
  			if (twsk_unique(sk, sk2, twp))
  				break;
  		}
  		goto not_unique;
  	}
  
  	/* Must record num and sport now. Otherwise we will see
  	 * in hash table socket with a funny identity.
  	 */
  	inet->inet_num = lport;
  	inet->inet_sport = htons(lport);
  	sk->sk_hash = hash;
  	WARN_ON(!sk_unhashed(sk));
  	__sk_nulls_add_node_rcu(sk, &head->chain);
  	if (tw) {
  		/* The recycled TIME_WAIT socket leaves the chain. */
  		sk_nulls_del_node_init_rcu((struct sock *)tw);
  		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
  	}
  	spin_unlock(lock);
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  
  	if (twp) {
  		*twp = tw;
  	} else if (tw) {
  		/* Silly. Should hash-dance instead... */
  		inet_twsk_deschedule_put(tw);
  	}
  	return 0;
  
  not_unique:
  	spin_unlock(lock);
  	return -EADDRNOTAVAIL;
  }
e2baad9e4   Eric Dumazet   tcp: connect() fr...
484
  static u32 inet_sk_port_offset(const struct sock *sk)
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
485
486
  {
  	const struct inet_sock *inet = inet_sk(sk);
e2baad9e4   Eric Dumazet   tcp: connect() fr...
487

c720c7e83   Eric Dumazet   inet: rename some...
488
489
490
  	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
  					  inet->inet_daddr,
  					  inet->inet_dport);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
491
  }
01770a166   Ricardo Dias   tcp: fix race con...
492
493
  /* Searches for an exsiting socket in the ehash bucket list.
   * Returns true if found, false otherwise.
079096f10   Eric Dumazet   tcp/dccp: install...
494
   */
01770a166   Ricardo Dias   tcp: fix race con...
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
  static bool inet_ehash_lookup_by_sk(struct sock *sk,
  				    struct hlist_nulls_head *list)
  {
  	const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
  	const int sdif = sk->sk_bound_dev_if;
  	const int dif = sk->sk_bound_dev_if;
  	const struct hlist_nulls_node *node;
  	struct net *net = sock_net(sk);
  	struct sock *esk;
  
  	INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
  
  	sk_nulls_for_each_rcu(esk, node, list) {
  		if (esk->sk_hash != sk->sk_hash)
  			continue;
  		if (sk->sk_family == AF_INET) {
  			if (unlikely(INET_MATCH(esk, net, acookie,
  						sk->sk_daddr,
  						sk->sk_rcv_saddr,
  						ports, dif, sdif))) {
  				return true;
  			}
  		}
  #if IS_ENABLED(CONFIG_IPV6)
  		else if (sk->sk_family == AF_INET6) {
  			if (unlikely(INET6_MATCH(esk, net,
  						 &sk->sk_v6_daddr,
  						 &sk->sk_v6_rcv_saddr,
  						 ports, dif, sdif))) {
  				return true;
  			}
  		}
  #endif
  	}
  	return false;
  }
  
  /* Insert a socket into ehash, and eventually remove another one
   * (The other one can be a SYN_RECV or TIMEWAIT).
   *
   * When @osk is given it is removed from the chain first, and @sk is
   * inserted only if that removal succeeded.  Otherwise, when
   * @found_dup_sk is non-NULL, the chain is searched for a socket
   * matching @sk; if one exists @sk is not inserted and *found_dup_sk is
   * set to true.
   *
   * Returns true if @sk was inserted.
   */
  bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
  {
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
  	struct inet_ehash_bucket *head;
  	struct hlist_nulls_head *list;
  	spinlock_t *lock;
  	bool inserted = true;
  	bool dup;
  
  	WARN_ON_ONCE(!sk_unhashed(sk));
  
  	sk->sk_hash = sk_ehashfn(sk);
  	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
  	list = &head->chain;
  	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
  
  	spin_lock(lock);
  	if (osk) {
  		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
  		inserted = sk_nulls_del_node_init_rcu(osk);
  	} else if (found_dup_sk) {
  		dup = inet_ehash_lookup_by_sk(sk, list);
  		*found_dup_sk = dup;
  		if (dup)
  			inserted = false;
  	}
  
  	if (inserted)
  		__sk_nulls_add_node_rcu(sk, list);
  
  	spin_unlock(lock);
  
  	return inserted;
  }
01770a166   Ricardo Dias   tcp: fix race con...
568
  /* Insert @sk into ehash through inet_ehash_insert().
   *
   * On success the protocol's in-use counter is bumped.  On failure the
   * socket is accounted as an orphan, moved to TCP_CLOSE, flagged SOCK_DEAD
   * and handed to inet_csk_destroy_sock().
   *
   * Returns the result of inet_ehash_insert().
   */
  bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
  {
  	if (inet_ehash_insert(sk, osk, found_dup_sk)) {
  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  		return true;
  	}

  	/* @sk was not hashed: tear it down. */
  	percpu_counter_inc(sk->sk_prot->orphan_count);
  	inet_sk_set_state(sk, TCP_CLOSE);
  	sock_set_flag(sk, SOCK_DEAD);
  	inet_csk_destroy_sock(sk);
  	return false;
  }
  EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
152da81de   Pavel Emelyanov   [INET]: Uninline ...
583

c125e80b8   Craig Gallek   soreuseport: fast...
584
  static int inet_reuseport_add_sock(struct sock *sk,
fe38d2a1c   Josef Bacik   inet: collapse ip...
585
  				   struct inet_listen_hashbucket *ilb)
c125e80b8   Craig Gallek   soreuseport: fast...
586
  {
90e5d0db2   Craig Gallek   soreuseport: Fix ...
587
  	struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
588
  	const struct hlist_nulls_node *node;
c125e80b8   Craig Gallek   soreuseport: fast...
589
  	struct sock *sk2;
c125e80b8   Craig Gallek   soreuseport: fast...
590
  	kuid_t uid = sock_i_uid(sk);
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
591
  	sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
c125e80b8   Craig Gallek   soreuseport: fast...
592
593
594
595
  		if (sk2 != sk &&
  		    sk2->sk_family == sk->sk_family &&
  		    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
  		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
90e5d0db2   Craig Gallek   soreuseport: Fix ...
596
  		    inet_csk(sk2)->icsk_bind_hash == tb &&
c125e80b8   Craig Gallek   soreuseport: fast...
597
  		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
fe38d2a1c   Josef Bacik   inet: collapse ip...
598
  		    inet_rcv_saddr_equal(sk, sk2, false))
2dbb9b9e6   Martin KaFai Lau   bpf: Introduce BP...
599
600
  			return reuseport_add_sock(sk, sk2,
  						  inet_rcv_saddr_any(sk));
c125e80b8   Craig Gallek   soreuseport: fast...
601
  	}
2dbb9b9e6   Martin KaFai Lau   bpf: Introduce BP...
602
  	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
c125e80b8   Craig Gallek   soreuseport: fast...
603
  }
fe38d2a1c   Josef Bacik   inet: collapse ip...
604
  int __inet_hash(struct sock *sk, struct sock *osk)
152da81de   Pavel Emelyanov   [INET]: Uninline ...
605
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
606
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
5caea4ea7   Eric Dumazet   net: listening_ha...
607
  	struct inet_listen_hashbucket *ilb;
c125e80b8   Craig Gallek   soreuseport: fast...
608
  	int err = 0;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
609

5e0724d02   Eric Dumazet   tcp/dccp: fix has...
610
  	if (sk->sk_state != TCP_LISTEN) {
01770a166   Ricardo Dias   tcp: fix race con...
611
  		inet_ehash_nolisten(sk, osk, NULL);
c125e80b8   Craig Gallek   soreuseport: fast...
612
  		return 0;
5e0724d02   Eric Dumazet   tcp/dccp: fix has...
613
  	}
547b792ca   Ilpo Järvinen   net: convert BUG_...
614
  	WARN_ON(!sk_unhashed(sk));
5caea4ea7   Eric Dumazet   net: listening_ha...
615
  	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
152da81de   Pavel Emelyanov   [INET]: Uninline ...
616

5caea4ea7   Eric Dumazet   net: listening_ha...
617
  	spin_lock(&ilb->lock);
c125e80b8   Craig Gallek   soreuseport: fast...
618
  	if (sk->sk_reuseport) {
fe38d2a1c   Josef Bacik   inet: collapse ip...
619
  		err = inet_reuseport_add_sock(sk, ilb);
c125e80b8   Craig Gallek   soreuseport: fast...
620
621
622
  		if (err)
  			goto unlock;
  	}
d296ba60d   Craig Gallek   soreuseport: Reso...
623
624
  	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
  		sk->sk_family == AF_INET6)
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
625
  		__sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
d296ba60d   Craig Gallek   soreuseport: Reso...
626
  	else
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
627
  		__sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
628
  	inet_hash2(hashinfo, sk);
76d013b20   Martin KaFai Lau   inet: Add a count...
629
  	ilb->count++;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
630
  	sock_set_flag(sk, SOCK_RCU_FREE);
c29a0bc4d   Pavel Emelyanov   [SOCK][NETNS]: Ad...
631
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
c125e80b8   Craig Gallek   soreuseport: fast...
632
  unlock:
5caea4ea7   Eric Dumazet   net: listening_ha...
633
  	spin_unlock(&ilb->lock);
c125e80b8   Craig Gallek   soreuseport: fast...
634
635
  
  	return err;
152da81de   Pavel Emelyanov   [INET]: Uninline ...
636
  }
77a6a471b   Eric Dumazet   ipv6: get rid of ...
637
  EXPORT_SYMBOL(__inet_hash);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
638

086c653f5   Craig Gallek   sock: struct prot...
639
  int inet_hash(struct sock *sk)
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
640
  {
c125e80b8   Craig Gallek   soreuseport: fast...
641
  	int err = 0;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
642
643
  	if (sk->sk_state != TCP_CLOSE) {
  		local_bh_disable();
fe38d2a1c   Josef Bacik   inet: collapse ip...
644
  		err = __inet_hash(sk, NULL);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
645
646
  		local_bh_enable();
  	}
086c653f5   Craig Gallek   sock: struct prot...
647

c125e80b8   Craig Gallek   soreuseport: fast...
648
  	return err;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
649
650
651
652
653
  }
  EXPORT_SYMBOL_GPL(inet_hash);
  
  void inet_unhash(struct sock *sk)
  {
39d8cda76   Pavel Emelyanov   [SOCK]: Add udp_h...
654
  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
0ba987181   Geert Uytterhoeven   inet: Avoid uniti...
655
  	struct inet_listen_hashbucket *ilb = NULL;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
656
  	spinlock_t *lock;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
657
658
  
  	if (sk_unhashed(sk))
5caea4ea7   Eric Dumazet   net: listening_ha...
659
  		return;
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
660

3b24d854c   Eric Dumazet   tcp/dccp: do not ...
661
  	if (sk->sk_state == TCP_LISTEN) {
76d013b20   Martin KaFai Lau   inet: Add a count...
662
663
  		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
  		lock = &ilb->lock;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
664
  	} else {
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
665
  		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
666
  	}
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
667
  	spin_lock_bh(lock);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
668
669
  	if (sk_unhashed(sk))
  		goto unlock;
c125e80b8   Craig Gallek   soreuseport: fast...
670
671
  	if (rcu_access_pointer(sk->sk_reuseport_cb))
  		reuseport_detach_sock(sk);
0ba987181   Geert Uytterhoeven   inet: Avoid uniti...
672
  	if (ilb) {
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
673
  		inet_unhash2(hashinfo, sk);
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
674
  		ilb->count--;
76d013b20   Martin KaFai Lau   inet: Add a count...
675
  	}
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
676
  	__sk_nulls_del_node_init_rcu(sk);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
677
678
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  unlock:
920de804b   Eric Dumazet   net: Make sure BH...
679
  	spin_unlock_bh(lock);
ab1e0a13d   Arnaldo Carvalho de Melo   [SOCK] proto: Add...
680
681
  }
  EXPORT_SYMBOL_GPL(inet_unhash);
152da81de   Pavel Emelyanov   [INET]: Uninline ...
682

5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
683
  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
5d8c0aa94   Pavel Emelyanov   [INET]: Fix accid...
684
  		struct sock *sk, u32 port_offset,
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
685
  		int (*check_established)(struct inet_timewait_death_row *,
b4d6444ea   Eric Dumazet   inet: get rid of ...
686
  			struct sock *, __u16, struct inet_timewait_sock **))
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
687
688
  {
  	struct inet_hashinfo *hinfo = death_row->hashinfo;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
689
  	struct inet_timewait_sock *tw = NULL;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
690
  	struct inet_bind_hashbucket *head;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
691
  	int port = inet_sk(sk)->inet_num;
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
692
  	struct net *net = sock_net(sk);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
693
694
695
696
  	struct inet_bind_bucket *tb;
  	u32 remaining, offset;
  	int ret, i, low, high;
  	static u32 hint;
3c82a21f4   Robert Shearman   net: allow bindin...
697
  	int l3mdev;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
698
699
700
701
702
703
704
  
  	if (port) {
  		head = &hinfo->bhash[inet_bhashfn(net, port,
  						  hinfo->bhash_size)];
  		tb = inet_csk(sk)->icsk_bind_hash;
  		spin_lock_bh(&head->lock);
  		if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
01770a166   Ricardo Dias   tcp: fix race con...
705
  			inet_ehash_nolisten(sk, NULL, NULL);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
706
707
708
709
710
711
712
713
714
  			spin_unlock_bh(&head->lock);
  			return 0;
  		}
  		spin_unlock(&head->lock);
  		/* No definite answer... Walk to established hash table */
  		ret = check_established(death_row, sk, port, NULL);
  		local_bh_enable();
  		return ret;
  	}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
715

3c82a21f4   Robert Shearman   net: allow bindin...
716
  	l3mdev = inet_sk_bound_l3mdev(sk);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
717
718
719
720
721
  	inet_get_local_port_range(net, &low, &high);
  	high++; /* [32768, 60999] -> [32768, 61000[ */
  	remaining = high - low;
  	if (likely(remaining > 1))
  		remaining &= ~1U;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
722

1580ab63f   Eric Dumazet   tcp/dccp: better ...
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
  	offset = (hint + port_offset) % remaining;
  	/* In first pass we try ports of @low parity.
  	 * inet_csk_get_port() does the opposite choice.
  	 */
  	offset &= ~1U;
  other_parity_scan:
  	port = low + offset;
  	for (i = 0; i < remaining; i += 2, port += 2) {
  		if (unlikely(port >= high))
  			port -= remaining;
  		if (inet_is_local_reserved_port(net, port))
  			continue;
  		head = &hinfo->bhash[inet_bhashfn(net, port,
  						  hinfo->bhash_size)];
  		spin_lock_bh(&head->lock);
227b60f51   Stephen Hemminger   [INET]: local por...
738

1580ab63f   Eric Dumazet   tcp/dccp: better ...
739
740
  		/* Does not bother with rcv_saddr checks, because
  		 * the established check is already unique enough.
07f4c9006   Eric Dumazet   tcp/dccp: try to ...
741
  		 */
1580ab63f   Eric Dumazet   tcp/dccp: better ...
742
  		inet_bind_bucket_for_each(tb, &head->chain) {
3c82a21f4   Robert Shearman   net: allow bindin...
743
744
  			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
  			    tb->port == port) {
1580ab63f   Eric Dumazet   tcp/dccp: better ...
745
746
  				if (tb->fastreuse >= 0 ||
  				    tb->fastreuseport >= 0)
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
747
  					goto next_port;
1580ab63f   Eric Dumazet   tcp/dccp: better ...
748
749
750
751
752
  				WARN_ON(hlist_empty(&tb->owners));
  				if (!check_established(death_row, sk,
  						       port, &tw))
  					goto ok;
  				goto next_port;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
753
  			}
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
754
  		}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
755

1580ab63f   Eric Dumazet   tcp/dccp: better ...
756
  		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
3c82a21f4   Robert Shearman   net: allow bindin...
757
  					     net, head, port, l3mdev);
1580ab63f   Eric Dumazet   tcp/dccp: better ...
758
759
760
  		if (!tb) {
  			spin_unlock_bh(&head->lock);
  			return -ENOMEM;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
761
  		}
1580ab63f   Eric Dumazet   tcp/dccp: better ...
762
763
764
765
766
767
768
  		tb->fastreuse = -1;
  		tb->fastreuseport = -1;
  		goto ok;
  next_port:
  		spin_unlock_bh(&head->lock);
  		cond_resched();
  	}
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
769

1580ab63f   Eric Dumazet   tcp/dccp: better ...
770
771
772
  	offset++;
  	if ((offset & 1) && remaining > 1)
  		goto other_parity_scan;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
773

1580ab63f   Eric Dumazet   tcp/dccp: better ...
774
  	return -EADDRNOTAVAIL;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
775

1580ab63f   Eric Dumazet   tcp/dccp: better ...
776
777
778
779
780
781
782
  ok:
  	hint += i + 2;
  
  	/* Head lock still held and bh's disabled */
  	inet_bind_hash(sk, tb, port);
  	if (sk_unhashed(sk)) {
  		inet_sk(sk)->inet_sport = htons(port);
01770a166   Ricardo Dias   tcp: fix race con...
783
  		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
784
  	}
1580ab63f   Eric Dumazet   tcp/dccp: better ...
785
786
787
788
789
790
791
  	if (tw)
  		inet_twsk_bind_unhash(tw, hinfo);
  	spin_unlock(&head->lock);
  	if (tw)
  		inet_twsk_deschedule_put(tw);
  	local_bh_enable();
  	return 0;
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
792
  }
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
793
794
795
796
797
798
799
  
  /*
   * Bind a port for a connect operation and hash it.
   */
  int inet_hash_connect(struct inet_timewait_death_row *death_row,
  		      struct sock *sk)
  {
e2baad9e4   Eric Dumazet   tcp: connect() fr...
800
801
802
803
804
  	u32 port_offset = 0;
  
  	if (!inet_sk(sk)->inet_num)
  		port_offset = inet_sk_port_offset(sk);
  	return __inet_hash_connect(death_row, sk, port_offset,
b4d6444ea   Eric Dumazet   inet: get rid of ...
805
  				   __inet_check_established);
5ee31fc1e   Pavel Emelyanov   [INET]: Consolida...
806
  }
a7f5e7f16   Arnaldo Carvalho de Melo   [INET]: Generalis...
807
  EXPORT_SYMBOL_GPL(inet_hash_connect);
5caea4ea7   Eric Dumazet   net: listening_ha...
808
809
810
811
  
  void inet_hashinfo_init(struct inet_hashinfo *h)
  {
  	int i;
c25eb3bfb   Eric Dumazet   net: Convert TCP/...
812
  	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
5caea4ea7   Eric Dumazet   net: listening_ha...
813
  		spin_lock_init(&h->listening_hash[i].lock);
8dbd76e79   Eric Dumazet   tcp/dccp: fix pos...
814
815
  		INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
  				      i + LISTENING_NULLS_BASE);
76d013b20   Martin KaFai Lau   inet: Add a count...
816
  		h->listening_hash[i].count = 0;
3b24d854c   Eric Dumazet   tcp/dccp: do not ...
817
  	}
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
818
819
  
  	h->lhash2 = NULL;
5caea4ea7   Eric Dumazet   net: listening_ha...
820
  }
5caea4ea7   Eric Dumazet   net: listening_ha...
821
  EXPORT_SYMBOL_GPL(inet_hashinfo_init);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
822

c92c81df9   Peter Oskolkov   net: dccp: fix ke...
823
824
825
826
827
828
829
830
831
832
  /* Initialise every lhash2 bucket of @h, indices 0..lhash2_mask inclusive:
   * empty list head, zero count, initialised lock.
   * h->lhash2 and h->lhash2_mask must already be set by the caller.
   */
  static void init_hashinfo_lhash2(struct inet_hashinfo *h)
  {
  	int i;

  	for (i = 0; i <= h->lhash2_mask; i++) {
  		h->lhash2[i].count = 0;
  		INIT_HLIST_HEAD(&h->lhash2[i].head);
  		spin_lock_init(&h->lhash2[i].lock);
  	}
  }
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
833
834
835
836
837
  /* Boot-time setup of the lhash2 table of @h.
   *
   * The table is obtained from alloc_large_system_hash(); @name,
   * @numentries, @scale, @low_limit and @high_limit are passed through
   * unchanged, and &h->lhash2_mask is handed to the allocator (presumably
   * so it can report the resulting mask -- confirm against its
   * documentation).  The buckets are then initialised.
   */
  void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
  				unsigned long numentries, int scale,
  				unsigned long low_limit,
  				unsigned long high_limit)
  {
  	h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2),
  					    numentries, scale, 0, NULL,
  					    &h->lhash2_mask,
  					    low_limit, high_limit);
  	init_hashinfo_lhash2(h);
  }
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
849

c92c81df9   Peter Oskolkov   net: dccp: fix ke...
850
851
852
853
854
855
856
857
858
859
860
861
  /* Allocate and initialise an lhash2 table of INET_LHTABLE_SIZE buckets
   * for @h using kmalloc_array().
   *
   * Returns 0 on success or -ENOMEM if the allocation fails; on failure
   * lhash2_mask is left untouched.
   */
  int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
  {
  	h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2),
  				  GFP_KERNEL);
  	if (!h->lhash2)
  		return -ENOMEM;

  	/* The mask is only valid if INET_LHTABLE_SIZE is a power of 2. */
  	h->lhash2_mask = INET_LHTABLE_SIZE - 1;
  	BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);

  	init_hashinfo_lhash2(h);

  	return 0;
  }
  EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
61b7c691c   Martin KaFai Lau   inet: Add a 2nd l...
864

095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
865
866
  int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
  {
89e478a2a   Eric Dumazet   tcp: suppress a d...
867
  	unsigned int locksz = sizeof(spinlock_t);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
868
  	unsigned int i, nblocks = 1;
89e478a2a   Eric Dumazet   tcp: suppress a d...
869
  	if (locksz != 0) {
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
870
  		/* allocate 2 cache lines or at least one spinlock per cpu */
89e478a2a   Eric Dumazet   tcp: suppress a d...
871
  		nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
872
873
874
875
  		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
  
  		/* no more locks than number of hash buckets */
  		nblocks = min(nblocks, hashinfo->ehash_mask + 1);
752ade68c   Michal Hocko   treewide: use kv[...
876
  		hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
095dc8e0c   Eric Dumazet   tcp: fix/cleanup ...
877
878
879
880
881
882
883
884
885
886
  		if (!hashinfo->ehash_locks)
  			return -ENOMEM;
  
  		for (i = 0; i < nblocks; i++)
  			spin_lock_init(&hashinfo->ehash_locks[i]);
  	}
  	hashinfo->ehash_locks_mask = nblocks - 1;
  	return 0;
  }
  EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);