Blame view

net/ipv4/tcp_fastopen.c 15.8 KB
b24413180   Greg Kroah-Hartman   License cleanup: ...
1
  // SPDX-License-Identifier: GPL-2.0
cf80e0e47   Herbert Xu   tcp: Use ahash
2
  #include <linux/crypto.h>
104671636   Jerry Chu   tcp: TCP Fast Ope...
3
  #include <linux/err.h>
2100c8d2d   Yuchung Cheng   net-tcp: Fast Ope...
4
5
  #include <linux/init.h>
  #include <linux/kernel.h>
104671636   Jerry Chu   tcp: TCP Fast Ope...
6
7
8
9
10
11
  #include <linux/list.h>
  #include <linux/tcp.h>
  #include <linux/rcupdate.h>
  #include <linux/rculist.h>
  #include <net/inetpeer.h>
  #include <net/tcp.h>
2100c8d2d   Yuchung Cheng   net-tcp: Fast Ope...
12

437138485   Haishuang Yan   ipv4: Namespaceif...
13
/* Lazily install a random primary TFO key for this netns, but only if no
 * context has been published yet.  Safe to call repeatedly; the common
 * case (key already present) takes only an RCU read-side check.
 */
void tcp_fastopen_init_key_once(struct net *net)
{
	u8 key[TCP_FASTOPEN_KEY_LENGTH];
	struct tcp_fastopen_context *ctxt;

	/* Fast path: a context is already published for this netns. */
	rcu_read_lock();
	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctxt) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	/* tcp_fastopen_reset_cipher publishes the new context
	 * atomically, so we allow this race happening here.
	 *
	 * All call sites of tcp_fastopen_cookie_gen also check
	 * for a valid cookie, so this is an acceptable risk.
	 */
	get_random_bytes(key, sizeof(key));
	/* sk == NULL: install as the netns-wide key, no backup key. */
	tcp_fastopen_reset_cipher(net, NULL, key, NULL);
}
104671636   Jerry Chu   tcp: TCP Fast Ope...
35
36
37
38
/* RCU callback: free a retired fastopen context once all readers are done.
 * kzfree() zeroes the memory first, since the context holds key material.
 */
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
	struct tcp_fastopen_context *ctx =
	    container_of(head, struct tcp_fastopen_context, rcu);

	kzfree(ctx);
}
1fba70e5b   Yuchung Cheng   tcp: socket optio...
42
43
44
45
46
47
48
49
50
/* Release the per-socket fastopen context (if any) when the listener goes
 * away.  The "1" argument asserts the caller guarantees exclusive access,
 * so no lock is taken here; the actual free is deferred via RCU.
 */
void tcp_fastopen_destroy_cipher(struct sock *sk)
{
	struct tcp_fastopen_context *ctx;

	ctx = rcu_dereference_protected(
			inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
	if (ctx)
		call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
}
437138485   Haishuang Yan   ipv4: Namespaceif...
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/* Tear down the netns-wide fastopen context (netns exit path).  The pointer
 * is cleared under tcp_fastopen_ctx_lock so it cannot race with
 * tcp_fastopen_reset_cipher(); the old context is freed after a grace
 * period so concurrent cookie generation can finish with it.
 */
void tcp_fastopen_ctx_destroy(struct net *net)
{
	struct tcp_fastopen_context *ctxt;

	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);

	ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
				lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
	rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);

	if (ctxt)
		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}
1fba70e5b   Yuchung Cheng   tcp: socket optio...
65
/* Install new fastopen key material, replacing any existing context.
 *
 * @net:         netns whose context lock (and possibly context) is used
 * @sk:          if non-NULL, install on this listener's per-socket context;
 *               otherwise install on the netns-wide context
 * @primary_key: 16 bytes, mandatory; loaded unaligned little-endian
 * @backup_key:  optional second 16-byte key (enables key rotation)
 *
 * Returns 0 on success or -ENOMEM.  The old context is published-out under
 * the spinlock and freed after an RCU grace period, so readers never see a
 * half-updated context.
 */
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
			      void *primary_key, void *backup_key)
{
	struct tcp_fastopen_context *ctx, *octx;
	struct fastopen_queue *q;
	int err = 0;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	/* Keys arrive as raw bytes from userspace/caller; decode them as
	 * two unaligned little-endian 64-bit words per siphash key.
	 */
	ctx->key[0].key[0] = get_unaligned_le64(primary_key);
	ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8);
	if (backup_key) {
		ctx->key[1].key[0] = get_unaligned_le64(backup_key);
		ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8);
		ctx->num = 2;
	} else {
		ctx->num = 1;
	}
	/* Swap in the new context under the netns lock so concurrent
	 * resets serialize; octx keeps the displaced context for freeing.
	 */
	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
	if (sk) {
		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
		octx = rcu_dereference_protected(q->ctx,
			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
		rcu_assign_pointer(q->ctx, ctx);
	} else {
		octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
		rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
	}
	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);

	if (octx)
		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
out:
	return err;
}
483642e5e   Christoph Paasch   tcp: introduce __...
105
106
/* Compute a fastopen cookie for one key: siphash over the source and
 * destination addresses taken directly from the SYN's IP header.
 *
 * Returns true and fills @foc (len = TCP_FASTOPEN_COOKIE_SIZE) for
 * AF_INET / AF_INET6 requests; false for any other family.
 *
 * Note: the hash covers saddr and daddr as one contiguous region
 * (sizeof(saddr) + sizeof(daddr) starting at &saddr), relying on their
 * adjacency in the ip/ipv6 header layout.
 */
static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
					     struct sk_buff *syn,
					     const siphash_key_t *key,
					     struct tcp_fastopen_cookie *foc)
{
	/* The cookie is exactly one siphash output word. */
	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));

	if (req->rsk_ops->family == AF_INET) {
		const struct iphdr *iph = ip_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
					  sizeof(iph->saddr) +
					  sizeof(iph->daddr),
					  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (req->rsk_ops->family == AF_INET6) {
		const struct ipv6hdr *ip6h = ipv6_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr,
					  sizeof(ip6h->saddr) +
					  sizeof(ip6h->daddr),
					  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#endif
	return false;
}
c681edae3   Ard Biesheuvel   net: ipv4: move t...
135
136
/* Generate the fastopen cookie by applying SipHash to both the source and
 * destination addresses.
 *
 * Always uses the primary key (key[0]); if no context is installed, @foc
 * is left untouched (callers must pre-initialize it).
 */
static void tcp_fastopen_cookie_gen(struct sock *sk,
				    struct request_sock *req,
				    struct sk_buff *syn,
				    struct tcp_fastopen_cookie *foc)
{
	struct tcp_fastopen_context *ctx;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (ctx)
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
	rcu_read_unlock();
}
5b7ed0892   Yuchung Cheng   tcp: move fastope...
151

61d2bcae9   Eric Dumazet   tcp: fastopen: ac...
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
 * queue this additional data / FIN.
 *
 * Clones the skb so the caller's copy is untouched; silently drops the
 * data on clone failure (the peer will retransmit).
 */
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Nothing beyond the SYN itself: no payload/FIN to queue. */
	if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
		return;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	skb_dst_drop(skb);
	/* segs_in has been initialized to 1 in tcp_create_openreq_child().
	 * Hence, reset segs_in to 0 before calling tcp_segs_in()
	 * to avoid double counting.  Also, tcp_segs_in() expects
	 * skb->len to include the tcp_hdrlen.  Hence, it should
	 * be called before __skb_pull().
	 */
	tp->segs_in = 0;
	tcp_segs_in(tp, skb);
	__skb_pull(skb, tcp_hdrlen(skb));
	sk_forced_mem_schedule(sk, skb->truesize);
	skb_set_owner_r(skb, sk);

	/* Consume the SYN: advance seq past it and clear the flag so the
	 * queued skb looks like plain data to the receive path.
	 */
	TCP_SKB_CB(skb)->seq++;
	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;

	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	tp->syn_data_acked = 1;

	/* u64_stats_update_begin(&tp->syncp) not needed here,
	 * as we certainly are not changing upper 32bit value (0)
	 */
	tp->bytes_received = skb->len;

	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		tcp_fin(sk);
}
9092a76d3   Jason Baron   tcp: add backup T...
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/* Validate a client-supplied cookie against every installed key.
 *
 * Returns 0 on no key match, 1 for the primary key, 2 for the backup key.
 * On return, @valid_foc holds the cookie computed with the primary key
 * (so the caller can send a fresh cookie on mismatch); cookies for
 * subsequent keys are computed into the scratch search_foc instead.
 */
static int tcp_fastopen_cookie_gen_check(struct sock *sk,
					 struct request_sock *req,
					 struct sk_buff *syn,
					 struct tcp_fastopen_cookie *orig,
					 struct tcp_fastopen_cookie *valid_foc)
{
	struct tcp_fastopen_cookie search_foc = { .len = -1 };
	struct tcp_fastopen_cookie *foc = valid_foc;
	struct tcp_fastopen_context *ctx;
	int i, ret = 0;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (!ctx)
		goto out;
	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc);
		if (tcp_fastopen_cookie_match(foc, orig)) {
			ret = i + 1;	/* 1-based index of matching key */
			goto out;
		}
		/* Only the first (primary) result lands in *valid_foc. */
		foc = &search_foc;
	}
out:
	rcu_read_unlock();
	return ret;
}
7c85af881   Eric Dumazet   tcp: avoid reorde...
220
221
/* Create a full child socket from a fastopen SYN so its payload can be
 * accepted before the 3WHS completes.  Returns the child, or NULL if
 * syn_recv_sock() failed (caller then falls back to plain SYN-ACK).
 */
static struct sock *tcp_fastopen_create_child(struct sock *sk,
					      struct sk_buff *skb,
					      struct request_sock *req)
{
	struct tcp_sock *tp;
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	struct sock *child;
	bool own_req;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
							 NULL, &own_req);
	if (!child)
		return NULL;

	/* Account this request against the listener's TFO queue limit. */
	spin_lock(&queue->fastopenq.lock);
	queue->fastopenq.qlen++;
	spin_unlock(&queue->fastopenq.lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	rcu_assign_pointer(tp->fastopen_rsk, req);
	tcp_rsk(req)->tfo_listener = true;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
	tp->max_window = tp->snd_wnd;

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the ehash
	 * because it's been added to the accept queue directly.
	 */
	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

	/* One ref for the child, one for the retransmit timer path. */
	refcount_set(&req->rsk_refcnt, 2);

	/* Now finish processing the fastopen child socket. */
	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);

	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

	/* Queue any payload/FIN carried with the SYN. */
	tcp_fastopen_add_skb(child, skb);

	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
	tp->rcv_wup = tp->rcv_nxt;
	/* tcp_conn_request() is sending the SYNACK,
	 * and queues the child into listener accept queue.
	 */
	return child;
}
5b7ed0892   Yuchung Cheng   tcp: move fastope...
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
  
/* Decide whether this listener may accept another fastopen request.
 * Returns false when TFO is disabled on the listener (max_qlen == 0) or
 * the pending-TFO queue is full; may reclaim one RST-aborted request to
 * make room.
 */
static bool tcp_fastopen_queue_check(struct sock *sk)
{
	struct fastopen_queue *fastopenq;

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validating the cookie in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
	if (fastopenq->max_qlen == 0)
		return false;

	if (fastopenq->qlen >= fastopenq->max_qlen) {
		struct request_sock *req1;
		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		/* No reset-aborted request old enough to evict yet. */
		if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			spin_unlock(&fastopenq->lock);
			return false;
		}
		/* Evict the oldest RST-aborted request to free a slot. */
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_put(req1);
	}
	return true;
}
71c02379c   Christoph Paasch   tcp: Configure TF...
310
311
312
313
314
315
316
317
  static bool tcp_fastopen_no_cookie(const struct sock *sk,
  				   const struct dst_entry *dst,
  				   int flag)
  {
  	return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
  	       tcp_sk(sk)->fastopen_no_cookie ||
  	       (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
  }
89278c9dc   Yuchung Cheng   tcp: simplify fas...
318
319
320
321
/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
 * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
 * cookie request (foc->len == 0).
 *
 * Returns the fastopen child socket when TFO is performed, NULL otherwise.
 * On the NULL path *foc is overwritten with the cookie to echo back
 * (len == -1 means "send none").
 */
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      const struct dst_entry *dst)
{
	/* SYN carries data iff it spans more than the SYN sequence itself. */
	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
	int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
	struct sock *child;
	int ret = 0;

	if (foc->len == 0) /* Client requests a cookie */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

	/* Bail unless server TFO is on, the SYN is TFO-relevant (has data
	 * or a cookie/cookie-request option), and queue limits allow it.
	 */
	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
	      (syn_data || foc->len >= 0) &&
	      tcp_fastopen_queue_check(sk))) {
		foc->len = -1;
		return NULL;
	}

	if (syn_data &&
	    tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
		goto fastopen;

	if (foc->len == 0) {
		/* Client requests a cookie. */
		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
	} else if (foc->len > 0) {
		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
						    &valid_foc);
		if (!ret) {
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		} else {
			/* Cookie is valid. Create a (full) child socket to
			 * accept the data in SYN before returning a SYN-ACK to
			 * ack the data. If we fail to create the socket, fall
			 * back and ack the ISN only but includes the same
			 * cookie.
			 *
			 * Note: Data-less SYN with valid cookie is allowed to
			 * send data in SYN_RECV state.
			 */
fastopen:
			child = tcp_fastopen_create_child(sk, skb, req);
			if (child) {
				if (ret == 2) {
					/* Matched the backup key: echo a
					 * primary-key cookie so the client
					 * refreshes.
					 */
					valid_foc.exp = foc->exp;
					*foc = valid_foc;
					NET_INC_STATS(sock_net(sk),
						      LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
				} else {
					foc->len = -1;
				}
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPFASTOPENPASSIVE);
				return child;
			}
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		}
	}
	/* No TFO: echo the freshly computed cookie (or none) to the client. */
	valid_foc.exp = foc->exp;
	*foc = valid_foc;
	return NULL;
}
065263f40   Wei Wang   net/tcp-fastopen:...
387
388
389
390
  
/* Client side: decide whether data can be carried in the SYN.
 *
 * Fills *mss and *cookie from the fastopen metrics cache.  Returns true
 * when SYN data may be sent (cached cookie present, or no cookie needed
 * for this destination); false otherwise.  cookie->len is set to -1 when
 * no cookie option should be included.
 */
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie)
{
	const struct dst_entry *dst;

	tcp_fastopen_cache_get(sk, mss, cookie);

	/* Firewall blackhole issue check */
	if (tcp_fastopen_active_should_disable(sk)) {
		cookie->len = -1;
		return false;
	}

	dst = __sk_dst_get(sk);

	if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
		cookie->len = -1;
		return true;
	}
	return cookie->len > 0;
}
19f6d3f3c   Wei Wang   net/tcp-fastopen:...
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
  
/* This function checks if we want to defer sending SYN until the first
 * write().  We defer under the following conditions:
 * 1. fastopen_connect sockopt is set
 * 2. we have a valid cookie
 * Return value: return true if we want to defer until application writes data
 *               return false if we want to send out SYN immediately
 */
bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
{
	struct tcp_fastopen_cookie cookie = { .len = 0 };
	struct tcp_sock *tp = tcp_sk(sk);
	u16 mss;

	if (tp->fastopen_connect && !tp->fastopen_req) {
		if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
			inet_sk(sk)->defer_connect = 1;
			return true;
		}

		/* Alloc fastopen_req in order for FO option to be included
		 * in SYN
		 */
		tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
					   sk->sk_allocation);
		if (tp->fastopen_req)
			tp->fastopen_req->cookie = cookie;
		else
			*err = -ENOBUFS;	/* SYN still sent, just without TFO */
	}
	return false;
}
EXPORT_SYMBOL(tcp_fastopen_defer_connect);
cf1ef3f07   Wei Wang   net/tcp_fastopen:...
441
442
443
444
445
446
447
448
449
  
  /*
   * The following code block is to deal with middle box issues with TFO:
   * Middlebox firewall issues can potentially cause server's data being
   * blackholed after a successful 3WHS using TFO.
   * The proposed solution is to disable active TFO globally under the
   * following circumstances:
   *   1. client side TFO socket receives out of order FIN
   *   2. client side TFO socket receives out of order RST
7268586ba   Yuchung Cheng   tcp: pause Fast O...
450
451
   *   3. client side TFO socket has timed out three times consecutively during
   *      or after handshake
cf1ef3f07   Wei Wang   net/tcp_fastopen:...
452
453
454
455
456
   * We disable active side TFO globally for 1hr at first. Then if it
   * happens again, we disable it for 2h, then 4h, 8h, ...
   * And we reset the timeout back to 1hr when we see a successful active
   * TFO connection with data exchanges.
   */
cf1ef3f07   Wei Wang   net/tcp_fastopen:...
457
458
459
/* Disable active TFO and record current jiffies and
 * tfo_active_disable_times
 */
void tcp_fastopen_active_disable(struct sock *sk)
{
	struct net *net = sock_net(sk);

	/* Bump the per-netns disable count (drives the exponential backoff)
	 * and stamp when this blackhole episode started.
	 */
	atomic_inc(&net->ipv4.tfo_active_disable_times);
	net->ipv4.tfo_active_disable_stamp = jiffies;
	NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
  
/* Calculate timeout for tfo active disable
 * Return true if we are still in the active TFO disable period
 * Return false if timeout already expired and we should use active TFO
 */
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
	unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
	int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
	unsigned long timeout;
	int multiplier;

	/* Never disabled: active TFO is fine. */
	if (!tfo_da_times)
		return false;

	/* Limit timeout to max: 2^6 * initial timeout */
	multiplier = 1 << min(tfo_da_times - 1, 6);
	timeout = multiplier * tfo_bh_timeout * HZ;
	if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
		return true;

	/* Mark check bit so we can check for successful active TFO
	 * condition and reset tfo_active_disable_times
	 */
	tcp_sk(sk)->syn_fastopen_ch = 1;
	return false;
}
  
/* Disable active TFO if FIN is the only packet in the ofo queue
 * and no data is received.
 * Also check if we can reset tfo_active_disable_times if data is
 * received successfully on a marked active TFO sockets opened on
 * a non-loopback interface
 */
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst;
	struct sk_buff *skb;

	if (!tp->syn_fastopen)
		return;

	if (!tp->data_segs_in) {
		/* No data ever arrived: a lone FIN sitting in the
		 * out-of-order queue is the blackhole signature.
		 */
		skb = skb_rb_first(&tp->out_of_order_queue);
		if (skb && !skb_rb_next(skb)) {
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
				tcp_fastopen_active_disable(sk);
				return;
			}
		}
	} else if (tp->syn_fastopen_ch &&
		   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
		/* Data flowed on a marked socket: clear the disable counter,
		 * but only if the route isn't loopback (loopback success
		 * says nothing about middleboxes).
		 */
		dst = sk_dst_get(sk);
		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
			atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
		dst_release(dst);
	}
}
7268586ba   Yuchung Cheng   tcp: pause Fast O...
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
  
  void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
  {
  	u32 timeouts = inet_csk(sk)->icsk_retransmits;
  	struct tcp_sock *tp = tcp_sk(sk);
  
  	/* Broken middle-boxes may black-hole Fast Open connection during or
  	 * even after the handshake. Be extremely conservative and pause
  	 * Fast Open globally after hitting the third consecutive timeout or
  	 * exceeding the configured timeout limit.
  	 */
  	if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
  	    (timeouts == 2 || (timeouts < 2 && expired))) {
  		tcp_fastopen_active_disable(sk);
  		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
  	}
  }