Blame view
net/rds/tcp.c
20.3 KB
70041088e RDS: Add TCP tran... |
1 |
/* |
1e2b44e78 rds: Enable RDS I... |
2 |
* Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
70041088e RDS: Add TCP tran... |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> |
5a0e3ad6a include cleanup: ... |
34 |
#include <linux/slab.h> |
70041088e RDS: Add TCP tran... |
35 |
#include <linux/in.h> |
3a9a231d9 net: Fix files ex... |
36 |
#include <linux/module.h> |
70041088e RDS: Add TCP tran... |
37 |
#include <net/tcp.h> |
467fa1535 RDS-TCP: Support ... |
38 39 |
#include <net/net_namespace.h> #include <net/netns/generic.h> |
eee2fa6ab rds: Changing IP ... |
40 |
#include <net/addrconf.h> |
70041088e RDS: Add TCP tran... |
41 42 43 44 45 46 47 |
#include "rds.h" #include "tcp.h" /* only for info exporting */ static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); static LIST_HEAD(rds_tcp_tc_list); |
1e2b44e78 rds: Enable RDS I... |
48 49 50 51 |
/* rds_tcp_tc_count counts only IPv4 connections. * rds6_tcp_tc_count counts both IPv4 and IPv6 connections. */ |
ff51bf841 rds: make local f... |
52 |
static unsigned int rds_tcp_tc_count; |
e65d4d963 rds: Remove IPv6 ... |
53 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
54 |
static unsigned int rds6_tcp_tc_count; |
e65d4d963 rds: Remove IPv6 ... |
55 |
#endif |
70041088e RDS: Add TCP tran... |
56 57 58 59 |
/* Track rds_tcp_connection structs so they can be cleaned up */ static DEFINE_SPINLOCK(rds_tcp_conn_lock); static LIST_HEAD(rds_tcp_conn_list); |
ebeeb1ad9 rds: tcp: use rds... |
60 |
static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); |
70041088e RDS: Add TCP tran... |
61 62 |
static struct kmem_cache *rds_tcp_conn_slab; |
c6a58ffed RDS: TCP: Add sys... |
63 |
static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, |
32927393d sysctl: pass kern... |
64 |
void *buffer, size_t *lenp, loff_t *fpos); |
c6a58ffed RDS: TCP: Add sys... |
65 |
|
af73e72dc RDS: TCP: Fix non... |
66 67 |
static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; |
c6a58ffed RDS: TCP: Add sys... |
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
static struct ctl_table rds_tcp_sysctl_table[] = { #define RDS_TCP_SNDBUF 0 { .procname = "rds_tcp_sndbuf", /* data is per-net pointer */ .maxlen = sizeof(int), .mode = 0644, .proc_handler = rds_tcp_skbuf_handler, .extra1 = &rds_tcp_min_sndbuf, }, #define RDS_TCP_RCVBUF 1 { .procname = "rds_tcp_rcvbuf", /* data is per-net pointer */ .maxlen = sizeof(int), .mode = 0644, .proc_handler = rds_tcp_skbuf_handler, .extra1 = &rds_tcp_min_rcvbuf, }, { } }; |
b589513e6 rds: tcp: compute... |
90 |
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) |
70041088e RDS: Add TCP tran... |
91 |
{ |
b589513e6 rds: tcp: compute... |
92 93 |
/* seq# of the last byte of data in tcp send buffer */ return tcp_sk(tc->t_sock->sk)->write_seq; |
70041088e RDS: Add TCP tran... |
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
} u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) { return tcp_sk(tc->t_sock->sk)->snd_una; } void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc) { rdsdebug("restoring sock %p callbacks from tc %p ", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_del_init(&tc->t_list_item); |
e65d4d963 rds: Remove IPv6 ... |
111 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
112 |
rds6_tcp_tc_count--; |
e65d4d963 rds: Remove IPv6 ... |
113 |
#endif |
1e2b44e78 rds: Enable RDS I... |
114 115 |
if (!tc->t_cpath->cp_conn->c_isv6) rds_tcp_tc_count--; |
70041088e RDS: Add TCP tran... |
116 117 118 119 120 121 122 123 124 125 126 127 128 |
spin_unlock(&rds_tcp_tc_list_lock); tc->t_sock = NULL; sock->sk->sk_write_space = tc->t_orig_write_space; sock->sk->sk_data_ready = tc->t_orig_data_ready; sock->sk->sk_state_change = tc->t_orig_state_change; sock->sk->sk_user_data = NULL; write_unlock_bh(&sock->sk->sk_callback_lock); } /* |
335b48d98 RDS: TCP: Add/use... |
129 130 131 132 133 134 135 136 137 |
* rds_tcp_reset_callbacks() switches the to the new sock and * returns the existing tc->t_sock. * * The only functions that set tc->t_sock are rds_tcp_set_callbacks * and rds_tcp_reset_callbacks. Send and receive trust that * it is set. The absence of RDS_CONN_UP bit protects those paths * from being called while it isn't set. */ void rds_tcp_reset_callbacks(struct socket *sock, |
ea3b1ea53 RDS: TCP: make ->... |
138 |
struct rds_conn_path *cp) |
335b48d98 RDS: TCP: Add/use... |
139 |
{ |
ea3b1ea53 RDS: TCP: make ->... |
140 |
struct rds_tcp_connection *tc = cp->cp_transport_data; |
335b48d98 RDS: TCP: Add/use... |
141 142 143 144 145 146 147 148 149 |
struct socket *osock = tc->t_sock; if (!osock) goto newsock; /* Need to resolve a duelling SYN between peers. * We have an outstanding SYN to this peer, which may * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting |
ea3b1ea53 RDS: TCP: make ->... |
150 151 |
* cp_transport_data. We quiesce these threads by setting * cp_state to something other than RDS_CONN_UP, and then |
335b48d98 RDS: TCP: Add/use... |
152 153 154 |
* waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). |
9c79440e2 RDS: TCP: fix rac... |
155 156 157 158 159 160 161 162 163 164 |
* * However an incoming syn-ack at this point would end up * marking the conn as RDS_CONN_UP, and would again permit * rds_send_xmi() threads through, so ideally we would * synchronize on RDS_CONN_UP after lock_sock(), but cannot * do that: waiting on !RDS_IN_XMIT after lock_sock() may * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT * would not get set. As a result, we set c_state to * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change * cannot mark rds_conn_path_up() in the window before lock_sock() |
335b48d98 RDS: TCP: Add/use... |
165 |
*/ |
ea3b1ea53 RDS: TCP: make ->... |
166 167 |
atomic_set(&cp->cp_state, RDS_CONN_RESETTING); wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); |
335b48d98 RDS: TCP: Add/use... |
168 169 |
lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ |
ac3615e7f RDS: TCP: Reduce ... |
170 171 |
cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); |
335b48d98 RDS: TCP: Add/use... |
172 173 174 175 176 177 |
if (tc->t_tinc) { rds_inc_put(&tc->t_tinc->ti_inc); tc->t_tinc = NULL; } tc->t_tinc_hdr_rem = sizeof(struct rds_header); tc->t_tinc_data_rem = 0; |
ac3615e7f RDS: TCP: Reduce ... |
178 |
rds_tcp_restore_callbacks(osock, tc); |
335b48d98 RDS: TCP: Add/use... |
179 180 181 |
release_sock(osock->sk); sock_release(osock); newsock: |
ea3b1ea53 RDS: TCP: make ->... |
182 |
rds_send_path_reset(cp); |
335b48d98 RDS: TCP: Add/use... |
183 |
lock_sock(sock->sk); |
ac3615e7f RDS: TCP: Reduce ... |
184 |
rds_tcp_set_callbacks(sock, cp); |
335b48d98 RDS: TCP: Add/use... |
185 186 187 188 189 190 |
release_sock(sock->sk); } /* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments * above rds_tcp_reset_callbacks for notes about synchronization * with data path |
70041088e RDS: Add TCP tran... |
191 |
*/ |
ea3b1ea53 RDS: TCP: make ->... |
192 |
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) |
70041088e RDS: Add TCP tran... |
193 |
{ |
ea3b1ea53 RDS: TCP: make ->... |
194 |
struct rds_tcp_connection *tc = cp->cp_transport_data; |
70041088e RDS: Add TCP tran... |
195 196 197 198 199 200 201 202 |
rdsdebug("setting sock %p callbacks to tc %p ", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); /* done under the callback_lock to serialize with write_space */ spin_lock(&rds_tcp_tc_list_lock); list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); |
e65d4d963 rds: Remove IPv6 ... |
203 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
204 |
rds6_tcp_tc_count++; |
e65d4d963 rds: Remove IPv6 ... |
205 |
#endif |
1e2b44e78 rds: Enable RDS I... |
206 207 |
if (!tc->t_cpath->cp_conn->c_isv6) rds_tcp_tc_count++; |
70041088e RDS: Add TCP tran... |
208 209 210 211 212 213 214 |
spin_unlock(&rds_tcp_tc_list_lock); /* accepted sockets need our listen data ready undone */ if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; |
ea3b1ea53 RDS: TCP: make ->... |
215 |
tc->t_cpath = cp; |
70041088e RDS: Add TCP tran... |
216 217 218 |
tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; |
ea3b1ea53 RDS: TCP: make ->... |
219 |
sock->sk->sk_user_data = cp; |
70041088e RDS: Add TCP tran... |
220 221 222 223 224 225 |
sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; write_unlock_bh(&sock->sk->sk_callback_lock); } |
1e2b44e78 rds: Enable RDS I... |
226 227 228 |
/* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4 * connections for backward compatibility. */ |
1ac507d4f RDS: TCP: report ... |
229 |
static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, |
70041088e RDS: Add TCP tran... |
230 231 232 233 234 235 |
struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_info_tcp_socket tsinfo; struct rds_tcp_connection *tc; unsigned long flags; |
70041088e RDS: Add TCP tran... |
236 237 238 239 240 241 242 |
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); if (len / sizeof(tsinfo) < rds_tcp_tc_count) goto out; list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { |
1e2b44e78 rds: Enable RDS I... |
243 |
struct inet_sock *inet = inet_sk(tc->t_sock->sk); |
70041088e RDS: Add TCP tran... |
244 |
|
1e2b44e78 rds: Enable RDS I... |
245 246 247 248 249 250 251 |
if (tc->t_cpath->cp_conn->c_isv6) continue; tsinfo.local_addr = inet->inet_saddr; tsinfo.local_port = inet->inet_sport; tsinfo.peer_addr = inet->inet_daddr; tsinfo.peer_port = inet->inet_dport; |
70041088e RDS: Add TCP tran... |
252 253 254 255 256 257 |
tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.data_rem = tc->t_tinc_data_rem; tsinfo.last_sent_nxt = tc->t_last_sent_nxt; tsinfo.last_expected_una = tc->t_last_expected_una; tsinfo.last_seen_una = tc->t_last_seen_una; |
3eb450367 rds: add type of ... |
258 |
tsinfo.tos = tc->t_cpath->cp_conn->c_tos; |
70041088e RDS: Add TCP tran... |
259 260 261 262 263 264 265 266 267 268 |
rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); } out: lens->nr = rds_tcp_tc_count; lens->each = sizeof(tsinfo); spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } |
e65d4d963 rds: Remove IPv6 ... |
269 |
#if IS_ENABLED(CONFIG_IPV6) |
b7ff8b103 rds: Extend RDS A... |
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
/* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped * address. */ static void rds6_tcp_tc_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds6_info_tcp_socket tsinfo6; struct rds_tcp_connection *tc; unsigned long flags; spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); if (len / sizeof(tsinfo6) < rds6_tcp_tc_count) goto out; list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { struct sock *sk = tc->t_sock->sk; struct inet_sock *inet = inet_sk(sk); tsinfo6.local_addr = sk->sk_v6_rcv_saddr; tsinfo6.local_port = inet->inet_sport; tsinfo6.peer_addr = sk->sk_v6_daddr; tsinfo6.peer_port = inet->inet_dport; tsinfo6.hdr_rem = tc->t_tinc_hdr_rem; tsinfo6.data_rem = tc->t_tinc_data_rem; tsinfo6.last_sent_nxt = tc->t_last_sent_nxt; tsinfo6.last_expected_una = tc->t_last_expected_una; tsinfo6.last_seen_una = tc->t_last_seen_una; rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6)); } out: lens->nr = rds6_tcp_tc_count; lens->each = sizeof(tsinfo6); spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); } |
e65d4d963 rds: Remove IPv6 ... |
311 |
#endif |
b7ff8b103 rds: Extend RDS A... |
312 |
|
eee2fa6ab rds: Changing IP ... |
313 314 |
/* Check whether @addr is a local address in @net.
 *
 * An IPv4-mapped address is looked up with inet_addr_type().  Any other
 * address is checked with ipv6_chk_addr() when IPv6 is enabled; a non-zero
 * @scope_id restricts that check to the interface with that index.
 *
 * Returns 0 if the address is local, -EADDRNOTAVAIL otherwise (including
 * when @scope_id does not name an existing interface).
 */
static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
			       __u32 scope_id)
{
	struct net_device *dev = NULL;
	int ret = -EADDRNOTAVAIL;

	if (ipv6_addr_v4mapped(addr)) {
		if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL)
			return 0;
		return -EADDRNOTAVAIL;
	}

	/* dev_get_by_index_rcu() takes no reference on the device, so the
	 * pointer is only valid inside the RCU read-side section.  Keep the
	 * section open until the address check that uses dev is done.
	 */
	rcu_read_lock();

	/* If the scope_id is specified, check only those addresses
	 * hosted on the specified interface.
	 */
	if (scope_id != 0) {
		dev = dev_get_by_index_rcu(net, scope_id);
		/* scope_id is not valid... */
		if (!dev)
			goto out;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6_chk_addr(net, addr, dev, 0))
		ret = 0;
#endif
out:
	rcu_read_unlock();
	return ret;
}
66261da16 rds: tcp: cleanup... |
347 348 349 |
static void rds_tcp_conn_free(void *arg) { struct rds_tcp_connection *tc = arg; |
53d0e83f9 rds: tcp: must us... |
350 |
unsigned long flags; |
66261da16 rds: tcp: cleanup... |
351 352 353 |
rdsdebug("freeing tc %p ", tc); |
53d0e83f9 rds: tcp: must us... |
354 |
spin_lock_irqsave(&rds_tcp_conn_lock, flags); |
66261da16 rds: tcp: cleanup... |
355 356 |
if (!tc->t_tcp_node_detached) list_del(&tc->t_tcp_node); |
53d0e83f9 rds: tcp: must us... |
357 |
spin_unlock_irqrestore(&rds_tcp_conn_lock, flags); |
66261da16 rds: tcp: cleanup... |
358 359 360 |
kmem_cache_free(rds_tcp_conn_slab, tc); } |
70041088e RDS: Add TCP tran... |
361 362 363 |
/* Transport conn_alloc hook: allocate one rds_tcp_connection for each of
 * the RDS_MPATH_WORKERS paths of @conn.
 *
 * Every tc is initialised and attached to its path as cp_transport_data
 * while still marked detached.  Only once all allocations have succeeded
 * are the tcs linked onto rds_tcp_conn_list, in a single critical section.
 *
 * Returns 0 on success.  On allocation failure the tcs allocated so far are
 * freed and -ENOMEM is returned.
 */
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
	struct rds_tcp_connection *tc;
	int i, j;

	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
		if (!tc)
			goto fail;

		mutex_init(&tc->t_conn_path_lock);
		tc->t_sock = NULL;
		tc->t_tinc = NULL;
		tc->t_tinc_hdr_rem = sizeof(struct rds_header);
		tc->t_tinc_data_rem = 0;

		conn->c_path[i].cp_transport_data = tc;
		tc->t_cpath = &conn->c_path[i];
		/* not on rds_tcp_conn_list yet; rds_tcp_conn_free() relies
		 * on this flag to skip the list_del()
		 */
		tc->t_tcp_node_detached = true;

		rdsdebug("rds_conn_path [%d] tc %p\n", i,
			 conn->c_path[i].cp_transport_data);
	}

	spin_lock_irq(&rds_tcp_conn_lock);
	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
		tc = conn->c_path[i].cp_transport_data;
		tc->t_tcp_node_detached = false;
		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
	}
	spin_unlock_irq(&rds_tcp_conn_lock);
	return 0;

fail:
	/* paths [0, i) hold a tc, path i is the one that failed */
	for (j = 0; j < i; j++)
		rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
	return -ENOMEM;
}
afb4164d9 RDS: TCP: Refacto... |
401 402 403 404 405 406 407 408 409 410 |
static bool list_has_conn(struct list_head *list, struct rds_connection *conn) { struct rds_tcp_connection *tc, *_tc; list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { if (tc->t_cpath->cp_conn == conn) return true; } return false; } |
ebeeb1ad9 rds: tcp: use rds... |
411 412 413 414 415 416 417 418 419 |
/* Flag the transport as unloading; observed via rds_tcp_is_unloading(). */
static void rds_tcp_set_unloading(void)
{
	atomic_set(&rds_tcp_unloading, 1);
}

/* Transport t_unloading hook.  The flag is module wide, so @conn is not
 * consulted.
 */
static bool rds_tcp_is_unloading(struct rds_connection *conn)
{
	return !!atomic_read(&rds_tcp_unloading);
}
70041088e RDS: Add TCP tran... |
420 421 422 423 424 425 426 |
static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&rds_tcp_conn_lock); |
afb4164d9 RDS: TCP: Refacto... |
427 428 429 430 |
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) list_move_tail(&tc->t_tcp_node, &tmp_list); } |
70041088e RDS: Add TCP tran... |
431 |
spin_unlock_irq(&rds_tcp_conn_lock); |
26e4e6bb6 RDS: TCP: Remove ... |
432 |
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) |
02105b2cc RDS: TCP: Make rd... |
433 |
rds_conn_destroy(tc->t_cpath->cp_conn); |
70041088e RDS: Add TCP tran... |
434 |
} |
467fa1535 RDS-TCP: Support ... |
435 |
static void rds_tcp_exit(void); |
70041088e RDS: Add TCP tran... |
436 |
|
56dc8bce9 rds: add transpor... |
437 438 439 440 441 |
/* Transport get_tos_map hook.  The TCP transport maps every user supplied
 * @tos to the default value 0.
 */
static u8 rds_tcp_get_tos_map(u8 tos)
{
	const u8 default_tos = 0;

	/* all user tos mapped to default 0 for TCP transport */
	return default_tos;
}
70041088e RDS: Add TCP tran... |
442 443 |
struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, |
226f7a7d9 RDS: Rework path ... |
444 445 |
.xmit_path_prepare = rds_tcp_xmit_path_prepare, .xmit_path_complete = rds_tcp_xmit_path_complete, |
70041088e RDS: Add TCP tran... |
446 |
.xmit = rds_tcp_xmit, |
2da43c4a1 RDS: TCP: make re... |
447 |
.recv_path = rds_tcp_recv_path, |
70041088e RDS: Add TCP tran... |
448 449 |
.conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, |
b04e8554f RDS: TCP: Hooks t... |
450 |
.conn_path_connect = rds_tcp_conn_path_connect, |
226f7a7d9 RDS: Rework path ... |
451 |
.conn_path_shutdown = rds_tcp_conn_path_shutdown, |
70041088e RDS: Add TCP tran... |
452 |
.inc_copy_to_user = rds_tcp_inc_copy_to_user, |
70041088e RDS: Add TCP tran... |
453 454 455 |
.inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, .exit = rds_tcp_exit, |
56dc8bce9 rds: add transpor... |
456 |
.get_tos_map = rds_tcp_get_tos_map, |
70041088e RDS: Add TCP tran... |
457 458 |
.t_owner = THIS_MODULE, .t_name = "tcp", |
335776bd6 RDS: Track transp... |
459 |
.t_type = RDS_TRANS_TCP, |
70041088e RDS: Add TCP tran... |
460 |
.t_prefer_loopback = 1, |
5916e2c15 RDS: TCP: Enable ... |
461 |
.t_mp_capable = 1, |
ebeeb1ad9 rds: tcp: use rds... |
462 |
.t_unloading = rds_tcp_is_unloading, |
70041088e RDS: Add TCP tran... |
463 |
}; |
c7d03a00b netns: make struc... |
464 |
static unsigned int rds_tcp_netid; |
467fa1535 RDS-TCP: Support ... |
465 466 467 468 469 |
/* per-network namespace private data for this module */ struct rds_tcp_net { struct socket *rds_tcp_listen_sock; struct work_struct rds_tcp_accept_w; |
c6a58ffed RDS: TCP: Add sys... |
470 471 472 473 |
struct ctl_table_header *rds_tcp_sysctl; struct ctl_table *ctl_table; int sndbuf_size; int rcvbuf_size; |
467fa1535 RDS-TCP: Support ... |
474 |
}; |
c6a58ffed RDS: TCP: Add sys... |
475 476 477 478 479 480 481 482 |
/* All module specific customizations to the RDS-TCP socket should be done in * rds_tcp_tune() and applied after socket creation. */ void rds_tcp_tune(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); |
12abc5ee7 tcp: add tcp_sock... |
483 |
tcp_sock_set_nodelay(sock->sk); |
c6a58ffed RDS: TCP: Add sys... |
484 485 486 487 488 489 490 491 492 493 494 |
lock_sock(sk); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } if (rtn->rcvbuf_size > 0) { sk->sk_sndbuf = rtn->rcvbuf_size; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); } |
467fa1535 RDS-TCP: Support ... |
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 |
static void rds_tcp_accept_worker(struct work_struct *work) { struct rds_tcp_net *rtn = container_of(work, struct rds_tcp_net, rds_tcp_accept_w); while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0) cond_resched(); } void rds_tcp_accept_work(struct sock *sk) { struct net *net = sock_net(sk); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); queue_work(rds_wq, &rtn->rds_tcp_accept_w); } static __net_init int rds_tcp_init_net(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); |
c6a58ffed RDS: TCP: Add sys... |
516 517 |
struct ctl_table *tbl; int err = 0; |
467fa1535 RDS-TCP: Support ... |
518 |
|
c6a58ffed RDS: TCP: Add sys... |
519 520 521 522 523 524 525 526 527 528 529 |
memset(rtn, 0, sizeof(*rtn)); /* {snd, rcv}buf_size default to 0, which implies we let the * stack pick the value, and permit auto-tuning of buffer size. */ if (net == &init_net) { tbl = rds_tcp_sysctl_table; } else { tbl = kmemdup(rds_tcp_sysctl_table, sizeof(rds_tcp_sysctl_table), GFP_KERNEL); if (!tbl) { |
d14a108d5 net: rds: fix spe... |
530 531 |
pr_warn("could not set allocate sysctl table "); |
c6a58ffed RDS: TCP: Add sys... |
532 533 534 535 536 537 538 539 540 541 542 543 544 |
return -ENOMEM; } rtn->ctl_table = tbl; } tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size; tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size; rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl); if (!rtn->rds_tcp_sysctl) { pr_warn("could not register sysctl "); err = -ENOMEM; goto fail; } |
e65d4d963 rds: Remove IPv6 ... |
545 546 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
547 |
rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true); |
e65d4d963 rds: Remove IPv6 ... |
548 549 550 |
#else rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); #endif |
467fa1535 RDS-TCP: Support ... |
551 |
if (!rtn->rds_tcp_listen_sock) { |
1e2b44e78 rds: Enable RDS I... |
552 553 |
pr_warn("could not set up IPv6 listen sock "); |
e65d4d963 rds: Remove IPv6 ... |
554 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
555 556 557 |
/* Try IPv4 as some systems disable IPv6 */ rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); if (!rtn->rds_tcp_listen_sock) { |
e65d4d963 rds: Remove IPv6 ... |
558 |
#endif |
1e2b44e78 rds: Enable RDS I... |
559 560 561 562 |
unregister_net_sysctl_table(rtn->rds_tcp_sysctl); rtn->rds_tcp_sysctl = NULL; err = -EAFNOSUPPORT; goto fail; |
e65d4d963 rds: Remove IPv6 ... |
563 |
#if IS_ENABLED(CONFIG_IPV6) |
1e2b44e78 rds: Enable RDS I... |
564 |
} |
e65d4d963 rds: Remove IPv6 ... |
565 |
#endif |
467fa1535 RDS-TCP: Support ... |
566 567 568 |
} INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); return 0; |
c6a58ffed RDS: TCP: Add sys... |
569 570 571 572 573 |
fail: if (net != &init_net) kfree(tbl); return err; |
467fa1535 RDS-TCP: Support ... |
574 |
} |
467fa1535 RDS-TCP: Support ... |
575 576 577 |
static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; |
467fa1535 RDS-TCP: Support ... |
578 579 |
LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); |
b21dd4506 rds: tcp: Sequenc... |
580 |
struct socket *lsock = rtn->rds_tcp_listen_sock; |
467fa1535 RDS-TCP: Support ... |
581 |
|
467fa1535 RDS-TCP: Support ... |
582 |
rtn->rds_tcp_listen_sock = NULL; |
b21dd4506 rds: tcp: Sequenc... |
583 |
rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); |
53d0e83f9 rds: tcp: must us... |
584 |
spin_lock_irq(&rds_tcp_conn_lock); |
467fa1535 RDS-TCP: Support ... |
585 |
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { |
681648e67 rds: tcp: correct... |
586 |
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); |
467fa1535 RDS-TCP: Support ... |
587 |
|
cb66ddd15 net: rds: force t... |
588 |
if (net != c_net) |
467fa1535 RDS-TCP: Support ... |
589 |
continue; |
f10b4cff9 rds: tcp: atomica... |
590 |
if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) { |
afb4164d9 RDS: TCP: Refacto... |
591 |
list_move_tail(&tc->t_tcp_node, &tmp_list); |
f10b4cff9 rds: tcp: atomica... |
592 593 594 595 |
} else { list_del(&tc->t_tcp_node); tc->t_tcp_node_detached = true; } |
467fa1535 RDS-TCP: Support ... |
596 |
} |
53d0e83f9 rds: tcp: must us... |
597 |
spin_unlock_irq(&rds_tcp_conn_lock); |
2d746c93b rds: tcp: remove ... |
598 |
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) |
02105b2cc RDS: TCP: Make rd... |
599 |
rds_conn_destroy(tc->t_cpath->cp_conn); |
467fa1535 RDS-TCP: Support ... |
600 |
} |
bdf5bd7f2 rds: tcp: remove ... |
601 |
/* Per-netns exit: kill the listen socket and connections of @net, then
 * unregister its sysctls and free the table copy that was allocated for a
 * namespace other than init_net.
 */
static void __net_exit rds_tcp_exit_net(struct net *net)
{
	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
	struct ctl_table_header *hdr;

	rds_tcp_kill_sock(net);

	hdr = rtn->rds_tcp_sysctl;
	if (hdr)
		unregister_net_sysctl_table(hdr);

	/* init_net used the static table, nothing to free there */
	if (net != &init_net)
		kfree(rtn->ctl_table);
}
bdf5bd7f2 rds: tcp: remove ... |
612 613 614 615 616 |
static struct pernet_operations rds_tcp_net_ops = { .init = rds_tcp_init_net, .exit = rds_tcp_exit_net, .id = &rds_tcp_netid, .size = sizeof(struct rds_tcp_net), |
bdf5bd7f2 rds: tcp: remove ... |
617 618 619 |
}; void *rds_tcp_listen_sock_def_readable(struct net *net) |
467fa1535 RDS-TCP: Support ... |
620 |
{ |
bdf5bd7f2 rds: tcp: remove ... |
621 622 |
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); struct socket *lsock = rtn->rds_tcp_listen_sock; |
467fa1535 RDS-TCP: Support ... |
623 |
|
bdf5bd7f2 rds: tcp: remove ... |
624 625 |
if (!lsock) return NULL; |
467fa1535 RDS-TCP: Support ... |
626 |
|
bdf5bd7f2 rds: tcp: remove ... |
627 |
return lsock->sk->sk_user_data; |
467fa1535 RDS-TCP: Support ... |
628 |
} |
c6a58ffed RDS: TCP: Add sys... |
629 630 631 632 633 634 635 636 |
/* when sysctl is used to modify some kernel socket parameters,this * function resets the RDS connections in that netns so that we can * restart with new parameters. The assumption is that such reset * events are few and far-between. */ static void rds_tcp_sysctl_reset(struct net *net) { struct rds_tcp_connection *tc, *_tc; |
53d0e83f9 rds: tcp: must us... |
637 |
spin_lock_irq(&rds_tcp_conn_lock); |
c6a58ffed RDS: TCP: Add sys... |
638 |
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { |
681648e67 rds: tcp: correct... |
639 |
struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); |
c6a58ffed RDS: TCP: Add sys... |
640 641 642 |
if (net != c_net || !tc->t_sock) continue; |
02105b2cc RDS: TCP: Make rd... |
643 |
/* reconnect with new parameters */ |
aed20a53a rds: cancel send/... |
644 |
rds_conn_path_drop(tc->t_cpath, false); |
c6a58ffed RDS: TCP: Add sys... |
645 |
} |
53d0e83f9 rds: tcp: must us... |
646 |
spin_unlock_irq(&rds_tcp_conn_lock); |
c6a58ffed RDS: TCP: Add sys... |
647 648 649 |
} static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, |
32927393d sysctl: pass kern... |
650 |
void *buffer, size_t *lenp, loff_t *fpos) |
c6a58ffed RDS: TCP: Add sys... |
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 |
{ struct net *net = current->nsproxy->net_ns; int err; err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos); if (err < 0) { pr_warn("Invalid input. Must be >= %d ", *(int *)(ctl->extra1)); return err; } if (write) rds_tcp_sysctl_reset(net); return 0; } |
467fa1535 RDS-TCP: Support ... |
666 667 |
static void rds_tcp_exit(void) { |
ebeeb1ad9 rds: tcp: use rds... |
668 669 |
rds_tcp_set_unloading(); synchronize_rcu(); |
467fa1535 RDS-TCP: Support ... |
670 |
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); |
e65d4d963 rds: Remove IPv6 ... |
671 |
#if IS_ENABLED(CONFIG_IPV6) |
b7ff8b103 rds: Extend RDS A... |
672 |
rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); |
e65d4d963 rds: Remove IPv6 ... |
673 |
#endif |
bdf5bd7f2 rds: tcp: remove ... |
674 |
unregister_pernet_device(&rds_tcp_net_ops); |
467fa1535 RDS-TCP: Support ... |
675 676 677 678 679 680 |
rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); rds_tcp_recv_exit(); kmem_cache_destroy(rds_tcp_conn_slab); } module_exit(rds_tcp_exit); |
ff51bf841 rds: make local f... |
681 |
static int rds_tcp_init(void) |
70041088e RDS: Add TCP tran... |
682 683 684 685 686 687 |
{ int ret; rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", sizeof(struct rds_tcp_connection), 0, 0, NULL); |
8690bfa17 RDS: cleanup: rem... |
688 |
if (!rds_tcp_conn_slab) { |
70041088e RDS: Add TCP tran... |
689 690 691 |
ret = -ENOMEM; goto out; } |
16c09b1c7 rds: tcp: Reorder... |
692 693 |
ret = rds_tcp_recv_init(); if (ret) |
3b5923f07 rds: fix memory l... |
694 |
goto out_slab; |
467fa1535 RDS-TCP: Support ... |
695 |
|
bdf5bd7f2 rds: tcp: remove ... |
696 |
ret = register_pernet_device(&rds_tcp_net_ops); |
467fa1535 RDS-TCP: Support ... |
697 |
if (ret) |
16c09b1c7 rds: tcp: Reorder... |
698 |
goto out_recv; |
467fa1535 RDS-TCP: Support ... |
699 |
|
a8d63a53b rds: remove unnec... |
700 |
rds_trans_register(&rds_tcp_transport); |
70041088e RDS: Add TCP tran... |
701 |
|
70041088e RDS: Add TCP tran... |
702 |
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); |
e65d4d963 rds: Remove IPv6 ... |
703 |
#if IS_ENABLED(CONFIG_IPV6) |
b7ff8b103 rds: Extend RDS A... |
704 |
rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); |
e65d4d963 rds: Remove IPv6 ... |
705 |
#endif |
70041088e RDS: Add TCP tran... |
706 707 |
goto out; |
16c09b1c7 rds: tcp: Reorder... |
708 709 |
out_recv: rds_tcp_recv_exit(); |
3b5923f07 rds: fix memory l... |
710 |
out_slab: |
70041088e RDS: Add TCP tran... |
711 712 713 714 715 716 717 718 719 |
kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; } module_init(rds_tcp_init); MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); MODULE_DESCRIPTION("RDS: TCP transport"); MODULE_LICENSE("Dual BSD/GPL"); |