Blame view
net/rds/tcp_connect.c
6.15 KB
70041088e
|
1 |
/* |
eee2fa6ab
|
2 |
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
70041088e
|
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/kernel.h> #include <linux/in.h> #include <net/tcp.h> #include "rds.h" #include "tcp.h" void rds_tcp_state_change(struct sock *sk) { void (*state_change)(struct sock *sk); |
ea3b1ea53
|
43 |
struct rds_conn_path *cp; |
70041088e
|
44 |
struct rds_tcp_connection *tc; |
38036629c
|
45 |
read_lock_bh(&sk->sk_callback_lock); |
ea3b1ea53
|
46 47 |
cp = sk->sk_user_data; if (!cp) { |
70041088e
|
48 49 50 |
state_change = sk->sk_state_change; goto out; } |
ea3b1ea53
|
51 |
tc = cp->cp_transport_data; |
70041088e
|
52 53 54 55 |
state_change = tc->t_orig_state_change; rdsdebug("sock %p state_change to %d ", tc->t_sock, sk->sk_state); |
5c3da57d7
|
56 57 58 59 60 61 |
switch (sk->sk_state) { /* ignore connecting sockets as they make progress */ case TCP_SYN_SENT: case TCP_SYN_RECV: break; case TCP_ESTABLISHED: |
1a0e100fb
|
62 63 64 65 66 67 |
/* Force the peer to reconnect so that we have the * TCP ports going from <smaller-ip>.<transient> to * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the * RDS connection as RDS_CONN_UP until the reconnect, * to avoid RDS datagram loss. */ |
eee2fa6ab
|
68 69 |
if (rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0 && |
1a0e100fb
|
70 71 |
rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { |
aed20a53a
|
72 |
rds_conn_path_drop(cp, false); |
1a0e100fb
|
73 74 75 |
} else { rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } |
5c3da57d7
|
76 77 78 |
break; case TCP_CLOSE_WAIT: case TCP_CLOSE: |
aed20a53a
|
79 |
rds_conn_path_drop(cp, false); |
5c3da57d7
|
80 81 |
default: break; |
70041088e
|
82 83 |
} out: |
38036629c
|
84 |
read_unlock_bh(&sk->sk_callback_lock); |
70041088e
|
85 86 |
state_change(sk); } |
b04e8554f
|
87 |
int rds_tcp_conn_path_connect(struct rds_conn_path *cp) |
70041088e
|
88 89 |
{ struct socket *sock = NULL; |
1e2b44e78
|
90 |
struct sockaddr_in6 sin6; |
eee2fa6ab
|
91 92 93 |
struct sockaddr_in sin; struct sockaddr *addr; int addrlen; |
1e2b44e78
|
94 |
bool isv6; |
70041088e
|
95 |
int ret; |
b04e8554f
|
96 97 |
struct rds_connection *conn = cp->cp_conn; struct rds_tcp_connection *tc = cp->cp_transport_data; |
bd7c5f983
|
98 |
|
5916e2c15
|
99 100 101 102 103 |
/* for multipath rds,we only trigger the connection after * the handshake probe has determined the number of paths. */ if (cp->cp_index > 0 && cp->cp_conn->c_npaths < 2) return -EAGAIN; |
02105b2cc
|
104 |
mutex_lock(&tc->t_conn_path_lock); |
70041088e
|
105 |
|
b04e8554f
|
106 |
if (rds_conn_path_up(cp)) { |
02105b2cc
|
107 |
mutex_unlock(&tc->t_conn_path_lock); |
bd7c5f983
|
108 109 |
return 0; } |
1e2b44e78
|
110 111 112 113 114 115 116 117 118 |
if (ipv6_addr_v4mapped(&conn->c_laddr)) { ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); isv6 = false; } else { ret = sock_create_kern(rds_conn_net(conn), PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); isv6 = true; } |
70041088e
|
119 120 121 122 |
if (ret < 0) goto out; rds_tcp_tune(sock); |
1e2b44e78
|
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
if (isv6) { sin6.sin6_family = AF_INET6; sin6.sin6_addr = conn->c_laddr; sin6.sin6_port = 0; sin6.sin6_flowinfo = 0; sin6.sin6_scope_id = conn->c_dev_if; addr = (struct sockaddr *)&sin6; addrlen = sizeof(sin6); } else { sin.sin_family = AF_INET; sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; sin.sin_port = 0; addr = (struct sockaddr *)&sin; addrlen = sizeof(sin); } |
70041088e
|
138 |
|
eee2fa6ab
|
139 |
ret = sock->ops->bind(sock, addr, addrlen); |
70041088e
|
140 |
if (ret) { |
eee2fa6ab
|
141 142 |
rdsdebug("bind failed with %d at address %pI6c ", |
6884b348e
|
143 |
ret, &conn->c_laddr); |
70041088e
|
144 145 |
goto out; } |
1e2b44e78
|
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
if (isv6) { sin6.sin6_family = AF_INET6; sin6.sin6_addr = conn->c_faddr; sin6.sin6_port = htons(RDS_TCP_PORT); sin6.sin6_flowinfo = 0; sin6.sin6_scope_id = conn->c_dev_if; addr = (struct sockaddr *)&sin6; addrlen = sizeof(sin6); } else { sin.sin_family = AF_INET; sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; sin.sin_port = htons(RDS_TCP_PORT); addr = (struct sockaddr *)&sin; addrlen = sizeof(sin); } |
70041088e
|
161 162 163 164 165 |
/* * once we call connect() we can start getting callbacks and they * own the socket */ |
ea3b1ea53
|
166 |
rds_tcp_set_callbacks(sock, cp); |
eee2fa6ab
|
167 |
ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); |
70041088e
|
168 |
|
eee2fa6ab
|
169 170 |
rdsdebug("connect to address %pI6c returned %d ", &conn->c_faddr, ret); |
70041088e
|
171 172 |
if (ret == -EINPROGRESS) ret = 0; |
467fa1535
|
173 174 |
if (ret == 0) { rds_tcp_keepalive(sock); |
eb74cc97b
|
175 |
sock = NULL; |
467fa1535
|
176 |
} else { |
b04e8554f
|
177 |
rds_tcp_restore_callbacks(sock, cp->cp_transport_data); |
467fa1535
|
178 |
} |
70041088e
|
179 180 |
out: |
02105b2cc
|
181 |
mutex_unlock(&tc->t_conn_path_lock); |
70041088e
|
182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
if (sock) sock_release(sock); return ret; } /* * Before killing the tcp socket this needs to serialize with callbacks. The * caller has already grabbed the sending sem so we're serialized with other * senders. * * TCP calls the callbacks with the sock lock so we hold it while we reset the * callbacks to those set by TCP. Our callbacks won't execute again once we * hold the sock lock. */ |
226f7a7d9
|
196 |
void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) |
70041088e
|
197 |
{ |
226f7a7d9
|
198 |
struct rds_tcp_connection *tc = cp->cp_transport_data; |
70041088e
|
199 |
struct socket *sock = tc->t_sock; |
226f7a7d9
|
200 201 202 |
rdsdebug("shutting down conn %p tc %p sock %p ", cp->cp_conn, tc, sock); |
70041088e
|
203 204 |
if (sock) { |
ebeeb1ad9
|
205 |
if (rds_destroy_pending(cp->cp_conn)) |
c14b03668
|
206 |
rds_tcp_set_linger(sock); |
70041088e
|
207 208 209 210 211 212 |
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ release_sock(sock->sk); sock_release(sock); |
ccbd6a5a4
|
213 |
} |
70041088e
|
214 215 216 217 218 219 220 221 |
if (tc->t_tinc) { rds_inc_put(&tc->t_tinc->ti_inc); tc->t_tinc = NULL; } tc->t_tinc_hdr_rem = sizeof(struct rds_header); tc->t_tinc_data_rem = 0; } |