Blame view
net/rds/ib_cm.c
36.2 KB
ec16227e1 RDS/IB: Infiniban... |
1 |
/* |
9b17f5884 net/rds: Use DMA ... |
2 |
* Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. |
ec16227e1 RDS/IB: Infiniban... |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ |
d131c5bb6 net/rds: Add miss... |
33 |
#include <linux/dmapool.h> |
ec16227e1 RDS/IB: Infiniban... |
34 35 |
#include <linux/kernel.h> #include <linux/in.h> |
5a0e3ad6a include cleanup: ... |
36 |
#include <linux/slab.h> |
ec16227e1 RDS/IB: Infiniban... |
37 |
#include <linux/vmalloc.h> |
cb0a60564 net/rds: use prin... |
38 |
#include <linux/ratelimit.h> |
eee2fa6ab rds: Changing IP ... |
39 |
#include <net/addrconf.h> |
8094ba0ac RDMA/cma: Provide... |
40 |
#include <rdma/ib_cm.h> |
ec16227e1 RDS/IB: Infiniban... |
41 |
|
0cb43965d RDS: split out co... |
42 |
#include "rds_single_path.h" |
ec16227e1 RDS/IB: Infiniban... |
43 44 |
#include "rds.h" #include "ib.h" |
3a2886cca net/rds: Keep tra... |
45 |
#include "ib_mr.h" |
ec16227e1 RDS/IB: Infiniban... |
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
/* * Set the selected protocol version */ static void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version) { conn->c_version = version; } /* * Set up flow control */ static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits) { struct rds_ib_connection *ic = conn->c_transport_data; if (rds_ib_sysctl_flow_control && credits != 0) { /* We're doing flow control */ ic->i_flowctl = 1; rds_ib_send_add_credits(conn, credits); } else { ic->i_flowctl = 0; } } /* * Tune RNR behavior. Without flow control, we use a rather * low timeout, but not the absolute minimum - this should * be tunable. * * We already set the RNR retry count to 7 (which is the * smallest infinite number :-) above. * If flow control is off, we want to change this back to 0 * so that we learn quickly when our credit accounting is * buggy. * * Caller passes in a qp_attr pointer - don't waste stack spacv * by allocation this twice. */ static void rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) { int ret; attr->min_rnr_timer = IB_RNR_TIMER_000_32; ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); if (ret) printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d ", -ret); } /* * Connection established. * We get here for both outgoing and incoming connection. */ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) { |
ec16227e1 RDS/IB: Infiniban... |
103 |
struct rds_ib_connection *ic = conn->c_transport_data; |
eee2fa6ab rds: Changing IP ... |
104 |
const union rds_ib_conn_priv *dp = NULL; |
ec16227e1 RDS/IB: Infiniban... |
105 |
struct ib_qp_attr qp_attr; |
eee2fa6ab rds: Changing IP ... |
106 107 108 109 |
__be64 ack_seq = 0; __be32 credit = 0; u8 major = 0; u8 minor = 0; |
ec16227e1 RDS/IB: Infiniban... |
110 |
int err; |
eee2fa6ab rds: Changing IP ... |
111 112 113 114 115 116 117 118 119 120 121 122 123 |
dp = event->param.conn.private_data; if (conn->c_isv6) { if (event->param.conn.private_data_len >= sizeof(struct rds6_ib_connect_private)) { major = dp->ricp_v6.dp_protocol_major; minor = dp->ricp_v6.dp_protocol_minor; credit = dp->ricp_v6.dp_credit; /* dp structure start is not guaranteed to be 8 bytes * aligned. Since dp_ack_seq is 64-bit extended load * operations can be used so go through get_unaligned * to avoid unaligned errors. */ ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq); |
02a6a2592 RDS/IB: Handle co... |
124 |
} |
eee2fa6ab rds: Changing IP ... |
125 126 127 128 129 130 131 132 133 134 135 136 |
} else if (event->param.conn.private_data_len >= sizeof(struct rds_ib_connect_private)) { major = dp->ricp_v4.dp_protocol_major; minor = dp->ricp_v4.dp_protocol_minor; credit = dp->ricp_v4.dp_credit; ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq); } /* make sure it isn't empty data */ if (major) { rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor)); rds_ib_set_flow_control(conn, be32_to_cpu(credit)); |
ec16227e1 RDS/IB: Infiniban... |
137 |
} |
cdc306a5c rds: make v3.1 as... |
138 139 140 141 142 143 144 145 146 147 |
if (conn->c_version < RDS_PROTOCOL_VERSION) { if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported ", &conn->c_laddr, &conn->c_faddr, RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version)); rds_conn_destroy(conn); return; } |
f147dd9ec RDS/IB: Disallow ... |
148 |
} |
ec16227e1 RDS/IB: Infiniban... |
149 |
|
fd261ce6a rds: rdma: update... |
150 151 |
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s ", |
cdc306a5c rds: make v3.1 as... |
152 |
ic->i_active_side ? "Active" : "Passive", |
fd261ce6a rds: rdma: update... |
153 |
&conn->c_laddr, &conn->c_faddr, conn->c_tos, |
cdc306a5c rds: make v3.1 as... |
154 155 156 |
RDS_PROTOCOL_MAJOR(conn->c_version), RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); |
e0e6d0628 net: rds: add ser... |
157 158 |
/* receive sl from the peer */ ic->i_sl = ic->i_cm_id->route.path_rec->sl; |
cf657269d RDS: IB: fix pani... |
159 |
atomic_set(&ic->i_cq_quiesce, 0); |
581d53c91 RDS: IB: track an... |
160 161 162 |
/* Init rings and fill recv. this needs to wait until protocol * negotiation is complete, since ring layout is different * from 3.1 to 4.1. |
e11d912a7 RDS/IB: Move tx/r... |
163 164 165 166 167 |
*/ rds_ib_send_init_ring(ic); rds_ib_recv_init_ring(ic); /* Post receive buffers - as a side effect, this will update * the posted credit count. */ |
73ce4317b RDS: make sure we... |
168 |
rds_ib_recv_refill(conn, 1, GFP_KERNEL); |
e11d912a7 RDS/IB: Move tx/r... |
169 |
|
ec16227e1 RDS/IB: Infiniban... |
170 171 172 173 174 175 176 177 |
/* Tune RNR behavior */ rds_ib_tune_rnr(ic, &qp_attr); qp_attr.qp_state = IB_QPS_RTS; err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); if (err) printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d ", err); |
3e0249f9c RDS/IB: add refco... |
178 |
/* update ib_device with this local ipaddr */ |
eee2fa6ab rds: Changing IP ... |
179 |
err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr); |
ec16227e1 RDS/IB: Infiniban... |
180 |
if (err) |
3e0249f9c RDS/IB: add refco... |
181 182 183 |
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d) ", err); |
ec16227e1 RDS/IB: Infiniban... |
184 185 186 |
/* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ |
c0adf54a1 net/rds: fix unal... |
187 |
if (dp) { |
eee2fa6ab rds: Changing IP ... |
188 189 |
if (ack_seq) rds_send_drop_acked(conn, be64_to_cpu(ack_seq), |
c0adf54a1 net/rds: fix unal... |
190 191 |
NULL); } |
ec16227e1 RDS/IB: Infiniban... |
192 |
|
cdc306a5c rds: make v3.1 as... |
193 |
conn->c_proposed_version = conn->c_version; |
ec16227e1 RDS/IB: Infiniban... |
194 195 196 197 |
rds_connect_complete(conn); } static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, |
eee2fa6ab rds: Changing IP ... |
198 199 200 201 202 203 |
struct rdma_conn_param *conn_param, union rds_ib_conn_priv *dp, u32 protocol_version, u32 max_responder_resources, u32 max_initiator_depth, bool isv6) |
ec16227e1 RDS/IB: Infiniban... |
204 |
{ |
40589e74f RDS: Base init_de... |
205 |
struct rds_ib_connection *ic = conn->c_transport_data; |
3e0249f9c RDS/IB: add refco... |
206 |
struct rds_ib_device *rds_ibdev = ic->rds_ibdev; |
40589e74f RDS: Base init_de... |
207 |
|
ec16227e1 RDS/IB: Infiniban... |
208 |
memset(conn_param, 0, sizeof(struct rdma_conn_param)); |
40589e74f RDS: Base init_de... |
209 |
|
40589e74f RDS: Base init_de... |
210 211 212 213 |
conn_param->responder_resources = min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources); conn_param->initiator_depth = min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth); |
3ba23ade4 RDS: Set retry_co... |
214 |
conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); |
ec16227e1 RDS/IB: Infiniban... |
215 216 217 |
conn_param->rnr_retry_count = 7; if (dp) { |
ec16227e1 RDS/IB: Infiniban... |
218 |
memset(dp, 0, sizeof(*dp)); |
eee2fa6ab rds: Changing IP ... |
219 220 221 222 223 224 225 226 227 228 229 |
if (isv6) { dp->ricp_v6.dp_saddr = conn->c_laddr; dp->ricp_v6.dp_daddr = conn->c_faddr; dp->ricp_v6.dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); dp->ricp_v6.dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->ricp_v6.dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v6.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); |
fd261ce6a rds: rdma: update... |
230 |
dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; |
eee2fa6ab rds: Changing IP ... |
231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
conn_param->private_data = &dp->ricp_v6; conn_param->private_data_len = sizeof(dp->ricp_v6); } else { dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3]; dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3]; dp->ricp_v4.dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); dp->ricp_v4.dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->ricp_v4.dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v4.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); |
fd261ce6a rds: rdma: update... |
245 |
dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; |
eee2fa6ab rds: Changing IP ... |
246 247 248 249 |
conn_param->private_data = &dp->ricp_v4; conn_param->private_data_len = sizeof(dp->ricp_v4); } |
ec16227e1 RDS/IB: Infiniban... |
250 251 252 253 |
/* Advertise flow control */ if (ic->i_flowctl) { unsigned int credits; |
eee2fa6ab rds: Changing IP ... |
254 255 256 257 258 259 260 261 |
credits = IB_GET_POST_CREDITS (atomic_read(&ic->i_credits)); if (isv6) dp->ricp_v6.dp_credit = cpu_to_be32(credits); else dp->ricp_v4.dp_credit = cpu_to_be32(credits); atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); |
ec16227e1 RDS/IB: Infiniban... |
262 |
} |
ec16227e1 RDS/IB: Infiniban... |
263 264 265 266 267 |
} } static void rds_ib_cq_event_handler(struct ib_event *event, void *data) { |
1bde04a63 RDS/IB: print IB ... |
268 269 |
rdsdebug("event %u (%s) data %p ", |
3c88f3dcf RDS: Switch to ge... |
270 |
event->event, ib_event_msg(event->event), data); |
ec16227e1 RDS/IB: Infiniban... |
271 |
} |
f4f943c95 RDS: IB: ack more... |
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 |
/* Plucking the oldest entry from the ring can be done concurrently with * the thread refilling the ring. Each ring operation is protected by * spinlocks and the transient state of refilling doesn't change the * recording of which entry is oldest. * * This relies on IB only calling one cq comp_handler for each cq so that * there will only be one caller of rds_recv_incoming() per RDS connection. */ static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_ib_connection *ic = conn->c_transport_data; rdsdebug("conn %p cq %p ", conn, cq); rds_ib_stats_inc(s_ib_evt_handler_call); tasklet_schedule(&ic->i_recv_tasklet); } |
dcfd041c8 RDS: IB: Remove t... |
292 293 |
static void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq, struct ib_wc *wcs) |
f4f943c95 RDS: IB: ack more... |
294 |
{ |
dcfd041c8 RDS: IB: Remove t... |
295 |
int nr, i; |
f4f943c95 RDS: IB: ack more... |
296 297 298 299 300 301 302 303 304 |
struct ib_wc *wc; while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { for (i = 0; i < nr; i++) { wc = wcs + i; rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u ", (unsigned long long)wc->wr_id, wc->status, wc->byte_len, be32_to_cpu(wc->ex.imm_data)); |
0c28c0450 RDS: IB: split se... |
305 |
|
1659185fb RDS: IB: Support ... |
306 307 308 309 310 |
if (wc->wr_id <= ic->i_send_ring.w_nr || wc->wr_id == RDS_IB_ACK_WR_ID) rds_ib_send_cqe_handler(ic, wc); else rds_ib_mr_cqe_handler(ic, wc); |
f4f943c95 RDS: IB: ack more... |
311 312 313 |
} } } |
0c28c0450 RDS: IB: split se... |
314 315 316 317 |
/* Send-side tasklet.  Drains the send CQ, re-arms the completion
 * notification, then drains once more to close the race where a
 * completion lands between the first drain and the re-arm.  Finally
 * kicks the transmit path if the connection is up and has work queued.
 */
static void rds_ib_tasklet_fn_send(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
	struct rds_connection *conn = ic->conn;

	rds_ib_stats_inc(s_ib_tasklet_call);

	/* if cq has been already reaped, ignore incoming cq event */
	if (atomic_read(&ic->i_cq_quiesce))
		return;

	/* drain, re-arm, drain again: the second poll catches completions
	 * that arrived before the notify took effect */
	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);

	if (rds_conn_up(conn) &&
	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
	    test_bit(0, &conn->c_map_queued)))
		rds_send_xmit(&ic->conn->c_path[0]);
}
dcfd041c8 RDS: IB: Remove t... |
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
/* Drain the receive CQ in batches of RDS_IB_WC_MAX, handing every
 * completion to the receive handler, which accumulates ack bookkeeping
 * into @ack_state for the caller to act on afterwards.
 */
static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
		     struct ib_wc *wcs,
		     struct rds_ib_ack_state *ack_state)
{
	int nr, i;
	struct ib_wc *wc;

	while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
		for (i = 0; i < nr; i++) {
			wc = wcs + i;
			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
				 (unsigned long long)wc->wr_id, wc->status,
				 wc->byte_len, be32_to_cpu(wc->ex.imm_data));

			rds_ib_recv_cqe_handler(ic, wc, ack_state);
		}
	}
}
f4f943c95 RDS: IB: ack more... |
351 352 353 354 355 356 |
/* Receive-side tasklet.  Drains the recv CQ (drain, re-arm, drain again
 * to close the notify race), then applies the ack state gathered during
 * the drain: schedules outgoing acks and drops messages the peer has
 * acknowledged.
 */
static void rds_ib_tasklet_fn_recv(unsigned long data)
{
	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
	struct rds_connection *conn = ic->conn;
	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
	struct rds_ib_ack_state state;

	/* NOTE(review): on a vanished device the connection is dropped but
	 * processing still continues below — presumably the teardown is
	 * asynchronous and this pass is still safe; confirm against
	 * rds_conn_drop() semantics.
	 */
	if (!rds_ibdev)
		rds_conn_drop(conn);

	rds_ib_stats_inc(s_ib_tasklet_call);

	/* if cq has been already reaped, ignore incoming cq event */
	if (atomic_read(&ic->i_cq_quiesce))
		return;

	memset(&state, 0, sizeof(state));
	/* drain, re-arm, drain again to catch completions that raced the
	 * notification re-arm */
	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);

	if (state.ack_next_valid)
		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
		rds_send_drop_acked(conn, state.ack_recv, NULL);
		ic->i_ack_recv = state.ack_recv;
	}

	if (rds_conn_up(conn))
		rds_ib_attempt_ack(ic);
}
ec16227e1 RDS/IB: Infiniban... |
379 380 381 382 |
static void rds_ib_qp_event_handler(struct ib_event *event, void *data) { struct rds_connection *conn = data; struct rds_ib_connection *ic = conn->c_transport_data; |
1bde04a63 RDS/IB: print IB ... |
383 384 |
rdsdebug("conn %p ic %p event %u (%s) ", conn, ic, event->event, |
3c88f3dcf RDS: Switch to ge... |
385 |
ib_event_msg(event->event)); |
ec16227e1 RDS/IB: Infiniban... |
386 387 388 389 390 391 |
switch (event->event) { case IB_EVENT_COMM_EST: rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: |
fd261ce6a rds: rdma: update... |
392 393 394 395 |
rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting ", event->event, ib_event_msg(event->event), &conn->c_laddr, &conn->c_faddr); |
97069788d RDS: Turn down al... |
396 |
rds_conn_drop(conn); |
ec16227e1 RDS/IB: Infiniban... |
397 398 399 |
break; } } |
0c28c0450 RDS: IB: split se... |
400 401 402 403 404 405 406 407 408 409 410 411 |
/* Send CQ completion callback: count the event and defer the actual CQ
 * draining to the send tasklet.
 */
static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
{
	struct rds_connection *conn = context;
	struct rds_ib_connection *ic = conn->c_transport_data;

	rdsdebug("conn %p cq %p\n", conn, cq);

	rds_ib_stats_inc(s_ib_evt_handler_call);

	tasklet_schedule(&ic->i_send_tasklet);
}
be2f76eac RDS: IB: Add vect... |
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 |
/* Pick the least-loaded completion vector on the device and charge one
 * unit of load to it.  The scan runs from the highest vector index down
 * and only replaces the candidate on a strictly smaller load, so ties
 * resolve to the lowest-numbered vector among equals.  Balanced by
 * ibdev_put_vector() when the CQ using the vector goes away.
 */
static inline int ibdev_get_unused_vector(struct rds_ib_device *rds_ibdev)
{
	int min = rds_ibdev->vector_load[rds_ibdev->dev->num_comp_vectors - 1];
	int index = rds_ibdev->dev->num_comp_vectors - 1;
	int i;

	for (i = rds_ibdev->dev->num_comp_vectors - 1; i >= 0; i--) {
		if (rds_ibdev->vector_load[i] < min) {
			index = i;
			min = rds_ibdev->vector_load[i];
		}
	}

	rds_ibdev->vector_load[index]++;
	return index;
}

/* Release one unit of load previously charged by
 * ibdev_get_unused_vector().
 */
static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
{
	rds_ibdev->vector_load[index]--;
}
9b17f5884 net/rds: Use DMA ... |
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 |
/* Allocate DMA coherent memory to be used to store struct rds_header for
 * sending/receiving packets.  The pointers to the DMA memory and the
 * associated DMA addresses are stored in two arrays.
 *
 * @ibdev: the IB device
 * @pool: the DMA memory pool
 * @dma_addrs: pointer to the array for storing DMA addresses
 * @num_hdrs: number of headers to allocate
 *
 * It returns the pointer to the array storing the DMA memory pointers.  On
 * error, NULL pointer is returned.  On success the caller owns both arrays
 * and must release them with rds_dma_hdrs_free().
 */
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
				       struct dma_pool *pool,
				       dma_addr_t **dma_addrs, u32 num_hdrs)
{
	struct rds_header **hdrs;
	dma_addr_t *hdr_daddrs;
	u32 i;

	/* Both bookkeeping arrays are placed on the IB device's NUMA node. */
	hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
			     ibdev_to_node(ibdev));
	if (!hdrs)
		return NULL;

	hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
				   ibdev_to_node(ibdev));
	if (!hdr_daddrs) {
		kvfree(hdrs);
		return NULL;
	}

	for (i = 0; i < num_hdrs; i++) {
		hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
		if (!hdrs[i]) {
			/* Free only the i headers allocated so far. */
			rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
			return NULL;
		}
	}

	*dma_addrs = hdr_daddrs;
	return hdrs;
}

/* Free the DMA memory used to store struct rds_header.
 *
 * @pool: the DMA memory pool
 * @hdrs: pointer to the array storing DMA memory pointers
 * @dma_addrs: pointer to the array storing DMA addresses
 * @num_hdrs: number of headers to free.
 */
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
		       dma_addr_t *dma_addrs, u32 num_hdrs)
{
	u32 i;

	for (i = 0; i < num_hdrs; i++)
		dma_pool_free(pool, hdrs[i], dma_addrs[i]);
	kvfree(hdrs);
	kvfree(dma_addrs);
}
ec16227e1 RDS/IB: Infiniban... |
494 495 496 497 498 499 500 501 502 |
/* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. */ static int rds_ib_setup_qp(struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; struct ib_device *dev = ic->i_cm_id->device; struct ib_qp_init_attr attr; |
8e37210b3 IB/core: Change i... |
503 |
struct ib_cq_init_attr cq_attr = {}; |
ec16227e1 RDS/IB: Infiniban... |
504 |
struct rds_ib_device *rds_ibdev; |
a36e629ee rds: ib: update W... |
505 |
unsigned long max_wrs; |
ad6832f95 RDS: IB: allocate... |
506 |
int ret, fr_queue_space; |
9b17f5884 net/rds: Use DMA ... |
507 |
struct dma_pool *pool; |
ec16227e1 RDS/IB: Infiniban... |
508 |
|
3e0249f9c RDS/IB: add refco... |
509 510 511 |
/* * It's normal to see a null device if an incoming connection races * with device removal, so we don't print a warning. |
ec16227e1 RDS/IB: Infiniban... |
512 |
*/ |
3e0249f9c RDS/IB: add refco... |
513 514 |
rds_ibdev = rds_ib_get_client_data(dev); if (!rds_ibdev) |
ec16227e1 RDS/IB: Infiniban... |
515 |
return -EOPNOTSUPP; |
3e0249f9c RDS/IB: add refco... |
516 |
|
ad6832f95 RDS: IB: allocate... |
517 |
/* The fr_queue_space is currently set to 512, to add extra space on |
07549ee21 RDMA/rds: Remove ... |
518 |
* completion queue and send queue. This extra space is used for FRWR |
ad6832f95 RDS: IB: allocate... |
519 520 |
* registration and invalidation work requests */ |
07549ee21 RDMA/rds: Remove ... |
521 |
fr_queue_space = RDS_IB_DEFAULT_FR_WR; |
ad6832f95 RDS: IB: allocate... |
522 |
|
3e0249f9c RDS/IB: add refco... |
523 524 |
/* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); |
ec16227e1 RDS/IB: Infiniban... |
525 |
|
a36e629ee rds: ib: update W... |
526 527 528 529 530 531 532 533 534 |
max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ? rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr; if (ic->i_send_ring.w_nr != max_wrs) rds_ib_ring_resize(&ic->i_send_ring, max_wrs); max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ? rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr; if (ic->i_recv_ring.w_nr != max_wrs) rds_ib_ring_resize(&ic->i_recv_ring, max_wrs); |
ec16227e1 RDS/IB: Infiniban... |
535 536 537 |
/* Protection domain and memory range */ ic->i_pd = rds_ibdev->pd; |
ec16227e1 RDS/IB: Infiniban... |
538 |
|
be2f76eac RDS: IB: Add vect... |
539 |
ic->i_scq_vector = ibdev_get_unused_vector(rds_ibdev); |
ad6832f95 RDS: IB: allocate... |
540 |
cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1; |
be2f76eac RDS: IB: Add vect... |
541 |
cq_attr.comp_vector = ic->i_scq_vector; |
0c28c0450 RDS: IB: split se... |
542 |
ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, |
ec16227e1 RDS/IB: Infiniban... |
543 |
rds_ib_cq_event_handler, conn, |
8e37210b3 IB/core: Change i... |
544 |
&cq_attr); |
ec16227e1 RDS/IB: Infiniban... |
545 546 547 |
if (IS_ERR(ic->i_send_cq)) { ret = PTR_ERR(ic->i_send_cq); ic->i_send_cq = NULL; |
be2f76eac RDS: IB: Add vect... |
548 |
ibdev_put_vector(rds_ibdev, ic->i_scq_vector); |
ec16227e1 RDS/IB: Infiniban... |
549 550 |
rdsdebug("ib_create_cq send failed: %d ", ret); |
3b12f73a5 rds: ib: add erro... |
551 |
goto rds_ibdev_out; |
ec16227e1 RDS/IB: Infiniban... |
552 |
} |
be2f76eac RDS: IB: Add vect... |
553 |
ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev); |
8e37210b3 IB/core: Change i... |
554 |
cq_attr.cqe = ic->i_recv_ring.w_nr; |
be2f76eac RDS: IB: Add vect... |
555 |
cq_attr.comp_vector = ic->i_rcq_vector; |
f4f943c95 RDS: IB: ack more... |
556 |
ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv, |
ec16227e1 RDS/IB: Infiniban... |
557 |
rds_ib_cq_event_handler, conn, |
8e37210b3 IB/core: Change i... |
558 |
&cq_attr); |
ec16227e1 RDS/IB: Infiniban... |
559 560 561 |
if (IS_ERR(ic->i_recv_cq)) { ret = PTR_ERR(ic->i_recv_cq); ic->i_recv_cq = NULL; |
be2f76eac RDS: IB: Add vect... |
562 |
ibdev_put_vector(rds_ibdev, ic->i_rcq_vector); |
ec16227e1 RDS/IB: Infiniban... |
563 564 |
rdsdebug("ib_create_cq recv failed: %d ", ret); |
3b12f73a5 rds: ib: add erro... |
565 |
goto send_cq_out; |
ec16227e1 RDS/IB: Infiniban... |
566 567 568 569 570 571 |
} ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d ", ret); |
3b12f73a5 rds: ib: add erro... |
572 |
goto recv_cq_out; |
ec16227e1 RDS/IB: Infiniban... |
573 574 575 576 577 578 |
} ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d ", ret); |
3b12f73a5 rds: ib: add erro... |
579 |
goto recv_cq_out; |
ec16227e1 RDS/IB: Infiniban... |
580 581 582 583 584 585 586 |
} /* XXX negotiate max send/recv with remote? */ memset(&attr, 0, sizeof(attr)); attr.event_handler = rds_ib_qp_event_handler; attr.qp_context = conn; /* + 1 to allow for the single ack message */ |
ad6832f95 RDS: IB: allocate... |
587 |
attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1; |
ec16227e1 RDS/IB: Infiniban... |
588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 |
attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; attr.cap.max_send_sge = rds_ibdev->max_sge; attr.cap.max_recv_sge = RDS_IB_RECV_SGE; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; /* * XXX this can fail if max_*_wr is too large? Are we supposed * to back off until we get a value that the hardware can support? */ ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d ", ret); |
3b12f73a5 rds: ib: add erro... |
604 |
goto recv_cq_out; |
ec16227e1 RDS/IB: Infiniban... |
605 |
} |
9b17f5884 net/rds: Use DMA ... |
606 607 608 |
pool = rds_ibdev->rid_hdrs_pool; ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, ic->i_send_ring.w_nr); |
8690bfa17 RDS: cleanup: rem... |
609 |
if (!ic->i_send_hdrs) { |
ec16227e1 RDS/IB: Infiniban... |
610 |
ret = -ENOMEM; |
9b17f5884 net/rds: Use DMA ... |
611 612 |
rdsdebug("DMA send hdrs alloc failed "); |
3b12f73a5 rds: ib: add erro... |
613 |
goto qp_out; |
ec16227e1 RDS/IB: Infiniban... |
614 |
} |
9b17f5884 net/rds: Use DMA ... |
615 616 |
ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, ic->i_recv_ring.w_nr); |
8690bfa17 RDS: cleanup: rem... |
617 |
if (!ic->i_recv_hdrs) { |
ec16227e1 RDS/IB: Infiniban... |
618 |
ret = -ENOMEM; |
9b17f5884 net/rds: Use DMA ... |
619 620 |
rdsdebug("DMA recv hdrs alloc failed "); |
3b12f73a5 rds: ib: add erro... |
621 |
goto send_hdrs_dma_out; |
ec16227e1 RDS/IB: Infiniban... |
622 |
} |
9b17f5884 net/rds: Use DMA ... |
623 624 |
ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, &ic->i_ack_dma); |
8690bfa17 RDS: cleanup: rem... |
625 |
if (!ic->i_ack) { |
ec16227e1 RDS/IB: Infiniban... |
626 |
ret = -ENOMEM; |
9b17f5884 net/rds: Use DMA ... |
627 628 |
rdsdebug("DMA ack header alloc failed "); |
3b12f73a5 rds: ib: add erro... |
629 |
goto recv_hdrs_dma_out; |
ec16227e1 RDS/IB: Infiniban... |
630 |
} |
fd7becedb treewide: Use arr... |
631 632 |
ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work), ic->i_send_ring.w_nr), |
e4c52c98e RDS/IB: add _to_n... |
633 |
ibdev_to_node(dev)); |
8690bfa17 RDS: cleanup: rem... |
634 |
if (!ic->i_sends) { |
ec16227e1 RDS/IB: Infiniban... |
635 636 637 |
ret = -ENOMEM; rdsdebug("send allocation failed "); |
3b12f73a5 rds: ib: add erro... |
638 |
goto ack_dma_out; |
ec16227e1 RDS/IB: Infiniban... |
639 |
} |
ec16227e1 RDS/IB: Infiniban... |
640 |
|
fd7becedb treewide: Use arr... |
641 642 |
ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work), ic->i_recv_ring.w_nr), |
e4c52c98e RDS/IB: add _to_n... |
643 |
ibdev_to_node(dev)); |
8690bfa17 RDS: cleanup: rem... |
644 |
if (!ic->i_recvs) { |
ec16227e1 RDS/IB: Infiniban... |
645 646 647 |
ret = -ENOMEM; rdsdebug("recv allocation failed "); |
3b12f73a5 rds: ib: add erro... |
648 |
goto sends_out; |
ec16227e1 RDS/IB: Infiniban... |
649 |
} |
ec16227e1 RDS/IB: Infiniban... |
650 |
rds_ib_recv_init_ack(ic); |
e5580242a rds/ib: Remove ib... |
651 652 |
rdsdebug("conn %p pd %p cq %p %p ", conn, ic->i_pd, |
ec16227e1 RDS/IB: Infiniban... |
653 |
ic->i_send_cq, ic->i_recv_cq); |
91a825290 rds: ib: Fix miss... |
654 |
goto out; |
3b12f73a5 rds: ib: add erro... |
655 656 657 |
sends_out: vfree(ic->i_sends); |
9b17f5884 net/rds: Use DMA ... |
658 |
|
3b12f73a5 rds: ib: add erro... |
659 |
ack_dma_out: |
9b17f5884 net/rds: Use DMA ... |
660 661 |
dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); ic->i_ack = NULL; |
3b12f73a5 rds: ib: add erro... |
662 |
recv_hdrs_dma_out: |
9b17f5884 net/rds: Use DMA ... |
663 664 665 666 |
rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, ic->i_recv_ring.w_nr); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; |
3b12f73a5 rds: ib: add erro... |
667 |
send_hdrs_dma_out: |
9b17f5884 net/rds: Use DMA ... |
668 669 670 671 |
rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, ic->i_send_ring.w_nr); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; |
3b12f73a5 rds: ib: add erro... |
672 673 674 |
qp_out: rdma_destroy_qp(ic->i_cm_id); recv_cq_out: |
eaa1ca9cf rds: Don't check ... |
675 676 |
ib_destroy_cq(ic->i_recv_cq); ic->i_recv_cq = NULL; |
3b12f73a5 rds: ib: add erro... |
677 |
send_cq_out: |
eaa1ca9cf rds: Don't check ... |
678 679 |
ib_destroy_cq(ic->i_send_cq); ic->i_send_cq = NULL; |
3b12f73a5 rds: ib: add erro... |
680 681 |
rds_ibdev_out: rds_ib_remove_conn(rds_ibdev, conn); |
91a825290 rds: ib: Fix miss... |
682 |
out: |
3e0249f9c RDS/IB: add refco... |
683 |
rds_ib_dev_put(rds_ibdev); |
3b12f73a5 rds: ib: add erro... |
684 |
|
ec16227e1 RDS/IB: Infiniban... |
685 686 |
return ret; } |
eee2fa6ab rds: Changing IP ... |
687 |
/* Inspect an incoming connect request's private data and pick the protocol
 * version both sides can speak.  Returns the chosen RDS_PROTOCOL() value,
 * or 0 when the request is incompatible (or carries no private data).
 */
static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
{
	const union rds_ib_conn_priv *dp = event->param.conn.private_data;
	u8 data_len, major, minor;
	u32 version = 0;
	__be16 mask;
	u16 common;

	/*
	 * rdma_cm private data is odd - when there is any private data in the
	 * request, we will be given a pretty large buffer without telling us the
	 * original size. The only way to tell the difference is by looking at
	 * the contents, which are initialized to zero.
	 * If the protocol version fields aren't set, this is a connection attempt
	 * from an older version. This could be 3.0 or 2.0 - we can't tell.
	 * We really should have changed this for OFED 1.3 :-(
	 */

	/* Be paranoid. RDS always has privdata */
	if (!event->param.conn.private_data_len) {
		printk(KERN_NOTICE "RDS incoming connection has no private data, "
		       "rejecting\n");
		return 0;
	}

	if (isv6) {
		data_len = sizeof(struct rds6_ib_connect_private);
		major = dp->ricp_v6.dp_protocol_major;
		minor = dp->ricp_v6.dp_protocol_minor;
		mask = dp->ricp_v6.dp_protocol_minor_mask;
	} else {
		data_len = sizeof(struct rds_ib_connect_private);
		major = dp->ricp_v4.dp_protocol_major;
		minor = dp->ricp_v4.dp_protocol_minor;
		mask = dp->ricp_v4.dp_protocol_minor_mask;
	}

	/* Even if len is crap *now* I still want to check it. -ASG */
	if (event->param.conn.private_data_len < data_len || major == 0)
		return RDS_PROTOCOL_4_0;

	common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
	if (major == 4 && common) {
		/* Highest set bit in the common minor mask picks the minor. */
		version = RDS_PROTOCOL_4_0;
		while ((common >>= 1) != 0)
			version++;
	} else if (RDS_PROTOCOL_COMPAT_VERSION ==
		   RDS_PROTOCOL(major, minor)) {
		version = RDS_PROTOCOL_COMPAT_VERSION;
	} else {
		if (isv6)
			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
					   &dp->ricp_v6.dp_saddr, major, minor);
		else
			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
					   &dp->ricp_v4.dp_saddr, major, minor);
	}
	return version;
}
e65d4d963 rds: Remove IPv6 ... |
747 |
#if IS_ENABLED(CONFIG_IPV6)
/* Given an IPv6 address, find the net_device which hosts that address and
 * return its index.  This is used by the rds_ib_cm_handle_connect() code to
 * find the interface index of where an incoming request comes from when
 * the request is using a link local address.
 *
 * Note one problem in this search.  It is possible that two interfaces have
 * the same link local address.  Unfortunately, this cannot be solved unless
 * the underlying layer gives us the interface which an incoming RDMA connect
 * request comes from.
 */
static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
{
	struct net_device *netdev;
	int ifindex = 0;

	rcu_read_lock();
	for_each_netdev_rcu(net, netdev) {
		/* strict check (last arg 1): addr must be assigned to
		 * this particular device, not just any device in @net.
		 */
		if (ipv6_chk_addr(net, addr, netdev, 1)) {
			ifindex = netdev->ifindex;
			break;
		}
	}
	rcu_read_unlock();

	return ifindex;
}
#endif
eee2fa6ab rds: Changing IP ... |
775 |
|
ec16227e1 RDS/IB: Infiniban... |
776 |
/* Handle an incoming RDMA CM connection request.
 *
 * Checks protocol compatibility, resolves the peer/local addresses from the
 * private data (IPv4 addresses are v4-mapped into in6_addr form), looks up or
 * creates the rds_connection, sets up the QP and accepts the connection.
 *
 * Return value is the "destroy" flag for the rdma_cm core: nonzero tells it
 * to destroy @cm_id.  We start with destroy = 1 and clear it only once we
 * have taken ownership of the id (ic->i_cm_id = cm_id) — from then on RDS
 * is responsible for destroying it.
 */
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
			     struct rdma_cm_event *event, bool isv6)
{
	__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
	__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
	const struct rds_ib_conn_priv_cmn *dp_cmn;
	struct rds_connection *conn = NULL;
	struct rds_ib_connection *ic = NULL;
	struct rdma_conn_param conn_param;
	const union rds_ib_conn_priv *dp;
	union rds_ib_conn_priv dp_rep;
	struct in6_addr s_mapped_addr;
	struct in6_addr d_mapped_addr;
	const struct in6_addr *saddr6;
	const struct in6_addr *daddr6;
	int destroy = 1;	/* tell caller to destroy cm_id until we own it */
	u32 ifindex = 0;
	u32 version;
	int err = 1;		/* nonzero -> rdma_reject() in the out path */

	/* Check whether the remote protocol version matches ours. */
	version = rds_ib_protocol_compatible(event, isv6);
	if (!version) {
		/* Incompatible peer: reject with a consumer-defined reason. */
		err = RDS_RDMA_REJ_INCOMPAT;
		goto out;
	}

	dp = event->param.conn.private_data;
	if (isv6) {
#if IS_ENABLED(CONFIG_IPV6)
		dp_cmn = &dp->ricp_v6.dp_cmn;
		saddr6 = &dp->ricp_v6.dp_saddr;
		daddr6 = &dp->ricp_v6.dp_daddr;
		/* If either address is link local, need to find the
		 * interface index in order to create a proper RDS
		 * connection.
		 */
		if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
			/* Using init_net for now ..  */
			ifindex = __rds_find_ifindex(&init_net, daddr6);
			/* No index found...  Need to bail out. */
			if (ifindex == 0) {
				err = -EOPNOTSUPP;
				goto out;
			}
		} else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
			/* Use our address to find the correct index.
			 * NOTE(review): this deliberately looks up daddr6
			 * (our local address, per the comment above), not
			 * the link-local saddr6 that triggered the branch.
			 */
			ifindex = __rds_find_ifindex(&init_net, daddr6);
			/* No index found...  Need to bail out. */
			if (ifindex == 0) {
				err = -EOPNOTSUPP;
				goto out;
			}
		}
#else
		err = -EOPNOTSUPP;
		goto out;
#endif
	} else {
		/* IPv4 private data: map both addresses to v4-mapped IPv6
		 * so the rest of the function has a single address format.
		 */
		dp_cmn = &dp->ricp_v4.dp_cmn;
		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
		saddr6 = &s_mapped_addr;
		daddr6 = &d_mapped_addr;
	}

	rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
		 saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
		 RDS_PROTOCOL_MINOR(version),
		 (unsigned long long)be64_to_cpu(lguid),
		 (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);

	/* RDS/IB is not currently netns aware, thus init_net.
	 * Note the swap: the peer's daddr is our local address here.
	 */
	conn = rds_conn_create(&init_net, daddr6, saddr6,
			       &rds_ib_transport, dp_cmn->ricpc_dp_toss,
			       GFP_KERNEL, ifindex);
	if (IS_ERR(conn)) {
		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
		conn = NULL;	/* so the out path skips mutex_unlock() */
		goto out;
	}

	/*
	 * The connection request may occur while the
	 * previous connection exist, e.g. in case of failover.
	 * But as connections may be initiated simultaneously
	 * by both hosts, we have a random backoff mechanism -
	 * see the comment above rds_queue_reconnect()
	 */
	mutex_lock(&conn->c_cm_lock);
	if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
		if (rds_conn_state(conn) == RDS_CONN_UP) {
			rdsdebug("incoming connect while connecting\n");
			rds_conn_drop(conn);
			rds_ib_stats_inc(s_ib_listen_closed_stale);
		} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
			/* Wait and see - our connect may still be succeeding */
			rds_ib_stats_inc(s_ib_connect_raced);
		}
		goto out;
	}

	ic = conn->c_transport_data;

	rds_ib_set_protocol(conn, version);
	rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));

	/* If the peer gave us the last packet it saw, process this as if
	 * we had received a regular ACK. */
	if (dp_cmn->ricpc_ack_seq)
		rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
				    NULL);

	BUG_ON(cm_id->context);
	BUG_ON(ic->i_cm_id);

	/* Take ownership of the cm_id; cross-link it with the connection. */
	ic->i_cm_id = cm_id;
	cm_id->context = conn;

	/* We got halfway through setting up the ib_connection, if we
	 * fail now, we have to take the long route out of this mess. */
	destroy = 0;

	err = rds_ib_setup_qp(conn);
	if (err) {
		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
		goto out;
	}

	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
				  event->param.conn.responder_resources,
				  event->param.conn.initiator_depth, isv6);

	/* rdma_accept() calls rdma_reject() internally if it fails */
	if (rdma_accept(cm_id, &conn_param))
		rds_ib_conn_error(conn, "rdma_accept failed\n");

out:
	if (conn)
		mutex_unlock(&conn->c_cm_lock);
	if (err)
		/* err doubles as the reject reason carried in the private
		 * data (e.g. RDS_RDMA_REJ_INCOMPAT above).
		 */
		rdma_reject(cm_id, &err, sizeof(int),
			    IB_CM_REJ_CONSUMER_DEFINED);
	return destroy;
}
eee2fa6ab rds: Changing IP ... |
925 |
/* Active-side connect: called once the rdma_cm has resolved the route.
 *
 * Sets up the QP and issues rdma_connect_locked() with our proposed
 * protocol version in the private data.  Returning nonzero tells the
 * rdma_cm core to destroy @cm_id, so errors are suppressed (ret forced
 * to 0) while we still own the id via ic->i_cm_id.
 */
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
{
	struct rds_connection *conn = cm_id->context;
	struct rds_ib_connection *ic = conn->c_transport_data;
	struct rdma_conn_param conn_param;
	union rds_ib_conn_priv dp;
	int ret;

	/* Propose the highest version we support (4.1); the handshake
	 * reply may negotiate this down for older peers.
	 */
	rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
	ic->i_flowctl = rds_ib_sysctl_flow_control;	/* advertise flow control */

	ret = rds_ib_setup_qp(conn);
	if (ret) {
		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
		goto out;
	}

	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
				  conn->c_proposed_version,
				  UINT_MAX, UINT_MAX, isv6);
	/* _locked variant: we are called from the cm_id's event handler,
	 * which already holds the id's handler mutex.
	 */
	ret = rdma_connect_locked(cm_id, &conn_param);
	if (ret)
		rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n",
				  ret);

out:
	/* Beware - returning non-zero tells the rdma_cm to destroy
	 * the cm_id. We should certainly not do it as long as we still
	 * "own" the cm_id. */
	if (ret) {
		if (ic->i_cm_id == cm_id)
			ret = 0;
	}

	ic->i_active_side = true;
	return ret;
}
b04e8554f RDS: TCP: Hooks t... |
964 |
/* Kick off an outgoing connection for @cp.
 *
 * Creates the rdma_cm id (delegating CM events to the rdma_transport
 * handlers), builds source/destination sockaddrs from the connection's
 * local/foreign addresses, and starts asynchronous address resolution.
 * On resolve failure the cm_id is destroyed here and ic->i_cm_id cleared.
 */
int rds_ib_conn_path_connect(struct rds_conn_path *cp)
{
	struct rds_connection *conn = cp->cp_conn;
	struct sockaddr_storage src, dest;
	rdma_cm_event_handler handler;
	struct rds_ib_connection *ic;
	int ret;

	ic = conn->c_transport_data;

	/* XXX I wonder what affect the port space has */
	/* delegate cm event handler to rdma_transport */
#if IS_ENABLED(CONFIG_IPV6)
	if (conn->c_isv6)
		handler = rds6_rdma_cm_event_handler;
	else
#endif
		handler = rds_rdma_cm_event_handler;
	ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
				     RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ic->i_cm_id)) {
		ret = PTR_ERR(ic->i_cm_id);
		ic->i_cm_id = NULL;
		rdsdebug("rdma_create_id() failed: %d\n", ret);
		goto out;
	}

	rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);

	if (ipv6_addr_v4mapped(&conn->c_faddr)) {
		/* IPv4 peer: extract the embedded v4 addresses.  Note the
		 * destination port is RDS_PORT here, vs RDS_CM_PORT below.
		 */
		struct sockaddr_in *sin;

		sin = (struct sockaddr_in *)&src;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
		sin->sin_port = 0;	/* any local port */

		sin = (struct sockaddr_in *)&dest;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
		sin->sin_port = htons(RDS_PORT);
	} else {
		struct sockaddr_in6 *sin6;

		sin6 = (struct sockaddr_in6 *)&src;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = conn->c_laddr;
		sin6->sin6_port = 0;	/* any local port */
		sin6->sin6_scope_id = conn->c_dev_if;

		sin6 = (struct sockaddr_in6 *)&dest;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = conn->c_faddr;
		sin6->sin6_port = htons(RDS_CM_PORT);
		sin6->sin6_scope_id = conn->c_dev_if;
	}

	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
				(struct sockaddr *)&dest,
				RDS_RDMA_RESOLVE_TIMEOUT_MS);
	if (ret) {
		rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
			 ret);
		rdma_destroy_id(ic->i_cm_id);
		ic->i_cm_id = NULL;
	}

out:
	return ret;
}

/*
 * This is so careful about only cleaning up resources that were built up
 * so that it can be called at any point during startup.  In fact it
 * can be called multiple times for a given connection.
 */
226f7a7d9 RDS: Rework path ... |
1040 |
/* Tear down the IB state of @cp, leaving the rds_ib_connection reusable.
 *
 * Only cleans up what was actually built (see the comment above), so it is
 * safe to call at any point during startup and more than once per
 * connection.  Ordering matters throughout: disconnect, drain completions,
 * kill tasklets, destroy QP/CQs (the callback generators) before freeing
 * the buffers those callbacks use.
 */
void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
{
	struct rds_connection *conn = cp->cp_conn;
	struct rds_ib_connection *ic = conn->c_transport_data;
	int err = 0;

	rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
		 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
		 ic->i_cm_id ? ic->i_cm_id->qp : NULL);

	if (ic->i_cm_id) {
		rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
		err = rdma_disconnect(ic->i_cm_id);
		if (err) {
			/* Actually this may happen quite frequently, when
			 * an outgoing connect raced with an incoming connect.
			 */
			rdsdebug("failed to disconnect, cm: %p err %d\n",
				 ic->i_cm_id, err);
		}

		/* kick off "flush_worker" for all pools in order to reap
		 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
		 */
		rds_ib_flush_mrs();

		/*
		 * We want to wait for tx and rx completion to finish
		 * before we tear down the connection, but we have to be
		 * careful not to get stuck waiting on a send ring that
		 * only has unsignaled sends in it.  We've shutdown new
		 * sends before getting here so by waiting for signaled
		 * sends to complete we're ensured that there will be no
		 * more tx processing.
		 */
		wait_event(rds_ib_ring_empty_wait,
			   rds_ib_ring_empty(&ic->i_recv_ring) &&
			   (atomic_read(&ic->i_signaled_sends) == 0) &&
			   (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
		tasklet_kill(&ic->i_send_tasklet);
		tasklet_kill(&ic->i_recv_tasklet);

		/* Stop CQ processing before tearing down the CQs. */
		atomic_set(&ic->i_cq_quiesce, 1);

		/* first destroy the ib state that generates callbacks */
		if (ic->i_cm_id->qp)
			rdma_destroy_qp(ic->i_cm_id);
		if (ic->i_send_cq) {
			if (ic->rds_ibdev)
				ibdev_put_vector(ic->rds_ibdev, ic->i_scq_vector);
			ib_destroy_cq(ic->i_send_cq);
		}

		if (ic->i_recv_cq) {
			if (ic->rds_ibdev)
				ibdev_put_vector(ic->rds_ibdev, ic->i_rcq_vector);
			ib_destroy_cq(ic->i_recv_cq);
		}

		if (ic->rds_ibdev) {
			struct dma_pool *pool;

			pool = ic->rds_ibdev->rid_hdrs_pool;

			/* then free the resources that ib callbacks use */
			if (ic->i_send_hdrs) {
				rds_dma_hdrs_free(pool, ic->i_send_hdrs,
						  ic->i_send_hdrs_dma,
						  ic->i_send_ring.w_nr);
				ic->i_send_hdrs = NULL;
				ic->i_send_hdrs_dma = NULL;
			}

			if (ic->i_recv_hdrs) {
				rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
						  ic->i_recv_hdrs_dma,
						  ic->i_recv_ring.w_nr);
				ic->i_recv_hdrs = NULL;
				ic->i_recv_hdrs_dma = NULL;
			}

			if (ic->i_ack) {
				dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
				ic->i_ack = NULL;
			}
		} else {
			/* No device: none of the DMA-pool buffers should
			 * have been allocated in the first place.
			 */
			WARN_ON(ic->i_send_hdrs);
			WARN_ON(ic->i_send_hdrs_dma);
			WARN_ON(ic->i_recv_hdrs);
			WARN_ON(ic->i_recv_hdrs_dma);
			WARN_ON(ic->i_ack);
		}

		if (ic->i_sends)
			rds_ib_send_clear_ring(ic);
		if (ic->i_recvs)
			rds_ib_recv_clear_ring(ic);

		rdma_destroy_id(ic->i_cm_id);

		/*
		 * Move connection back to the nodev list.
		 */
		if (ic->rds_ibdev)
			rds_ib_remove_conn(ic->rds_ibdev, conn);

		ic->i_cm_id = NULL;
		ic->i_pd = NULL;
		ic->i_send_cq = NULL;
		ic->i_recv_cq = NULL;
	}
	BUG_ON(ic->rds_ibdev);

	/* Clear pending transmit */
	if (ic->i_data_op) {
		struct rds_message *rm;

		rm = container_of(ic->i_data_op, struct rds_message, data);
		rds_message_put(rm);
		ic->i_data_op = NULL;
	}

	/* Clear the ACK state */
	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
#ifdef KERNEL_HAS_ATOMIC64
	atomic64_set(&ic->i_ack_next, 0);
#else
	ic->i_ack_next = 0;
#endif
	ic->i_ack_recv = 0;

	/* Clear flow control state */
	ic->i_flowctl = 0;
	atomic_set(&ic->i_credits, 0);

	/* Re-init rings, but retain sizes. */
	rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
	rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);

	if (ic->i_ibinc) {
		rds_inc_put(&ic->i_ibinc->ii_inc);
		ic->i_ibinc = NULL;
	}

	vfree(ic->i_sends);
	ic->i_sends = NULL;
	vfree(ic->i_recvs);
	ic->i_recvs = NULL;
	ic->i_active_side = false;
}

/* Allocate and initialize the transport-private state for @conn and park
 * it on the nodev list until a device is bound.  Returns 0 or -errno.
 */
int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
	struct rds_ib_connection *ic;
	unsigned long flags;
	int ret;

	/* XXX too lazy? */
	ic = kzalloc(sizeof(struct rds_ib_connection), gfp);
	if (!ic)
		return -ENOMEM;

	ret = rds_ib_recv_alloc_caches(ic, gfp);
	if (ret) {
		kfree(ic);
		return ret;
	}

	INIT_LIST_HEAD(&ic->ib_node);
	tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send,
		     (unsigned long)ic);
	tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv,
		     (unsigned long)ic);
	mutex_init(&ic->i_recv_mutex);
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&ic->i_ack_lock);
#endif
	atomic_set(&ic->i_signaled_sends, 0);
	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);

	/*
	 * rds_ib_conn_shutdown() waits for these to be emptied so they
	 * must be initialized before it can be called.
	 */
	rds_ib_ring_init(&ic->i_send_ring, 0);
	rds_ib_ring_init(&ic->i_recv_ring, 0);

	ic->conn = conn;
	conn->c_transport_data = ic;

	spin_lock_irqsave(&ib_nodev_conns_lock, flags);
	list_add_tail(&ic->ib_node, &ib_nodev_conns);
	spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);


	rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
	return 0;
}
745cbccac RDS: Rewrite conn... |
1235 1236 1237 |
/* * Free a connection. Connection must be shut down and not set for reconnect. */ |
ec16227e1 RDS/IB: Infiniban... |
1238 1239 1240 |
void rds_ib_conn_free(void *arg) { struct rds_ib_connection *ic = arg; |
745cbccac RDS: Rewrite conn... |
1241 |
spinlock_t *lock_ptr; |
ec16227e1 RDS/IB: Infiniban... |
1242 1243 |
rdsdebug("ic %p ", ic); |
745cbccac RDS: Rewrite conn... |
1244 1245 1246 1247 1248 1249 1250 1251 1252 |
/* * Conn is either on a dev's list or on the nodev list. * A race with shutdown() or connect() would cause problems * (since rds_ibdev would change) but that should never happen. */ lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock; spin_lock_irq(lock_ptr); |
ec16227e1 RDS/IB: Infiniban... |
1253 |
list_del(&ic->ib_node); |
745cbccac RDS: Rewrite conn... |
1254 |
spin_unlock_irq(lock_ptr); |
332441258 RDS/IB: Add cachi... |
1255 |
rds_ib_recv_free_caches(ic); |
ec16227e1 RDS/IB: Infiniban... |
1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 |
kfree(ic); } /* * An error occurred on the connection */ void __rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...) { va_list ap; rds_conn_drop(conn); va_start(ap, fmt); vprintk(fmt, ap); va_end(ap); } |