Blame view
net/sunrpc/xprtrdma/transport.c
24.8 KB
f58851e6b
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
/* * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the BSD-type * license below: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * Neither the name of the Network Appliance, Inc. nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * transport.c * * This file contains the top-level implementation of an RPC RDMA * transport. 
* * Naming convention: functions beginning with xprt_ are part of the * transport switch. All others are RPC RDMA internal. */ #include <linux/module.h> |
5a0e3ad6a
|
51 |
#include <linux/slab.h> |
f58851e6b
|
52 |
#include <linux/seq_file.h> |
5976687a2
|
53 |
#include <linux/sunrpc/addr.h> |
f58851e6b
|
54 55 |
#include "xprt_rdma.h" |
f895b252d
|
56 |
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6b
|
57 58 |
# define RPCDBG_FACILITY RPCDBG_TRANS #endif |
f58851e6b
|
59 60 61 62 63 |
/*
 * tunables
 */

/* Number of RPC slots (concurrent outstanding RPCs) per transport. */
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
/* Largest RPC Reply that may be sent inline (no Reply chunk). */
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
/* Largest RPC Call that may be sent inline (no Read chunk). */
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
/* Memory registration strategy; default FRMR (fast registration WRs). */
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize;

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)

/* Bounds enforced by the proc_dointvec_minmax handlers below. */
static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
static unsigned int min_inline_size = RPCRDMA_MIN_INLINE;
static unsigned int max_inline_size = RPCRDMA_MAX_INLINE;
static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1;

static struct ctl_table_header *sunrpc_table_header;

/* sysctl knobs exposed under /proc/sys/sunrpc when SUNRPC_DEBUG is on. */
static struct ctl_table xr_tunables_table[] = {
	{
		.procname	= "rdma_slot_table_entries",
		.data		= &xprt_rdma_slot_table_entries,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_slot_table_size,
		.extra2		= &max_slot_table_size
	},
	{
		.procname	= "rdma_max_inline_read",
		.data		= &xprt_rdma_max_inline_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_inline_size,
		.extra2		= &max_inline_size,
	},
	{
		.procname	= "rdma_max_inline_write",
		.data		= &xprt_rdma_max_inline_write,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_inline_size,
		.extra2		= &max_inline_size,
	},
	{
		.procname	= "rdma_inline_write_padding",
		.data		= &xprt_rdma_inline_write_padding,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &max_padding,
	},
	{
		.procname	= "rdma_memreg_strategy",
		.data		= &xprt_rdma_memreg_strategy,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_memreg,
		.extra2		= &max_memreg,
	},
	{
		.procname	= "rdma_pad_optimize",
		.data		= &xprt_rdma_pad_optimize,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ },
};

/* Parent "sunrpc" directory node for the table above. */
static struct ctl_table sunrpc_table[] = {
	{
		.procname	= "sunrpc",
		.mode		= 0555,
		.child		= xr_tunables_table
	},
	{ },
};

#endif
d31ae2548
|
147 |
static const struct rpc_xprt_ops xprt_rdma_procs; |
f58851e6b
|
148 149 |
/* Format the HEX_ADDR and NETID display strings for an IPv4 peer. */
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
	char buf[20];

	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);

	/* Static string: must not be kfree'd (see xprt_rdma_free_addresses) */
	xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
}

/* Format the HEX_ADDR and NETID display strings for an IPv6 peer. */
static void
xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
{
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
	char buf[40];

	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);

	/* Static string: must not be kfree'd (see xprt_rdma_free_addresses) */
	xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
}

/* Populate xprt->address_strings[] (address, port, protocol, netid)
 * for display/debugging. Unrecognized address families leave the
 * strings unset.
 */
void
xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
{
	char buf[128];

	switch (sap->sa_family) {
	case AF_INET:
		xprt_rdma_format_addresses4(xprt, sap);
		break;
	case AF_INET6:
		xprt_rdma_format_addresses6(xprt, sap);
		break;
	default:
		pr_err("rpcrdma: Unrecognized address family\n");
		return;
	}

	(void)rpc_ntop(sap, buf, sizeof(buf));
	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);

	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);

	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);

	/* Static string: must not be kfree'd (see xprt_rdma_free_addresses) */
	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
}
5d252f90a
|
201 |
void |
f58851e6b
|
202 203 |
xprt_rdma_free_addresses(struct rpc_xprt *xprt) { |
33e01dc7f
|
204 205 206 207 208 209 210 211 212 213 |
unsigned int i; for (i = 0; i < RPC_DISPLAY_MAX; i++) switch (i) { case RPC_DISPLAY_PROTO: case RPC_DISPLAY_NETID: continue; default: kfree(xprt->address_strings[i]); } |
f58851e6b
|
214 |
} |
3a72dc771
|
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 |
/* Endpoint connection-state change callback: defer handling to a
 * workqueue because it may be invoked in an atomic context.
 */
void
rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
	schedule_delayed_work(&ep->rep_connect_worker, 0);
}

/* Propagate an endpoint connection-state change to the generic
 * transport, and wake RPC tasks waiting on the connection.
 */
void
rpcrdma_connect_worker(struct work_struct *work)
{
	struct rpcrdma_ep *ep =
		container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
	struct rpcrdma_xprt *r_xprt =
		container_of(ep, struct rpcrdma_xprt, rx_ep);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	spin_lock_bh(&xprt->transport_lock);
	/* Bump the connect cookie so retransmits of older requests
	 * are suppressed (see xprt_rdma_send_request).
	 */
	if (++xprt->connect_cookie == 0)	/* maintain a reserved value */
		++xprt->connect_cookie;
	if (ep->rep_connected > 0) {
		if (!xprt_test_and_set_connected(xprt))
			xprt_wake_pending_tasks(xprt, 0);
	} else {
		if (xprt_test_and_clear_connected(xprt))
			xprt_wake_pending_tasks(xprt, -ENOTCONN);
	}
	spin_unlock_bh(&xprt->transport_lock);
}
f58851e6b
|
242 243 244 |
static void xprt_rdma_connect_worker(struct work_struct *work) { |
5abefb861
|
245 246 247 |
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
f58851e6b
|
248 |
int rc = 0; |
d19751e7b
|
249 250 251 252 253 254 255 256 |
xprt_clear_connected(xprt); dprintk("RPC: %s: %sconnect ", __func__, r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); if (rc) xprt_wake_pending_tasks(xprt, rc); |
f58851e6b
|
257 258 259 260 |
dprintk("RPC: %s: exit ", __func__); xprt_clear_connecting(xprt); } |
4a0682583
|
261 262 263 264 265 266 267 268 269 270 |
/* Fault-injection hook: forcibly disconnect the RDMA connection so
 * that transport recovery paths can be exercised.
 */
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
						   rx_xprt);

	pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
	rdma_disconnect(r_xprt->rx_ia.ri_id);
}
f58851e6b
|
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 |
/*
 * xprt_rdma_destroy
 *
 * Destroy the xprt.
 * Free all memory associated with the object, including its own.
 * NOTE: none of the *destroy methods free memory for their top-level
 * objects, even though they may have allocated it (they do free
 * private memory). It's up to the caller to handle it. In this
 * case (RDMA transport), all structure memory is inlined with the
 * struct rpcrdma_xprt.
 */
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	dprintk("RPC: %s: called\n", __func__);

	/* Stop any pending connect work before tearing anything down */
	cancel_delayed_work_sync(&r_xprt->rx_connect_worker);

	xprt_clear_connected(xprt);

	/* Teardown order matters: endpoint, then buffers, then IA */
	rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
	rpcrdma_buffer_destroy(&r_xprt->rx_buf);
	rpcrdma_ia_close(&r_xprt->rx_ia);

	xprt_rdma_free_addresses(xprt);

	xprt_free(xprt);

	dprintk("RPC: %s: returning\n", __func__);

	module_put(THIS_MODULE);
}
2881ae74e
|
304 305 306 307 |
/* Default RPC timeout for RDMA: a single 60-second interval with no
 * back-off (to_initval == to_maxval), and no retries configured here.
 */
static const struct rpc_timeout xprt_rdma_default_timeout = {
	.to_initval = 60 * HZ,
	.to_maxval = 60 * HZ,
};
f58851e6b
|
308 309 310 311 312 313 314 315 316 317 318 319 |
/**
 * xprt_setup_rdma - Set up transport to use RDMA
 *
 * @args: rpc transport arguments
 *
 * Allocates the generic rpc_xprt plus the RDMA-private state, opens
 * the interface adapter, creates the endpoint, and allocates the
 * pre-registered buffer pool. On failure, previously acquired
 * resources are unwound via the out1..out4 labels.
 */
static struct rpc_xprt *
xprt_setup_rdma(struct xprt_create *args)
{
	struct rpcrdma_create_data_internal cdata;
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;
	struct rpcrdma_ep *new_ep;
	struct sockaddr *sap;
	int rc;

	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
			xprt_rdma_slot_table_entries,
			xprt_rdma_slot_table_entries);
	if (xprt == NULL) {
		dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	/* 60 second timeout, no retries */
	xprt->timeout = &xprt_rdma_default_timeout;
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->resvport = 0;		/* privileged port not needed */
	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
	xprt->ops = &xprt_rdma_procs;

	/*
	 * Set up RDMA-specific connect data.
	 */
	sap = (struct sockaddr *)&cdata.addr;
	memcpy(sap, args->dstaddr, args->addrlen);

	/* Ensure xprt->addr holds valid server TCP (not RDMA)
	 * address, for any side protocols which peek at it */
	xprt->prot = IPPROTO_TCP;
	xprt->addrlen = args->addrlen;
	memcpy(&xprt->addr, sap, xprt->addrlen);

	if (rpc_get_port(sap))
		xprt_set_bound(xprt);

	cdata.max_requests = xprt->max_reqs;

	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */

	/* Inline thresholds are clamped to the chunk maximums */
	cdata.inline_wsize = xprt_rdma_max_inline_write;
	if (cdata.inline_wsize > cdata.wsize)
		cdata.inline_wsize = cdata.wsize;

	cdata.inline_rsize = xprt_rdma_max_inline_read;
	if (cdata.inline_rsize > cdata.rsize)
		cdata.inline_rsize = cdata.rsize;

	cdata.padding = xprt_rdma_inline_write_padding;

	/*
	 * Create new transport instance, which includes initialized
	 *  o ia
	 *  o endpoint
	 *  o buffers
	 */

	new_xprt = rpcx_to_rdmax(xprt);

	rc = rpcrdma_ia_open(new_xprt, sap);
	if (rc)
		goto out1;

	/*
	 * initialize and create ep
	 */
	new_xprt->rx_data = cdata;
	new_ep = &new_xprt->rx_ep;
	new_ep->rep_remote_addr = cdata.addr;

	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
				&new_xprt->rx_ia, &new_xprt->rx_data);
	if (rc)
		goto out2;

	/*
	 * Allocate pre-registered send and receive buffers for headers and
	 * any inline data. Also specify any padding which will be provided
	 * from a preregistered zero buffer.
	 */
	rc = rpcrdma_buffer_create(new_xprt);
	if (rc)
		goto out3;

	/*
	 * Register a callback for connection events. This is necessary because
	 * connection loss notification is async. We also catch connection loss
	 * when reaping receives.
	 */
	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
			  xprt_rdma_connect_worker);

	xprt_rdma_format_addresses(xprt, sap);
	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
	if (xprt->max_payload == 0)
		goto out4;
	xprt->max_payload <<= PAGE_SHIFT;
	dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
		__func__, xprt->max_payload);

	if (!try_module_get(THIS_MODULE))
		goto out4;

	dprintk("RPC: %s: %s:%s\n", __func__,
		xprt->address_strings[RPC_DISPLAY_ADDR],
		xprt->address_strings[RPC_DISPLAY_PORT]);
	return xprt;

out4:
	xprt_rdma_free_addresses(xprt);
	rc = -EINVAL;
out3:
	rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
	rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
	xprt_free(xprt);
	return ERR_PTR(rc);
}
bebd03186
|
444 445 446 447 448 449 |
/**
 * xprt_rdma_close - Close down RDMA connection
 * @xprt: generic transport to be closed
 *
 * Called during transport shutdown reconnect, or device
 * removal. Caller holds the transport's write lock.
 */
static void
xprt_rdma_close(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);

	if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
		/* Device removal: no disconnect is possible because
		 * the underlying device is already gone.
		 */
		xprt_clear_connected(xprt);
		rpcrdma_ia_remove(ia);
		return;
	}
	if (ep->rep_connected == -ENODEV)
		return;
	if (ep->rep_connected > 0)
		xprt->reestablish_timeout = 0;
	xprt_disconnect_done(xprt);
	rpcrdma_ep_disconnect(ep, ia);
}

/* Record the destination port in both the generic transport address
 * and the RDMA connect data copy. Cast to sockaddr_in is used for
 * both families -- NOTE(review): relies on sin_port/sin6_port sharing
 * the same offset; confirm if a new family is ever added.
 */
static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{
	struct sockaddr_in *sap;

	sap = (struct sockaddr_in *)&xprt->addr;
	sap->sin_port = htons(port);
	sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
	sap->sin_port = htons(port);
	dprintk("RPC: %s: %u\n", __func__, port);
}
33849792c
|
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 |
/**
 * xprt_rdma_timer - invoked when an RPC times out
 * @xprt: controlling RPC transport
 * @task: RPC task that timed out
 *
 * Invoked when the transport is still connected, but an RPC
 * retransmit timeout occurs.
 *
 * Since RDMA connections don't have a keep-alive, forcibly
 * disconnect and retry to connect. This drives full
 * detection of the network path, and retransmissions of
 * all pending RPCs.
 */
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
	dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);

	xprt_force_disconnect(xprt);
}
f58851e6b
|
507 |
/* Initiate a connection. A fresh connect kicks the worker at once;
 * a reconnect is delayed by the current reestablish timeout, which
 * then backs off exponentially, clamped to
 * [RPCRDMA_INIT_REEST_TO, RPCRDMA_MAX_REEST_TO].
 */
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	if (r_xprt->rx_ep.rep_connected != 0) {
		/* Reconnect */
		schedule_delayed_work(&r_xprt->rx_connect_worker,
				      xprt->reestablish_timeout);
		xprt->reestablish_timeout <<= 1;
		if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
			xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
		else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
			xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	} else {
		schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
		/* Synchronous tasks wait for the connect to finish */
		if (!RPC_IS_ASYNC(task))
			flush_delayed_work(&r_xprt->rx_connect_worker);
	}
}
9c40c49f1
|
526 527 528 529 530 531 532 |
/* Allocate a fixed-size buffer in which to construct and send the
 * RPC-over-RDMA header for this request. Idempotent: returns true
 * immediately if the buffer already exists.
 */
static bool
rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    gfp_t flags)
{
	size_t size = RPCRDMA_HDRBUF_SIZE;
	struct rpcrdma_regbuf *rb;

	if (req->rl_rdmabuf)
		return true;

	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
	if (IS_ERR(rb))
		return false;

	r_xprt->rx_stats.hardway_register_count += size;
	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
	return true;
}
9c40c49f1
|
547 548 549 550 551 |
/* Ensure req's send buffer is at least @size bytes; reallocate and
 * replace the existing buffer if it is too small.
 */
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
		return true;

	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
	if (IS_ERR(rb))
		return false;

	rpcrdma_free_regbuf(req->rl_sendbuf);
	r_xprt->rx_stats.hardway_register_count += size;
	req->rl_sendbuf = rb;
	return true;
}

/* The rq_rcv_buf is used only if a Reply chunk is necessary.
 * The decision to use a Reply chunk is made later in
 * rpcrdma_marshal_req. This buffer is registered at that time.
 *
 * Otherwise, the associated RPC Reply arrives in a separate
 * Receive buffer, arbitrarily chosen by the HCA. The buffer
 * allocated here for the RPC Reply is not utilized in that
 * case. See rpcrdma_inline_fixup.
 *
 * A regbuf is used here to remember the buffer size.
 */
static bool
rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		    size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
		return true;

	rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
	if (IS_ERR(rb))
		return false;

	rpcrdma_free_regbuf(req->rl_recvbuf);
	r_xprt->rx_stats.hardway_register_count += size;
	req->rl_recvbuf = rb;
	return true;
}
5fe6eaa1f
|
591 592 593 594 595 596 597 598 599 |
/**
 * xprt_rdma_allocate - allocate transport resources for an RPC
 * @task: RPC task
 *
 * Return values:
 *        0:	Success; rq_buffer points to RPC buffer to use
 *   ENOMEM:	Out of memory, call again later
 *      EIO:	A permanent error occurred, do not retry
 *
 * The RDMA allocate/free functions need the task structure as a place
 * to hide the struct rpcrdma_req, which is necessary for the actual
 * send/recv sequence.
 *
 * xprt_rdma_allocate provides buffers that are already mapped for
 * DMA, and a local DMA lkey is provided for each.
 */
static int
xprt_rdma_allocate(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
	struct rpcrdma_req *req;
	gfp_t flags;

	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
	if (req == NULL)
		return -ENOMEM;

	/* Swap-out path must not wait or recurse into reclaim */
	flags = RPCRDMA_DEF_GFP;
	if (RPC_IS_SWAPPER(task))
		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;

	if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
		goto out_fail;
	if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
		goto out_fail;
	if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
		goto out_fail;

	dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
		task->tk_pid, __func__, rqst->rq_callsize,
		rqst->rq_rcvsize, req);

	req->rl_connect_cookie = 0;	/* our reserved value */
	rpcrdma_set_xprtdata(rqst, req);
	rqst->rq_buffer = req->rl_sendbuf->rg_base;
	rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
	return 0;

out_fail:
	rpcrdma_buffer_put(req);
	return -ENOMEM;
}
3435c74ae
|
644 645 646 647 648 |
/**
 * xprt_rdma_free - release resources allocated by xprt_rdma_allocate
 * @task: RPC task
 *
 * Caller guarantees rqst->rq_buffer is non-NULL.
 */
static void
xprt_rdma_free(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	/* Backchannel requests are freed by the backchannel code */
	if (req->rl_backchannel)
		return;

	dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);

	/* Invalidate any chunks still registered with the device
	 * before returning the req to the pool.
	 */
	if (!list_empty(&req->rl_registered))
		ia->ri_ops->ro_unmap_sync(r_xprt, &req->rl_registered);
	rpcrdma_unmap_sges(ia, req);
	rpcrdma_buffer_put(req);
}
7a89f9c62
|
668 669 670 671 |
/**
 * xprt_rdma_send_request - marshal and send an RPC request
 * @task: RPC task with an RPC message in rq_snd_buf
 *
 * Caller holds the transport's write lock.
 *
 * Return values:
 *        0:	The request has been sent
 * ENOTCONN:	Caller needs to invoke connect logic then call again
 *  ENOBUFS:	Call again later to send the request
 *      EIO:	A permanent error occurred. The request was not sent,
 *		and don't try it again
 *
 * send_request invokes the meat of RPC RDMA. It must do the following:
 *
 *  1.  Marshal the RPC request into an RPC RDMA request, which means
 *	putting a header in front of data, and creating IOVs for RDMA
 *	from those in the request.
 *  2.  In marshaling, detect opportunities for RDMA, and use them.
 *  3.  Post a recv message to set up asynch completion, then send
 *	the request (rpcrdma_ep_post).
 *  4.  No partial sends are possible in the RPC-RDMA protocol (as in UDP).
 */
static int
xprt_rdma_send_request(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct rpc_xprt *xprt = rqst->rq_xprt;
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	int rc = 0;

	if (!xprt_connected(xprt))
		goto drop_connection;

	/* On retransmit, remove any previously registered chunks */
	if (unlikely(!list_empty(&req->rl_registered)))
		r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);

	rc = rpcrdma_marshal_req(r_xprt, rqst);
	if (rc < 0)
		goto failed_marshal;

	if (req->rl_reply == NULL) 		/* e.g. reconnection */
		rpcrdma_recv_buffer_get(req);

	/* Must suppress retransmit to maintain credits */
	if (req->rl_connect_cookie == xprt->connect_cookie)
		goto drop_connection;
	req->rl_connect_cookie = xprt->connect_cookie;

	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
		goto drop_connection;

	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
	rqst->rq_bytes_sent = 0;
	return 0;

failed_marshal:
	/* Only -ENOTCONN falls through to the disconnect path;
	 * other marshaling errors are returned to the caller.
	 */
	if (rc != -ENOTCONN)
		return rc;
drop_connection:
	xprt_disconnect_done(xprt);
	return -ENOTCONN;	/* implies disconnect */
}
5d252f90a
|
731 |
/* Emit transport statistics for /proc/self/mountstats. The field
 * order and format strings are consumed by userspace tools and must
 * not be reordered.
 */
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	long idle_time = 0;

	if (xprt_connected(xprt))
		idle_time = (long)(jiffies - xprt->last_used) / HZ;

	seq_puts(seq, "\txprt:\trdma ");
	seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ",
		   0,	/* need a local port? */
		   xprt->stat.bind_count,
		   xprt->stat.connect_count,
		   xprt->stat.connect_time,
		   idle_time,
		   xprt->stat.sends,
		   xprt->stat.recvs,
		   xprt->stat.bad_xids,
		   xprt->stat.req_u,
		   xprt->stat.bklog_u);
	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ",
		   r_xprt->rx_stats.read_chunk_count,
		   r_xprt->rx_stats.write_chunk_count,
		   r_xprt->rx_stats.reply_chunk_count,
		   r_xprt->rx_stats.total_rdma_request,
		   r_xprt->rx_stats.total_rdma_reply,
		   r_xprt->rx_stats.pullup_copy_count,
		   r_xprt->rx_stats.fixup_copy_count,
		   r_xprt->rx_stats.hardway_register_count,
		   r_xprt->rx_stats.failed_marshal_count,
		   r_xprt->rx_stats.bad_reply_count,
		   r_xprt->rx_stats.nomsg_call_count);
	seq_printf(seq, "%lu %lu %lu %lu\n",
		   r_xprt->rx_stats.mrs_recovered,
		   r_xprt->rx_stats.mrs_orphaned,
		   r_xprt->rx_stats.mrs_allocated,
		   r_xprt->rx_stats.local_inv_needed);
}
d67fa4d85
|
769 770 771 |
/* Swap-over-RDMA hooks: the RDMA transport needs no extra setup or
 * teardown for swap, so enable always reports success and disable is
 * a no-op.
 */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}

static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
f58851e6b
|
779 780 781 |
/*
 * Plumbing for rpc transport switch and kernel module
 */

/* Method table wired into the generic RPC transport switch. Uses the
 * congestion-controlled reserve/release variants because RPC-over-RDMA
 * flow control is credit-based.
 */
static const struct rpc_xprt_ops xprt_rdma_procs = {
	.reserve_xprt		= xprt_reserve_xprt_cong,
	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
	.alloc_slot		= xprt_alloc_slot,
	.release_request	= xprt_release_rqst_cong,       /* ditto */
	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
	.timer			= xprt_rdma_timer,
	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
	.set_port		= xprt_rdma_set_port,
	.connect		= xprt_rdma_connect,
	.buf_alloc		= xprt_rdma_allocate,
	.buf_free		= xprt_rdma_free,
	.send_request		= xprt_rdma_send_request,
	.close			= xprt_rdma_close,
	.destroy		= xprt_rdma_destroy,
	.print_stats		= xprt_rdma_print_stats,
	.enable_swap		= xprt_rdma_enable_swap,
	.disable_swap		= xprt_rdma_disable_swap,
	.inject_disconnect	= xprt_rdma_inject_disconnect,
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	.bc_setup		= xprt_rdma_bc_setup,
	.bc_up			= xprt_rdma_bc_up,
	.bc_maxpayload		= xprt_rdma_bc_maxpayload,
	.bc_free_rqst		= xprt_rdma_bc_free_rqst,
	.bc_destroy		= xprt_rdma_bc_destroy,
#endif
};

/* Registration record for the "rdma" transport class. */
static struct xprt_class xprt_rdma = {
	.list			= LIST_HEAD_INIT(xprt_rdma.list),
	.name			= "rdma",
	.owner			= THIS_MODULE,
	.ident			= XPRT_TRANSPORT_RDMA,
	.setup			= xprt_setup_rdma,
};
ffe1f0df5
|
817 |
/* Module unload: remove the sysctl table, unregister the forward and
 * backchannel transport classes, and destroy the rpcrdma workqueue.
 */
void xprt_rdma_cleanup(void)
{
	int rc;

	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (sunrpc_table_header) {
		unregister_sysctl_table(sunrpc_table_header);
		sunrpc_table_header = NULL;
	}
#endif
	rc = xprt_unregister_transport(&xprt_rdma);
	if (rc)
		dprintk("RPC: %s: xprt_unregister returned %i\n",
			__func__, rc);

	rpcrdma_destroy_wq();

	rc = xprt_unregister_transport(&xprt_rdma_bc);
	if (rc)
		dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
			__func__, rc);
}
ffe1f0df5
|
842 |
/* Module load: create the rpcrdma workqueue, register the forward and
 * backchannel transport classes, and install the sysctl table. Each
 * failure unwinds everything acquired before it.
 */
int xprt_rdma_init(void)
{
	int rc;

	rc = rpcrdma_alloc_wq();
	if (rc)
		return rc;

	rc = xprt_register_transport(&xprt_rdma);
	if (rc) {
		rpcrdma_destroy_wq();
		return rc;
	}

	rc = xprt_register_transport(&xprt_rdma_bc);
	if (rc) {
		xprt_unregister_transport(&xprt_rdma);
		rpcrdma_destroy_wq();
		return rc;
	}

	dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");

	dprintk("Defaults:\n");
	dprintk("\tSlots %d\n"
		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
		xprt_rdma_slot_table_entries,
		xprt_rdma_max_inline_read,
		xprt_rdma_max_inline_write);
	dprintk("\tPadding %d\n\tMemreg %d\n",
		xprt_rdma_inline_write_padding,
		xprt_rdma_memreg_strategy);

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (!sunrpc_table_header)
		sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
	return 0;
}