Blame view

net/9p/trans_rdma.c 19.1 KB
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  /*
   * linux/fs/9p/trans_rdma.c
   *
   * RDMA transport layer based on the trans_fd.c implementation.
   *
   *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
   *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
   *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
   *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
   *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License version 2
   *  as published by the Free Software Foundation.
   *
   *  This program is distributed in the hope that it will be useful,
   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *  GNU General Public License for more details.
   *
   *  You should have received a copy of the GNU General Public License
   *  along with this program; if not, write to:
   *  Free Software Foundation
   *  51 Franklin Street, Fifth Floor
   *  Boston, MA  02111-1301  USA
   *
   */
5d3851530   Joe Perches   9p: Reduce object...
28
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
29
30
31
32
33
34
35
36
37
38
39
40
41
42
  #include <linux/in.h>
  #include <linux/module.h>
  #include <linux/net.h>
  #include <linux/ipv6.h>
  #include <linux/kthread.h>
  #include <linux/errno.h>
  #include <linux/kernel.h>
  #include <linux/un.h>
  #include <linux/uaccess.h>
  #include <linux/inet.h>
  #include <linux/idr.h>
  #include <linux/file.h>
  #include <linux/parser.h>
  #include <linux/semaphore.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
43
  #include <linux/slab.h>
c4fac9100   David Howells   9p: Implement sho...
44
  #include <linux/seq_file.h>
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
45
46
47
48
49
  #include <net/9p/9p.h>
  #include <net/9p/client.h>
  #include <net/9p/transport.h>
  #include <rdma/ib_verbs.h>
  #include <rdma/rdma_cm.h>
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
50
51
52
53
54
55
56
57
58
  
/* Transport defaults: 9P-over-RDMA port, send/receive queue depths and
 * scatter-gather entry counts, IRD/ORD hints for the CM handshake, the
 * connection-management event timeout, and the largest 9P message size.
 */
#define P9_PORT			5640
#define P9_RDMA_SQ_DEPTH	32
#define P9_RDMA_RQ_DEPTH	32
#define P9_RDMA_SEND_SGE	4
#define P9_RDMA_RECV_SGE	4
#define P9_RDMA_IRD		0
#define P9_RDMA_ORD		0
#define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
60

fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
61
62
63
64
65
66
67
68
/**
 * struct p9_trans_rdma - RDMA transport instance
 *
 * @state: tracks the transport state machine for connection setup and tear down
 * @cm_id: The RDMA CM ID
 * @pd: Protection Domain pointer
 * @qp: Queue Pair pointer
 * @cq: Completion Queue pointer
 * @timeout: Number of uSecs to wait for connection management events
 * @privport: Whether a privileged port may be used
 * @port: The port to use
 * @sq_depth: The depth of the Send Queue
 * @sq_sem: Semaphore for the SQ
 * @rq_depth: The depth of the Receive Queue.
 * @rq_sem: Semaphore for the RQ
 * @excess_rc : Amount of posted Receive Contexts without a pending request.
 *		See rdma_request()
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
 */
struct p9_trans_rdma {
	/* Connection lifecycle; advanced by p9_cm_event_handler(). */
	enum {
		P9_RDMA_INIT,
		P9_RDMA_ADDR_RESOLVED,
		P9_RDMA_ROUTE_RESOLVED,
		P9_RDMA_CONNECTED,
		P9_RDMA_FLUSHING,
		P9_RDMA_CLOSING,
		P9_RDMA_CLOSED,
	} state;
	struct rdma_cm_id *cm_id;
	struct ib_pd *pd;
	struct ib_qp *qp;
	struct ib_cq *cq;
	long timeout;
	bool privport;
	u16 port;
	int sq_depth;
	struct semaphore sq_sem;
	int rq_depth;
	struct semaphore rq_sem;
	atomic_t excess_rc;
	struct sockaddr_in addr;
	spinlock_t req_lock;

	struct completion cm_done;
};
  
/**
 * p9_rdma_context - Keeps track of in-process WR
 *
 * @cqe: completion-queue entry; its ->done callback routes the work
 *	 completion to send_done() or recv_done()
 * @busa: Bus address to unmap when the WR completes
 * @req: Keeps track of requests (send)
 * @rc: Keeps track of replies (receive)
 */
struct p9_rdma_req;
struct p9_rdma_context {
	struct ib_cqe cqe;
	dma_addr_t busa;
	/* A context is either a send (req) or a receive (rc), never both. */
	union {
		struct p9_req_t *req;
		struct p9_fcall *rc;
	};
};
  
/**
 * p9_rdma_opts - Collection of mount options
 * @port: port of connection
 * @privport: whether to bind the local end to a privileged (reserved) port
 * @sq_depth: The requested depth of the SQ. This really doesn't need
 * to be any deeper than the number of threads used in the client
 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
 * @timeout: Time to wait in msecs for CM events
 */
struct p9_rdma_opts {
	short port;
	bool privport;
	int sq_depth;
	int rq_depth;
	long timeout;
};
  
/*
 * Option Parsing (code inspired by NFS code)
 */
enum {
	/* Options that take integer arguments */
	Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
	/* Options that take no argument */
	Opt_privport,
	Opt_err,
};

/* Maps mount-option strings onto the token enum above for match_token(). */
static match_table_t tokens = {
	{Opt_port, "port=%u"},
	{Opt_sq_depth, "sq=%u"},
	{Opt_rq_depth, "rq=%u"},
	{Opt_timeout, "timeout=%u"},
	{Opt_privport, "privport"},
	{Opt_err, NULL},
};
c4fac9100   David Howells   9p: Implement sho...
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
  {
  	struct p9_trans_rdma *rdma = clnt->trans;
  
  	if (rdma->port != P9_PORT)
  		seq_printf(m, ",port=%u", rdma->port);
  	if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
  		seq_printf(m, ",sq=%u", rdma->sq_depth);
  	if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
  		seq_printf(m, ",rq=%u", rdma->rq_depth);
  	if (rdma->timeout != P9_RDMA_TIMEOUT)
  		seq_printf(m, ",timeout=%lu", rdma->timeout);
  	if (rdma->privport)
  		seq_puts(m, ",privport");
  	return 0;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
180
  /**
0e15597eb   Abhishek Kulkarni   9p: minor comment...
181
182
183
   * parse_opts - parse mount options into rdma options structure
   * @params: options string passed from mount
   * @opts: rdma transport-specific structure to parse options into
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
184
185
186
187
188
189
190
191
   *
   * Returns 0 upon success, -ERRNO upon failure
   */
  static int parse_opts(char *params, struct p9_rdma_opts *opts)
  {
  	char *p;
  	substring_t args[MAX_OPT_ARGS];
  	int option;
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
192
  	char *options, *tmp_options;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
193
194
195
196
197
  
  	opts->port = P9_PORT;
  	opts->sq_depth = P9_RDMA_SQ_DEPTH;
  	opts->rq_depth = P9_RDMA_RQ_DEPTH;
  	opts->timeout = P9_RDMA_TIMEOUT;
c4fac9100   David Howells   9p: Implement sho...
198
  	opts->privport = false;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
199
200
201
  
  	if (!params)
  		return 0;
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
202
203
  	tmp_options = kstrdup(params, GFP_KERNEL);
  	if (!tmp_options) {
5d3851530   Joe Perches   9p: Reduce object...
204
205
206
  		p9_debug(P9_DEBUG_ERROR,
  			 "failed to allocate copy of option string
  ");
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
207
208
  		return -ENOMEM;
  	}
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
209
  	options = tmp_options;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
210
211
212
213
214
215
216
  
  	while ((p = strsep(&options, ",")) != NULL) {
  		int token;
  		int r;
  		if (!*p)
  			continue;
  		token = match_token(p, tokens, args);
f569d3ef8   Dominique Martinet   net/9p: add a pri...
217
218
219
220
221
222
223
224
  		if ((token != Opt_err) && (token != Opt_privport)) {
  			r = match_int(&args[0], &option);
  			if (r < 0) {
  				p9_debug(P9_DEBUG_ERROR,
  					 "integer field, but no integer?
  ");
  				continue;
  			}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
225
226
227
228
229
230
231
232
233
234
235
236
237
238
  		}
  		switch (token) {
  		case Opt_port:
  			opts->port = option;
  			break;
  		case Opt_sq_depth:
  			opts->sq_depth = option;
  			break;
  		case Opt_rq_depth:
  			opts->rq_depth = option;
  			break;
  		case Opt_timeout:
  			opts->timeout = option;
  			break;
f569d3ef8   Dominique Martinet   net/9p: add a pri...
239
  		case Opt_privport:
c4fac9100   David Howells   9p: Implement sho...
240
  			opts->privport = true;
f569d3ef8   Dominique Martinet   net/9p: add a pri...
241
  			break;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
242
243
244
245
246
247
  		default:
  			continue;
  		}
  	}
  	/* RQ must be at least as large as the SQ */
  	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
248
  	kfree(tmp_options);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
  	return 0;
  }
  
/*
 * p9_cm_event_handler - RDMA connection-manager event callback.
 *
 * Advances the transport state machine in lock-step with the expected
 * setup sequence (addr resolved -> route resolved -> established); the
 * BUG_ONs assert that events arrive in that order.  Any CM error event
 * disconnects the client.  Each event completes rdma->cm_done so
 * rdma_create_trans() can wait synchronously on every step.
 *
 * Always returns 0 (a non-zero return would make the CM destroy the id).
 */
static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct p9_client *c = id->context;
	struct p9_trans_rdma *rdma = c->trans;
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		BUG_ON(rdma->state != P9_RDMA_INIT);
		rdma->state = P9_RDMA_ADDR_RESOLVED;
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
		rdma->state = P9_RDMA_ROUTE_RESOLVED;
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
		rdma->state = P9_RDMA_CONNECTED;
		break;

	case RDMA_CM_EVENT_DISCONNECTED:
		if (rdma)
			rdma->state = P9_RDMA_CLOSED;
		if (c)
			c->status = Disconnected;
		break;

	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		break;

	/* All remaining events are treated as fatal connection errors. */
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
	case RDMA_CM_EVENT_REJECTED:
	case RDMA_CM_EVENT_CONNECT_REQUEST:
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
		c->status = Disconnected;
		rdma_disconnect(rdma->cm_id);
		break;
	default:
		BUG();
	}
	/* Wake whoever is waiting in rdma_create_trans(). */
	complete(&rdma->cm_done);
	return 0;
}
  
/*
 * recv_done - completion handler for posted receive buffers.
 *
 * Unmaps the DMA buffer, parses the 9P header to recover the reply tag,
 * matches the reply to its outstanding request and hands it to the
 * client core.  On any error the transport is put into FLUSHING and the
 * client marked Disconnected.  In all cases one receive-queue slot is
 * released (up(&rq_sem)) and the per-WR context is freed.
 */
static void
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct p9_client *client = cq->cq_context;
	struct p9_trans_rdma *rdma = client->trans;
	struct p9_rdma_context *c =
		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
	struct p9_req_t *req;
	int err = 0;
	int16_t tag;

	req = NULL;
	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
							 DMA_FROM_DEVICE);

	if (wc->status != IB_WC_SUCCESS)
		goto err_out;

	err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
	if (err)
		goto err_out;

	req = p9_tag_lookup(client, tag);
	if (!req)
		goto err_out;

	/* Check that we have not yet received a reply for this request.
	 */
	if (unlikely(req->rc)) {
		pr_err("Duplicate reply for request %d", tag);
		goto err_out;
	}

	/* Hand the reply fcall over to the request and notify the waiter. */
	req->rc = c->rc;
	p9_client_cb(client, req, REQ_STATUS_RCVD);

 out:
	up(&rdma->rq_sem);
	kfree(c);
	return;

 err_out:
	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
			req, err, wc->status);
	rdma->state = P9_RDMA_FLUSHING;
	client->status = Disconnected;
	goto out;
}
  
  static void
7cf20fc62   Christoph Hellwig   net/9p: convert t...
352
  send_done(struct ib_cq *cq, struct ib_wc *wc)
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
353
  {
7cf20fc62   Christoph Hellwig   net/9p: convert t...
354
355
356
357
  	struct p9_client *client = cq->cq_context;
  	struct p9_trans_rdma *rdma = client->trans;
  	struct p9_rdma_context *c =
  		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
358
359
360
  	ib_dma_unmap_single(rdma->cm_id->device,
  			    c->busa, c->req->tc->size,
  			    DMA_TO_DEVICE);
7cf20fc62   Christoph Hellwig   net/9p: convert t...
361
362
  	up(&rdma->sq_sem);
  	kfree(c);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
363
364
365
366
  }
  
  static void qp_event_handler(struct ib_event *event, void *context)
  {
5d3851530   Joe Perches   9p: Reduce object...
367
368
369
  	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p
  ",
  		 event->event, context);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
370
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
371
372
373
374
/*
 * rdma_destroy_trans - release every verbs resource owned by a transport.
 *
 * Tear-down order matters: the QP references the PD and CQ, so it goes
 * first, and the cm_id goes last.  Each object is released only if it
 * was actually created (non-NULL and not an ERR_PTR), which makes this
 * safe to call from any point of a failed rdma_create_trans().
 */
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
	if (!rdma)
		return;

	if (rdma->qp && !IS_ERR(rdma->qp))
		ib_destroy_qp(rdma->qp);

	if (rdma->pd && !IS_ERR(rdma->pd))
		ib_dealloc_pd(rdma->pd);

	if (rdma->cq && !IS_ERR(rdma->cq))
		ib_free_cq(rdma->cq);

	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
		rdma_destroy_id(rdma->cm_id);

	kfree(rdma);
}
  
/*
 * post_recv - DMA-map a reply buffer and post it on the receive queue.
 *
 * The caller must already hold one slot of rdma->rq_sem.  When the WR
 * completes, recv_done() is dispatched through c->cqe.  Returns 0 on
 * success, -EIO if the DMA mapping failed, or the ib_post_recv() error.
 */
static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge sge;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->rc->sdata, client->msize,
				    DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	c->cqe.done = recv_done;

	/* Single scatter-gather entry covering the whole reply buffer. */
	sge.addr = c->busa;
	sge.length = client->msize;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	wr.wr_cqe = &c->cqe;
	wr.sg_list = &sge;
	wr.num_sge = 1;
	return ib_post_recv(rdma->qp, &wr, &bad_wr);

 error:
	p9_debug(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
}
  
/*
 * rdma_request - transmit one 9P request over the RDMA connection.
 *
 * A receive buffer for the reply is posted *before* the send, so every
 * outstanding request always has a reply buffer waiting.  If a previous
 * request left an excess posted receive behind (see excess_rc), this
 * request absorbs it instead of posting its own.  Errors before the
 * post_recv() tear the connection down; errors after it record one
 * excess receive for a later request to absorb.
 */
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_send_wr wr, *bad_wr;
	struct ib_sge sge;
	int err = 0;
	unsigned long flags;
	struct p9_rdma_context *c = NULL;
	struct p9_rdma_context *rpl_context = NULL;

	/* When an error occurs between posting the recv and the send,
	 * there will be a receive context posted without a pending request.
	 * Since there is no way to "un-post" it, we remember it and skip
	 * post_recv() for the next request.
	 * So here,
	 * see if we are this `next request' and need to absorb an excess rc.
	 * If yes, then drop and free our own, and do not recv_post().
	 **/
	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
			/* Got one ! */
			kfree(req->rc);
			req->rc = NULL;
			goto dont_need_post_recv;
		} else {
			/* We raced and lost. */
			atomic_inc(&rdma->excess_rc);
		}
	}

	/* Allocate an fcall for the reply */
	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
	if (!rpl_context) {
		err = -ENOMEM;
		goto recv_error;
	}
	rpl_context->rc = req->rc;

	/*
	 * Post a receive buffer for this request. We need to ensure
	 * there is a reply buffer available for every outstanding
	 * request. A flushed request can result in no reply for an
	 * outstanding request, so we must keep a count to avoid
	 * overflowing the RQ.
	 */
	if (down_interruptible(&rdma->rq_sem)) {
		err = -EINTR;
		goto recv_error;
	}

	err = post_recv(client, rpl_context);
	if (err) {
		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
		goto recv_error;
	}
	/* remove posted receive buffer from request structure */
	req->rc = NULL;

dont_need_post_recv:
	/* Post the request */
	c = kmalloc(sizeof *c, GFP_NOFS);
	if (!c) {
		err = -ENOMEM;
		goto send_error;
	}
	c->req = req;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->req->tc->sdata, c->req->tc->size,
				    DMA_TO_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
		err = -EIO;
		goto send_error;
	}

	c->cqe.done = send_done;

	sge.addr = c->busa;
	sge.length = c->req->tc->size;
	sge.lkey = rdma->pd->local_dma_lkey;

	wr.next = NULL;
	wr.wr_cqe = &c->cqe;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	/* Wait for a free send-queue slot; send_done() releases it. */
	if (down_interruptible(&rdma->sq_sem)) {
		err = -EINTR;
		goto send_error;
	}

	/* Mark request as `sent' *before* we actually send it,
	 * because doing if after could erase the REQ_STATUS_RCVD
	 * status in case of a very fast reply.
	 */
	req->status = REQ_STATUS_SENT;
	err = ib_post_send(rdma->qp, &wr, &bad_wr);
	if (err)
		goto send_error;

	/* Success */
	return 0;

 /* Handle errors that happened during or while preparing the send: */
 send_error:
	req->status = REQ_STATUS_ERROR;
	kfree(c);
	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

	/* Ach.
	 *  We did recv_post(), but not send. We have one recv_post in excess.
	 */
	atomic_inc(&rdma->excess_rc);
	return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
	kfree(rpl_context);
	spin_lock_irqsave(&rdma->req_lock, flags);
	if (rdma->state < P9_RDMA_CLOSING) {
		rdma->state = P9_RDMA_CLOSING;
		spin_unlock_irqrestore(&rdma->req_lock, flags);
		rdma_disconnect(rdma->cm_id);
	} else
		spin_unlock_irqrestore(&rdma->req_lock, flags);
	return err;
}
  
  static void rdma_close(struct p9_client *client)
  {
  	struct p9_trans_rdma *rdma;
  
  	if (!client)
  		return;
  
  	rdma = client->trans;
  	if (!rdma)
  		return;
  
  	client->status = Disconnected;
  	rdma_disconnect(rdma->cm_id);
  	rdma_destroy_trans(rdma);
  }
  
  /**
   * alloc_rdma - Allocate and initialize the rdma transport structure
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
563
564
565
566
567
568
569
570
571
   * @opts: Mount options structure
   */
  static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
  {
  	struct p9_trans_rdma *rdma;
  
  	rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
  	if (!rdma)
  		return NULL;
c4fac9100   David Howells   9p: Implement sho...
572
573
  	rdma->port = opts->port;
  	rdma->privport = opts->privport;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
574
575
576
577
578
579
  	rdma->sq_depth = opts->sq_depth;
  	rdma->rq_depth = opts->rq_depth;
  	rdma->timeout = opts->timeout;
  	spin_lock_init(&rdma->req_lock);
  	init_completion(&rdma->cm_done);
  	sema_init(&rdma->sq_sem, rdma->sq_depth);
fd453d0ed   Simon Derr   9P/RDMA: Use a se...
580
  	sema_init(&rdma->rq_sem, rdma->rq_depth);
1cff33069   Simon Derr   9P/RDMA: count po...
581
  	atomic_set(&rdma->excess_rc, 0);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
582
583
584
  
  	return rdma;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
585
586
  static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
  {
931700d26   Simon Derr   9pnet_rdma: add c...
587
588
589
  	/* Nothing to do here.
  	 * We will take care of it (if we have to) in rdma_cancelled()
  	 */
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
590
591
  	return 1;
  }
931700d26   Simon Derr   9pnet_rdma: add c...
592
593
594
595
596
597
598
599
600
  /* A request has been fully flushed without a reply.
   * That means we have posted one buffer in excess.
   */
  static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
  {
  	struct p9_trans_rdma *rdma = client->trans;
  	atomic_inc(&rdma->excess_rc);
  	return 0;
  }
f569d3ef8   Dominique Martinet   net/9p: add a pri...
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
  static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
  {
  	struct sockaddr_in cl = {
  		.sin_family = AF_INET,
  		.sin_addr.s_addr = htonl(INADDR_ANY),
  	};
  	int port, err = -EINVAL;
  
  	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
  		cl.sin_port = htons((ushort)port);
  		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
  		if (err != -EADDRINUSE)
  			break;
  	}
  	return err;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
617
618
619
620
621
622
623
624
625
626
627
628
629
630
/**
 * trans_create_rdma - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 *
 * Builds the full verbs stack (cm_id, CQ, PD, QP) and connects to the
 * server, waiting synchronously on cm_done for each CM step.  On any
 * failure all partially-created resources are torn down and -ENOTCONN
 * is returned.
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;

	if (addr == NULL)
		return -EINVAL;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
				     RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	/* p9_cm_event_handler() completes cm_done after each CM step. */
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Create the Completion Queue (shared by the SQ and RQ) */
	rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
			opts.sq_depth + opts.rq_depth + 1,
			0, IB_POLL_SOFTIRQ);
	if (IS_ERR(rdma->cq))
		goto error;

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	/* rdma_destroy_trans() copes with partially-constructed state. */
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
  
/* 9P transport operations table; registered with the 9P core at init. */
static struct p9_trans_module p9_rdma_trans = {
	.name = "rdma",
	.maxsize = P9_RDMA_MAXSIZE,
	.def = 0,		/* never the default transport */
	.owner = THIS_MODULE,
	.create = rdma_create_trans,
	.close = rdma_close,
	.request = rdma_request,
	.cancel = rdma_cancel,
	.cancelled = rdma_cancelled,
	.show_options = p9_rdma_show_options,
};
  
/**
 * p9_trans_rdma_init - Register the 9P RDMA transport driver
 */
static int __init p9_trans_rdma_init(void)
{
	v9fs_register_trans(&p9_rdma_trans);
	return 0;
}

/* Unregister the transport when the module is unloaded. */
static void __exit p9_trans_rdma_exit(void)
{
	v9fs_unregister_trans(&p9_rdma_trans);
}

module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);

MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");