Blame view

net/9p/trans_rdma.c 18.5 KB
1f3276132   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
2
3
4
5
6
7
8
9
10
11
  /*
   * net/9p/trans_rdma.c
   *
   * RDMA transport layer based on the trans_fd.c implementation.
   *
   *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
   *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
   *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
   *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
   *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
12
   */
5d3851530   Joe Perches   9p: Reduce object...
13
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  #include <linux/in.h>
  #include <linux/module.h>
  #include <linux/net.h>
  #include <linux/ipv6.h>
  #include <linux/kthread.h>
  #include <linux/errno.h>
  #include <linux/kernel.h>
  #include <linux/un.h>
  #include <linux/uaccess.h>
  #include <linux/inet.h>
  #include <linux/idr.h>
  #include <linux/file.h>
  #include <linux/parser.h>
  #include <linux/semaphore.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
28
  #include <linux/slab.h>
c4fac9100   David Howells   9p: Implement sho...
29
  #include <linux/seq_file.h>
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
30
31
32
33
34
  #include <net/9p/9p.h>
  #include <net/9p/client.h>
  #include <net/9p/transport.h>
  #include <rdma/ib_verbs.h>
  #include <rdma/rdma_cm.h>
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
35
36
37
38
39
40
41
42
43
  
  #define P9_PORT			5640
  #define P9_RDMA_SQ_DEPTH	32
  #define P9_RDMA_RQ_DEPTH	32
  #define P9_RDMA_SEND_SGE	4
  #define P9_RDMA_RECV_SGE	4
  #define P9_RDMA_IRD		0
  #define P9_RDMA_ORD		0
  #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */
3fcc62f4e   Simon Derr   9P/RDMA: increase...
44
  #define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
45

fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
46
47
48
49
50
51
52
53
  /**
   * struct p9_trans_rdma - RDMA transport instance
   *
   * @state: tracks the transport state machine for connection setup and tear down
   * @cm_id: The RDMA CM ID
   * @pd: Protection Domain pointer
   * @qp: Queue Pair pointer
   * @cq: Completion Queue pointer
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
54
   * @timeout: Number of uSecs to wait for connection management events
c4fac9100   David Howells   9p: Implement sho...
55
56
   * @privport: Whether a privileged port may be used
   * @port: The port to use
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
57
58
59
   * @sq_depth: The depth of the Send Queue
   * @sq_sem: Semaphore for the SQ
   * @rq_depth: The depth of the Receive Queue.
fd453d0ed   Simon Derr   9P/RDMA: Use a se...
60
   * @rq_sem: Semaphore for the RQ
1cff33069   Simon Derr   9P/RDMA: count po...
61
62
   * @excess_rc : Amount of posted Receive Contexts without a pending request.
   *		See rdma_request()
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
63
64
   * @addr: The remote peer's address
   * @req_lock: Protects the active request list
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
   * @cm_done: Completion event for connection management tracking
   */
  struct p9_trans_rdma {
  	enum {
  		P9_RDMA_INIT,
  		P9_RDMA_ADDR_RESOLVED,
  		P9_RDMA_ROUTE_RESOLVED,
  		P9_RDMA_CONNECTED,
  		P9_RDMA_FLUSHING,
  		P9_RDMA_CLOSING,
  		P9_RDMA_CLOSED,
  	} state;
  	struct rdma_cm_id *cm_id;
  	struct ib_pd *pd;
  	struct ib_qp *qp;
  	struct ib_cq *cq;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
81
  	long timeout;
c4fac9100   David Howells   9p: Implement sho...
82
83
  	bool privport;
  	u16 port;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
84
85
86
  	int sq_depth;
  	struct semaphore sq_sem;
  	int rq_depth;
fd453d0ed   Simon Derr   9P/RDMA: Use a se...
87
  	struct semaphore rq_sem;
1cff33069   Simon Derr   9P/RDMA: count po...
88
  	atomic_t excess_rc;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
89
90
91
92
93
  	struct sockaddr_in addr;
  	spinlock_t req_lock;
  
  	struct completion cm_done;
  };
15e522a7b   Andrew Lunn   net: 9p: kerneldo...
94
  struct p9_rdma_req;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
95
  /**
15e522a7b   Andrew Lunn   net: 9p: kerneldo...
96
   * struct p9_rdma_context - Keeps track of in-process WR
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
97
   *
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
98
99
100
101
   * @busa: Bus address to unmap when the WR completes
   * @req: Keeps track of requests (send)
   * @rc: Keepts track of replies (receive)
   */
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
102
  struct p9_rdma_context {
7cf20fc62   Christoph Hellwig   net/9p: convert t...
103
  	struct ib_cqe cqe;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
104
105
106
  	dma_addr_t busa;
  	union {
  		struct p9_req_t *req;
523adb6cc   Dominique Martinet   9p: embed fcall i...
107
  		struct p9_fcall rc;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
108
109
110
111
  	};
  };
  
  /**
15e522a7b   Andrew Lunn   net: 9p: kerneldo...
112
   * struct p9_rdma_opts - Collection of mount options
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
113
114
115
116
117
118
119
120
   * @port: port of connection
   * @sq_depth: The requested depth of the SQ. This really doesn't need
   * to be any deeper than the number of threads used in the client
   * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
   * @timeout: Time to wait in msecs for CM events
   */
  struct p9_rdma_opts {
  	short port;
c4fac9100   David Howells   9p: Implement sho...
121
  	bool privport;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
122
123
124
125
126
127
128
129
130
131
  	int sq_depth;
  	int rq_depth;
  	long timeout;
  };
  
  /*
   * Option Parsing (code inspired by NFS code)
   *
   * Token identifiers returned by match_token() for the patterns in tokens[].
   */
  enum {
  	/* Options that take integer arguments */
  	Opt_port,
  	Opt_rq_depth,
  	Opt_sq_depth,
  	Opt_timeout,
  	/* Options that take no argument */
  	Opt_privport,
  	/* Anything that matched no pattern */
  	Opt_err,
  };
  
  static match_table_t tokens = {
  	{Opt_port, "port=%u"},
  	{Opt_sq_depth, "sq=%u"},
  	{Opt_rq_depth, "rq=%u"},
  	{Opt_timeout, "timeout=%u"},
f569d3ef8   Dominique Martinet   net/9p: add a pri...
143
  	{Opt_privport, "privport"},
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
144
145
  	{Opt_err, NULL},
  };
c4fac9100   David Howells   9p: Implement sho...
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
  /*
   * p9_rdma_show_options - emit the transport options that differ from defaults
   * @m: seq_file to print into
   * @clnt: client whose RDMA transport settings are reported
   *
   * Each value is printed with a leading comma and only when it is not the
   * compiled-in default.  Always returns 0.
   */
  static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
  {
  	struct p9_trans_rdma *trans = clnt->trans;
  
  	if (trans->port != P9_PORT)
  		seq_printf(m, ",port=%u", trans->port);
  
  	if (trans->sq_depth != P9_RDMA_SQ_DEPTH)
  		seq_printf(m, ",sq=%u", trans->sq_depth);
  
  	if (trans->rq_depth != P9_RDMA_RQ_DEPTH)
  		seq_printf(m, ",rq=%u", trans->rq_depth);
  
  	if (trans->timeout != P9_RDMA_TIMEOUT)
  		seq_printf(m, ",timeout=%lu", trans->timeout);
  
  	if (trans->privport)
  		seq_puts(m, ",privport");
  
  	return 0;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
162
  /**
0e15597eb   Abhishek Kulkarni   9p: minor comment...
163
164
165
   * parse_opts - parse mount options into rdma options structure
   * @params: options string passed from mount
   * @opts: rdma transport-specific structure to parse options into
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
166
167
168
169
170
171
172
173
   *
   * Returns 0 upon success, -ERRNO upon failure
   */
  static int parse_opts(char *params, struct p9_rdma_opts *opts)
  {
  	char *p;
  	substring_t args[MAX_OPT_ARGS];
  	int option;
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
174
  	char *options, *tmp_options;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
175
176
177
178
179
  
  	opts->port = P9_PORT;
  	opts->sq_depth = P9_RDMA_SQ_DEPTH;
  	opts->rq_depth = P9_RDMA_RQ_DEPTH;
  	opts->timeout = P9_RDMA_TIMEOUT;
c4fac9100   David Howells   9p: Implement sho...
180
  	opts->privport = false;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
181
182
183
  
  	if (!params)
  		return 0;
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
184
185
  	tmp_options = kstrdup(params, GFP_KERNEL);
  	if (!tmp_options) {
5d3851530   Joe Perches   9p: Reduce object...
186
187
188
  		p9_debug(P9_DEBUG_ERROR,
  			 "failed to allocate copy of option string
  ");
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
189
190
  		return -ENOMEM;
  	}
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
191
  	options = tmp_options;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
192
193
194
195
196
197
198
  
  	while ((p = strsep(&options, ",")) != NULL) {
  		int token;
  		int r;
  		if (!*p)
  			continue;
  		token = match_token(p, tokens, args);
f569d3ef8   Dominique Martinet   net/9p: add a pri...
199
200
201
202
203
204
205
206
  		if ((token != Opt_err) && (token != Opt_privport)) {
  			r = match_int(&args[0], &option);
  			if (r < 0) {
  				p9_debug(P9_DEBUG_ERROR,
  					 "integer field, but no integer?
  ");
  				continue;
  			}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  		}
  		switch (token) {
  		case Opt_port:
  			opts->port = option;
  			break;
  		case Opt_sq_depth:
  			opts->sq_depth = option;
  			break;
  		case Opt_rq_depth:
  			opts->rq_depth = option;
  			break;
  		case Opt_timeout:
  			opts->timeout = option;
  			break;
f569d3ef8   Dominique Martinet   net/9p: add a pri...
221
  		case Opt_privport:
c4fac9100   David Howells   9p: Implement sho...
222
  			opts->privport = true;
f569d3ef8   Dominique Martinet   net/9p: add a pri...
223
  			break;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
224
225
226
227
228
229
  		default:
  			continue;
  		}
  	}
  	/* RQ must be at least as large as the SQ */
  	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
d8c8a9e36   Eric Van Hensbergen   9p: fix option pa...
230
  	kfree(tmp_options);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
  	return 0;
  }
  
  static int
  p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
  {
  	struct p9_client *c = id->context;
  	struct p9_trans_rdma *rdma = c->trans;
  	switch (event->event) {
  	case RDMA_CM_EVENT_ADDR_RESOLVED:
  		BUG_ON(rdma->state != P9_RDMA_INIT);
  		rdma->state = P9_RDMA_ADDR_RESOLVED;
  		break;
  
  	case RDMA_CM_EVENT_ROUTE_RESOLVED:
  		BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
  		rdma->state = P9_RDMA_ROUTE_RESOLVED;
  		break;
  
  	case RDMA_CM_EVENT_ESTABLISHED:
  		BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
  		rdma->state = P9_RDMA_CONNECTED;
  		break;
  
  	case RDMA_CM_EVENT_DISCONNECTED:
  		if (rdma)
  			rdma->state = P9_RDMA_CLOSED;
473c7dd1d   Dominique Martinet   9p/rdma: remove u...
258
  		c->status = Disconnected;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
  		break;
  
  	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
  		break;
  
  	case RDMA_CM_EVENT_ADDR_CHANGE:
  	case RDMA_CM_EVENT_ROUTE_ERROR:
  	case RDMA_CM_EVENT_DEVICE_REMOVAL:
  	case RDMA_CM_EVENT_MULTICAST_JOIN:
  	case RDMA_CM_EVENT_MULTICAST_ERROR:
  	case RDMA_CM_EVENT_REJECTED:
  	case RDMA_CM_EVENT_CONNECT_REQUEST:
  	case RDMA_CM_EVENT_CONNECT_RESPONSE:
  	case RDMA_CM_EVENT_CONNECT_ERROR:
  	case RDMA_CM_EVENT_ADDR_ERROR:
  	case RDMA_CM_EVENT_UNREACHABLE:
  		c->status = Disconnected;
  		rdma_disconnect(rdma->cm_id);
  		break;
  	default:
  		BUG();
  	}
  	complete(&rdma->cm_done);
  	return 0;
  }
  
  static void
7cf20fc62   Christoph Hellwig   net/9p: convert t...
286
  recv_done(struct ib_cq *cq, struct ib_wc *wc)
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
287
  {
7cf20fc62   Christoph Hellwig   net/9p: convert t...
288
289
290
291
  	struct p9_client *client = cq->cq_context;
  	struct p9_trans_rdma *rdma = client->trans;
  	struct p9_rdma_context *c =
  		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
292
293
294
295
296
297
298
  	struct p9_req_t *req;
  	int err = 0;
  	int16_t tag;
  
  	req = NULL;
  	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
  							 DMA_FROM_DEVICE);
7cf20fc62   Christoph Hellwig   net/9p: convert t...
299
  	if (wc->status != IB_WC_SUCCESS)
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
300
  		goto err_out;
523adb6cc   Dominique Martinet   9p: embed fcall i...
301
302
  	c->rc.size = wc->byte_len;
  	err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
303
304
305
306
307
308
  	if (err)
  		goto err_out;
  
  	req = p9_tag_lookup(client, tag);
  	if (!req)
  		goto err_out;
47229ff85   Simon Derr   9P/RDMA: Protect ...
309
310
  	/* Check that we have not yet received a reply for this request.
  	 */
523adb6cc   Dominique Martinet   9p: embed fcall i...
311
  	if (unlikely(req->rc.sdata)) {
47229ff85   Simon Derr   9P/RDMA: Protect ...
312
313
314
  		pr_err("Duplicate reply for request %d", tag);
  		goto err_out;
  	}
523adb6cc   Dominique Martinet   9p: embed fcall i...
315
316
  	req->rc.size = c->rc.size;
  	req->rc.sdata = c->rc.sdata;
2b6e72ed7   Dominique Martinet   9P: Add memory ba...
317
  	p9_client_cb(client, req, REQ_STATUS_RCVD);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
318

7cf20fc62   Christoph Hellwig   net/9p: convert t...
319
320
321
   out:
  	up(&rdma->rq_sem);
  	kfree(c);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
322
323
324
  	return;
  
   err_out:
7cf20fc62   Christoph Hellwig   net/9p: convert t...
325
326
327
  	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d
  ",
  			req, err, wc->status);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
328
329
  	rdma->state = P9_RDMA_FLUSHING;
  	client->status = Disconnected;
7cf20fc62   Christoph Hellwig   net/9p: convert t...
330
  	goto out;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
331
332
333
  }
  
  static void
7cf20fc62   Christoph Hellwig   net/9p: convert t...
334
  send_done(struct ib_cq *cq, struct ib_wc *wc)
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
335
  {
7cf20fc62   Christoph Hellwig   net/9p: convert t...
336
337
338
339
  	struct p9_client *client = cq->cq_context;
  	struct p9_trans_rdma *rdma = client->trans;
  	struct p9_rdma_context *c =
  		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
340
  	ib_dma_unmap_single(rdma->cm_id->device,
523adb6cc   Dominique Martinet   9p: embed fcall i...
341
  			    c->busa, c->req->tc.size,
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
342
  			    DMA_TO_DEVICE);
7cf20fc62   Christoph Hellwig   net/9p: convert t...
343
  	up(&rdma->sq_sem);
728356ded   Tomas Bortoli   9p: Add refcount ...
344
  	p9_req_put(c->req);
7cf20fc62   Christoph Hellwig   net/9p: convert t...
345
  	kfree(c);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
346
347
348
349
  }
  
  static void qp_event_handler(struct ib_event *event, void *context)
  {
5d3851530   Joe Perches   9p: Reduce object...
350
351
352
  	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p
  ",
  		 event->event, context);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
353
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
354
355
356
357
  /*
   * rdma_destroy_trans - release every resource held by a transport
   * @rdma: transport to tear down; may be NULL
   *
   * Members that are NULL or hold an ERR_PTR value are skipped, so this can
   * be called on a partially constructed transport.  Frees @rdma itself.
   */
  static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
  {
  	if (!rdma)
  		return;
  
  	if (!IS_ERR_OR_NULL(rdma->qp))
  		ib_destroy_qp(rdma->qp);
  
  	if (!IS_ERR_OR_NULL(rdma->pd))
  		ib_dealloc_pd(rdma->pd);
  
  	if (!IS_ERR_OR_NULL(rdma->cq))
  		ib_free_cq(rdma->cq);
  
  	if (!IS_ERR_OR_NULL(rdma->cm_id))
  		rdma_destroy_id(rdma->cm_id);
  
  	kfree(rdma);
  }
  
  static int
  post_recv(struct p9_client *client, struct p9_rdma_context *c)
  {
  	struct p9_trans_rdma *rdma = client->trans;
72bc4d375   Bart Van Assche   net/9p: Simplify ...
377
  	struct ib_recv_wr wr;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
378
379
380
  	struct ib_sge sge;
  
  	c->busa = ib_dma_map_single(rdma->cm_id->device,
523adb6cc   Dominique Martinet   9p: embed fcall i...
381
  				    c->rc.sdata, client->msize,
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
382
383
384
  				    DMA_FROM_DEVICE);
  	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
  		goto error;
7cf20fc62   Christoph Hellwig   net/9p: convert t...
385
  	c->cqe.done = recv_done;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
386
387
  	sge.addr = c->busa;
  	sge.length = client->msize;
2f31fa881   Jason Gunthorpe   net/9p: Remove ib...
388
  	sge.lkey = rdma->pd->local_dma_lkey;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
389
390
  
  	wr.next = NULL;
7cf20fc62   Christoph Hellwig   net/9p: convert t...
391
  	wr.wr_cqe = &c->cqe;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
392
393
  	wr.sg_list = &sge;
  	wr.num_sge = 1;
72bc4d375   Bart Van Assche   net/9p: Simplify ...
394
  	return ib_post_recv(rdma->qp, &wr, NULL);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
395
396
  
   error:
5d3851530   Joe Perches   9p: Reduce object...
397
398
  	p9_debug(P9_DEBUG_ERROR, "EIO
  ");
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
399
400
401
402
403
404
  	return -EIO;
  }
  
  static int rdma_request(struct p9_client *client, struct p9_req_t *req)
  {
  	struct p9_trans_rdma *rdma = client->trans;
72bc4d375   Bart Van Assche   net/9p: Simplify ...
405
  	struct ib_send_wr wr;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
406
407
408
409
410
  	struct ib_sge sge;
  	int err = 0;
  	unsigned long flags;
  	struct p9_rdma_context *c = NULL;
  	struct p9_rdma_context *rpl_context = NULL;
1cff33069   Simon Derr   9P/RDMA: count po...
411
412
413
414
415
416
417
418
419
420
  	/* When an error occurs between posting the recv and the send,
  	 * there will be a receive context posted without a pending request.
  	 * Since there is no way to "un-post" it, we remember it and skip
  	 * post_recv() for the next request.
  	 * So here,
  	 * see if we are this `next request' and need to absorb an excess rc.
  	 * If yes, then drop and free our own, and do not recv_post().
  	 **/
  	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
  		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
523adb6cc   Dominique Martinet   9p: embed fcall i...
421
422
423
  			/* Got one! */
  			p9_fcall_fini(&req->rc);
  			req->rc.sdata = NULL;
1cff33069   Simon Derr   9P/RDMA: count po...
424
425
426
427
428
429
  			goto dont_need_post_recv;
  		} else {
  			/* We raced and lost. */
  			atomic_inc(&rdma->excess_rc);
  		}
  	}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
430
  	/* Allocate an fcall for the reply */
eeff66ef6   Aneesh Kumar K.V   net/9p: Convert t...
431
  	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
432
433
  	if (!rpl_context) {
  		err = -ENOMEM;
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
434
  		goto recv_error;
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
435
  	}
523adb6cc   Dominique Martinet   9p: embed fcall i...
436
  	rpl_context->rc.sdata = req->rc.sdata;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
437
438
439
440
441
442
443
444
  
  	/*
  	 * Post a receive buffer for this request. We need to ensure
  	 * there is a reply buffer available for every outstanding
  	 * request. A flushed request can result in no reply for an
  	 * outstanding request, so we must keep a count to avoid
  	 * overflowing the RQ.
  	 */
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
445
446
447
448
  	if (down_interruptible(&rdma->rq_sem)) {
  		err = -EINTR;
  		goto recv_error;
  	}
fd453d0ed   Simon Derr   9P/RDMA: Use a se...
449
450
451
  
  	err = post_recv(client, rpl_context);
  	if (err) {
8b894adb2   Dominique Martinet   9p/rdma: do not d...
452
453
  		p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d
  ", err);
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
454
  		goto recv_error;
fd453d0ed   Simon Derr   9P/RDMA: Use a se...
455
  	}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
456
  	/* remove posted receive buffer from request structure */
523adb6cc   Dominique Martinet   9p: embed fcall i...
457
  	req->rc.sdata = NULL;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
458

1cff33069   Simon Derr   9P/RDMA: count po...
459
  dont_need_post_recv:
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
460
  	/* Post the request */
eeff66ef6   Aneesh Kumar K.V   net/9p: Convert t...
461
  	c = kmalloc(sizeof *c, GFP_NOFS);
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
462
463
  	if (!c) {
  		err = -ENOMEM;
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
464
  		goto send_error;
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
465
  	}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
466
467
468
  	c->req = req;
  
  	c->busa = ib_dma_map_single(rdma->cm_id->device,
523adb6cc   Dominique Martinet   9p: embed fcall i...
469
  				    c->req->tc.sdata, c->req->tc.size,
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
470
  				    DMA_TO_DEVICE);
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
471
472
473
474
  	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
  		err = -EIO;
  		goto send_error;
  	}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
475

7cf20fc62   Christoph Hellwig   net/9p: convert t...
476
  	c->cqe.done = send_done;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
477
  	sge.addr = c->busa;
523adb6cc   Dominique Martinet   9p: embed fcall i...
478
  	sge.length = c->req->tc.size;
2f31fa881   Jason Gunthorpe   net/9p: Remove ib...
479
  	sge.lkey = rdma->pd->local_dma_lkey;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
480
481
  
  	wr.next = NULL;
7cf20fc62   Christoph Hellwig   net/9p: convert t...
482
  	wr.wr_cqe = &c->cqe;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
483
484
485
486
  	wr.opcode = IB_WR_SEND;
  	wr.send_flags = IB_SEND_SIGNALED;
  	wr.sg_list = &sge;
  	wr.num_sge = 1;
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
487
488
489
490
  	if (down_interruptible(&rdma->sq_sem)) {
  		err = -EINTR;
  		goto send_error;
  	}
3f9d5b8df   Simon Derr   9pnet_rdma: updat...
491
492
493
494
495
  	/* Mark request as `sent' *before* we actually send it,
  	 * because doing if after could erase the REQ_STATUS_RCVD
  	 * status in case of a very fast reply.
  	 */
  	req->status = REQ_STATUS_SENT;
72bc4d375   Bart Van Assche   net/9p: Simplify ...
496
  	err = ib_post_send(rdma->qp, &wr, NULL);
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
497
498
  	if (err)
  		goto send_error;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
499

2f52d07cb   Simon Derr   9P/RDMA: Improve ...
500
501
  	/* Success */
  	return 0;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
502

2f52d07cb   Simon Derr   9P/RDMA: Improve ...
503
504
   /* Handle errors that happened during or while preparing the send: */
   send_error:
3f9d5b8df   Simon Derr   9pnet_rdma: updat...
505
  	req->status = REQ_STATUS_ERROR;
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
506
  	kfree(c);
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
507
508
  	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()
  ", err);
1cff33069   Simon Derr   9P/RDMA: count po...
509
510
511
512
513
  
  	/* Ach.
  	 *  We did recv_post(), but not send. We have one recv_post in excess.
  	 */
  	atomic_inc(&rdma->excess_rc);
2f52d07cb   Simon Derr   9P/RDMA: Improve ...
514
515
516
517
  	return err;
  
   /* Handle errors that happened during or while preparing post_recv(): */
   recv_error:
1d6400c7c   Davidlohr Bueso   net/9p: fix memor...
518
  	kfree(rpl_context);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
519
  	spin_lock_irqsave(&rdma->req_lock, flags);
8b894adb2   Dominique Martinet   9p/rdma: do not d...
520
  	if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
  		rdma->state = P9_RDMA_CLOSING;
  		spin_unlock_irqrestore(&rdma->req_lock, flags);
  		rdma_disconnect(rdma->cm_id);
  	} else
  		spin_unlock_irqrestore(&rdma->req_lock, flags);
  	return err;
  }
  
  static void rdma_close(struct p9_client *client)
  {
  	struct p9_trans_rdma *rdma;
  
  	if (!client)
  		return;
  
  	rdma = client->trans;
  	if (!rdma)
  		return;
  
  	client->status = Disconnected;
  	rdma_disconnect(rdma->cm_id);
  	rdma_destroy_trans(rdma);
  }
  
  /**
   * alloc_rdma - Allocate and initialize the rdma transport structure
   * @opts: Mount options structure
   *
   * Copies the parsed options into a zeroed transport and initialises its
   * lock, completion, semaphores and excess receive counter.
   *
   * Returns the new transport, or NULL if the allocation failed.
   */
  static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
  {
  	struct p9_trans_rdma *trans;
  
  	trans = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
  	if (trans == NULL)
  		return NULL;
  
  	/* Settings taken from the mount options */
  	trans->port = opts->port;
  	trans->privport = opts->privport;
  	trans->sq_depth = opts->sq_depth;
  	trans->rq_depth = opts->rq_depth;
  	trans->timeout = opts->timeout;
  
  	/* Synchronisation primitives; each semaphore starts at its queue depth */
  	spin_lock_init(&trans->req_lock);
  	init_completion(&trans->cm_done);
  	sema_init(&trans->sq_sem, trans->sq_depth);
  	sema_init(&trans->rq_sem, trans->rq_depth);
  	atomic_set(&trans->excess_rc, 0);
  
  	return trans;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
569
570
  /*
   * rdma_cancel - transport hook for cancelling a request
   * @client: client instance (unused)
   * @req: request to cancel (unused)
   *
   * Does no work and always returns 1.
   */
  static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
  {
  	/* Nothing to do here.
  	 * We will take care of it (if we have to) in rdma_cancelled()
  	 */
  	return 1;
  }
931700d26   Simon Derr   9pnet_rdma: add c...
576
577
578
579
580
581
582
583
584
  /*
   * rdma_cancelled - account for a request flushed without a reply
   * @client: client instance owning the RDMA transport
   * @req: the flushed request (unused)
   *
   * A request has been fully flushed without a reply.
   * That means we have posted one buffer in excess, so count it.
   * Always returns 0.
   */
  static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
  {
  	struct p9_trans_rdma *trans = client->trans;
  
  	atomic_inc(&trans->excess_rc);
  
  	return 0;
  }
f569d3ef8   Dominique Martinet   net/9p: add a pri...
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
  static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
  {
  	struct sockaddr_in cl = {
  		.sin_family = AF_INET,
  		.sin_addr.s_addr = htonl(INADDR_ANY),
  	};
  	int port, err = -EINVAL;
  
  	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
  		cl.sin_port = htons((ushort)port);
  		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
  		if (err != -EADDRINUSE)
  			break;
  	}
  	return err;
  }
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
601
  /**
4a026da91   Sun Lianwen   net/9p: correct s...
602
   * rdma_create_trans - Transport method for creating a transport instance
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
603
604
605
606
607
608
609
610
611
612
613
614
   * @client: client instance
   * @addr: IP address string
   * @args: Mount options string
   */
  static int
  rdma_create_trans(struct p9_client *client, const char *addr, char *args)
  {
  	int err;
  	struct p9_rdma_opts opts;
  	struct p9_trans_rdma *rdma;
  	struct rdma_conn_param conn_param;
  	struct ib_qp_init_attr qp_attr;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
615

10aa14527   Tomas Bortoli   9p: fix multiple ...
616
617
  	if (addr == NULL)
  		return -EINVAL;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
618
619
620
621
622
623
624
625
626
627
628
  	/* Parse the transport specific mount options */
  	err = parse_opts(args, &opts);
  	if (err < 0)
  		return err;
  
  	/* Create and initialize the RDMA transport structure */
  	rdma = alloc_rdma(&opts);
  	if (!rdma)
  		return -ENOMEM;
  
  	/* Create the RDMA CM ID */
fa20105e0   Guy Shapiro   IB/cma: Add suppo...
629
630
  	rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
  				     RDMA_PS_TCP, IB_QPT_RC);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
631
632
  	if (IS_ERR(rdma->cm_id))
  		goto error;
517ac45af   Tom Tucker   9p: rdma: Set tra...
633
634
  	/* Associate the client with the transport */
  	client->trans = rdma;
f569d3ef8   Dominique Martinet   net/9p: add a pri...
635
636
637
638
639
640
641
642
643
644
  	/* Bind to a privileged port if we need to */
  	if (opts.privport) {
  		err = p9_rdma_bind_privport(rdma);
  		if (err < 0) {
  			pr_err("%s (%d): problem binding to privport: %d
  ",
  			       __func__, task_pid_nr(current), -err);
  			goto error;
  		}
  	}
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
  	/* Resolve the server's address */
  	rdma->addr.sin_family = AF_INET;
  	rdma->addr.sin_addr.s_addr = in_aton(addr);
  	rdma->addr.sin_port = htons(opts.port);
  	err = rdma_resolve_addr(rdma->cm_id, NULL,
  				(struct sockaddr *)&rdma->addr,
  				rdma->timeout);
  	if (err)
  		goto error;
  	err = wait_for_completion_interruptible(&rdma->cm_done);
  	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
  		goto error;
  
  	/* Resolve the route to the server */
  	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
  	if (err)
  		goto error;
  	err = wait_for_completion_interruptible(&rdma->cm_done);
  	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
  		goto error;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
665
  	/* Create the Completion Queue */
20cf4e026   Chuck Lever   rdma: Enable ib_a...
666
667
668
  	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
  				   opts.sq_depth + opts.rq_depth + 1,
  				   IB_POLL_SOFTIRQ);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
669
670
  	if (IS_ERR(rdma->cq))
  		goto error;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
671
672
  
  	/* Create the Protection Domain */
ed082d36a   Christoph Hellwig   IB/core: add supp...
673
  	rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
674
675
  	if (IS_ERR(rdma->pd))
  		goto error;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
  	/* Create the Queue Pair */
  	memset(&qp_attr, 0, sizeof qp_attr);
  	qp_attr.event_handler = qp_event_handler;
  	qp_attr.qp_context = client;
  	qp_attr.cap.max_send_wr = opts.sq_depth;
  	qp_attr.cap.max_recv_wr = opts.rq_depth;
  	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
  	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
  	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
  	qp_attr.qp_type = IB_QPT_RC;
  	qp_attr.send_cq = rdma->cq;
  	qp_attr.recv_cq = rdma->cq;
  	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
  	if (err)
  		goto error;
  	rdma->qp = rdma->cm_id->qp;
  
  	/* Request a connection */
  	memset(&conn_param, 0, sizeof(conn_param));
  	conn_param.private_data = NULL;
  	conn_param.private_data_len = 0;
  	conn_param.responder_resources = P9_RDMA_IRD;
  	conn_param.initiator_depth = P9_RDMA_ORD;
  	err = rdma_connect(rdma->cm_id, &conn_param);
  	if (err)
  		goto error;
  	err = wait_for_completion_interruptible(&rdma->cm_done);
  	if (err || (rdma->state != P9_RDMA_CONNECTED))
  		goto error;
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
  	client->status = Connected;
  
  	return 0;
  
  error:
  	rdma_destroy_trans(rdma);
  	return -ENOTCONN;
  }
  
  static struct p9_trans_module p9_rdma_trans = {
  	.name = "rdma",
  	.maxsize = P9_RDMA_MAXSIZE,
  	.def = 0,
  	.owner = THIS_MODULE,
  	.create = rdma_create_trans,
  	.close = rdma_close,
  	.request = rdma_request,
  	.cancel = rdma_cancel,
931700d26   Simon Derr   9pnet_rdma: add c...
723
  	.cancelled = rdma_cancelled,
c4fac9100   David Howells   9p: Implement sho...
724
  	.show_options = p9_rdma_show_options,
fc79d4b10   Tom Tucker   9p: rdma: RDMA Tr...
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
  };
  
  /**
   * p9_trans_rdma_init - Register the 9P RDMA transport driver
   *
   * Module entry point: registers p9_rdma_trans with the 9P core.
   * Always returns 0.
   */
  static int __init p9_trans_rdma_init(void)
  {
  	v9fs_register_trans(&p9_rdma_trans);
  	return 0;
  }
  
  /* Module exit point: unregisters the RDMA transport from the 9P core. */
  static void __exit p9_trans_rdma_exit(void)
  {
  	v9fs_unregister_trans(&p9_rdma_trans);
  }
  
  /* Hook the init/exit functions into module load and unload. */
  module_init(p9_trans_rdma_init);
  module_exit(p9_trans_rdma_exit);
  
  /*
   * NOTE(review): the SPDX tag at the top of this file says GPL-2.0-only while
   * MODULE_LICENSE below says "Dual BSD/GPL" -- confirm which is intended.
   */
  MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
  MODULE_DESCRIPTION("RDMA Transport for 9P");
  MODULE_LICENSE("Dual BSD/GPL");