Blame view

net/rds/ib_rdma.c 19.7 KB
08b48a1ed   Andy Grover   RDS/IB: Implement...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  /*
   * Copyright (c) 2006 Oracle.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
   * General Public License (GPL) Version 2, available from the file
   * COPYING in the main directory of this source tree, or the
   * OpenIB.org BSD license below:
   *
   *     Redistribution and use in source and binary forms, with or
   *     without modification, are permitted provided that the following
   *     conditions are met:
   *
   *      - Redistributions of source code must retain the above
   *        copyright notice, this list of conditions and the following
   *        disclaimer.
   *
   *      - Redistributions in binary form must reproduce the above
   *        copyright notice, this list of conditions and the following
   *        disclaimer in the documentation and/or other materials
   *        provided with the distribution.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   *
   */
  #include <linux/kernel.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
34
  #include <linux/slab.h>
764f2dd92   Chris Mason   rds: rcu-ize rds_...
35
  #include <linux/rculist.h>
1bc144b62   Huang Ying   net, rds, Replace...
36
  #include <linux/llist.h>
08b48a1ed   Andy Grover   RDS/IB: Implement...
37
38
  
  #include "rds.h"
08b48a1ed   Andy Grover   RDS/IB: Implement...
39
  #include "ib.h"
6fa70da60   Chris Mason   rds: recycle FMRs...
40
41
  static DEFINE_PER_CPU(unsigned long, clean_list_grace);
  #define CLEAN_LIST_BUSY_BIT 0
08b48a1ed   Andy Grover   RDS/IB: Implement...
42
43
44
45
46
47
48
49
  
  /*
   * Per-MR state of the IB transport.
   * This is stored as mr->r_trans_private.
   */
  struct rds_ib_mr {
  	struct rds_ib_device	*device;	/* set by rds_ib_get_mr(); reference dropped in rds_ib_free_mr() */
  	struct rds_ib_mr_pool	*pool;
  	struct ib_fmr		*fmr;		/* FMR obtained from ib_alloc_fmr() */
6fa70da60   Chris Mason   rds: recycle FMRs...
50

1bc144b62   Huang Ying   net, rds, Replace...
51
  	/* links the MR onto one of the pool's drop/free/clean llists */
  	struct llist_node	llnode;
6fa70da60   Chris Mason   rds: recycle FMRs...
52
53
54
  
  	/* unmap_list is for freeing */
  	struct list_head	unmap_list;
08b48a1ed   Andy Grover   RDS/IB: Implement...
55
56
57
58
59
60
61
62
63
64
65
66
67
  	unsigned int		remap_count;	/* bumped on every successful rds_ib_map_fmr() */
  
  	struct scatterlist	*sg;		/* scatterlist of the current mapping, NULL when torn down */
  	unsigned int		sg_len;		/* number of entries in sg (nents passed to map) */
  	u64			*dma;
  	int			sg_dma_len;	/* value returned by ib_dma_map_sg(), 0 when unmapped */
  };
  
  /*
   * Our own little FMR pool.
   *
   * MRs released by rds_ib_free_mr() land on drop_list or free_list; a
   * flush unmaps them and moves the survivors to clean_list, from where
   * rds_ib_reuse_fmr() hands them out again.
   */
  struct rds_ib_mr_pool {
  	struct mutex		flush_lock;		/* serialize fmr invalidate */
7a0ff5dbd   Chris Mason   RDS: use delayed ...
68
  	struct delayed_work	flush_worker;		/* flush worker */
08b48a1ed   Andy Grover   RDS/IB: Implement...
69

08b48a1ed   Andy Grover   RDS/IB: Implement...
70
71
  	atomic_t		item_count;		/* total # of MRs */
  	atomic_t		dirty_count;		/* # of dirty MRs */
6fa70da60   Chris Mason   rds: recycle FMRs...
72

1bc144b62   Huang Ying   net, rds, Replace...
73
74
75
  	struct llist_head	drop_list;		/* MRs that have reached their max_maps limit */
  	struct llist_head	free_list;		/* unused MRs */
  	struct llist_head	clean_list;		/* global unused & unmapped MRs */
6fa70da60   Chris Mason   rds: recycle FMRs...
76
  	wait_queue_head_t	flush_wait;		/* allocators waiting for a flush in progress */
08b48a1ed   Andy Grover   RDS/IB: Implement...
77
78
79
80
81
82
  	atomic_t		free_pinned;		/* memory pinned by free MRs */
  	unsigned long		max_items;		/* hard limit on item_count */
  	unsigned long		max_items_soft;		/* 3/4 of max_items, see rds_ib_create_mr_pool() */
  	unsigned long		max_free_pinned;	/* free_pinned level that triggers a flush */
  	struct ib_fmr_attr	fmr_attr;		/* attributes passed to ib_alloc_fmr() */
  };
6fa70da60   Chris Mason   rds: recycle FMRs...
83
  static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
08b48a1ed   Andy Grover   RDS/IB: Implement...
84
85
86
87
88
89
90
  static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
  static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
  
  /*
   * Find the IB device that currently owns @ipaddr.
   *
   * Walks every device on rds_ib_devices and each device's ipaddr_list
   * under rcu_read_lock().  On a match the device's refcount is raised
   * before the RCU section ends; the caller must drop it again.
   * Returns NULL when no device owns the address.
   */
  static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
  {
  	struct rds_ib_device *match = NULL;
  	struct rds_ib_device *dev;
  	struct rds_ib_ipaddr *entry;
  
  	rcu_read_lock();
  	list_for_each_entry_rcu(dev, &rds_ib_devices, list) {
  		list_for_each_entry_rcu(entry, &dev->ipaddr_list, list) {
  			if (entry->ipaddr != ipaddr)
  				continue;
  
  			atomic_inc(&dev->refcount);
  			match = dev;
  			goto unlock;
  		}
  	}
  unlock:
  	rcu_read_unlock();
  
  	return match;
  }
  
  /*
   * Record @ipaddr as belonging to @rds_ibdev.
   *
   * The new entry is appended to the device's RCU-protected ipaddr_list
   * under the device spinlock.  Returns 0, or -ENOMEM if the entry
   * cannot be allocated.
   */
  static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
  {
  	struct rds_ib_ipaddr *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
  
  	if (entry == NULL)
  		return -ENOMEM;
  
  	entry->ipaddr = ipaddr;
  
  	spin_lock_irq(&rds_ibdev->spinlock);
  	list_add_tail_rcu(&entry->list, &rds_ibdev->ipaddr_list);
  	spin_unlock_irq(&rds_ibdev->spinlock);
  
  	return 0;
  }
  
  /*
   * Drop the first ipaddr_list entry of @rds_ibdev that matches @ipaddr.
   *
   * The entry is unlinked under the device spinlock and only freed after
   * synchronize_rcu(), so lockless readers still walking the list never
   * see freed memory.  Nothing happens if the address is not listed.
   */
  static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
  {
  	struct rds_ib_ipaddr *cursor;
  	struct rds_ib_ipaddr *victim = NULL;
  
  	spin_lock_irq(&rds_ibdev->spinlock);
  	list_for_each_entry_rcu(cursor, &rds_ibdev->ipaddr_list, list) {
  		if (cursor->ipaddr != ipaddr)
  			continue;
  
  		list_del_rcu(&cursor->list);
  		victim = cursor;
  		break;
  	}
  	spin_unlock_irq(&rds_ibdev->spinlock);
  
  	if (victim == NULL)
  		return;
  
  	synchronize_rcu();
  	kfree(victim);
  }
  
  /*
   * Move @ipaddr to @rds_ibdev.
   *
   * If some device already owns the address, the address is removed from
   * it and the reference taken by the lookup is dropped.  The address is
   * then added to @rds_ibdev.  Returns the result of rds_ib_add_ipaddr().
   */
  int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
  {
  	struct rds_ib_device *prev_owner = rds_ib_get_device(ipaddr);
  
  	if (prev_owner != NULL) {
  		rds_ib_remove_ipaddr(prev_owner, ipaddr);
  		rds_ib_dev_put(prev_owner);
  	}
  
  	return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
  }
745cbccac   Andy Grover   RDS: Rewrite conn...
156
  void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
08b48a1ed   Andy Grover   RDS/IB: Implement...
157
158
159
160
161
162
163
164
  {
  	struct rds_ib_connection *ic = conn->c_transport_data;
  
  	/* conn was previously on the nodev_conns_list */
  	spin_lock_irq(&ib_nodev_conns_lock);
  	BUG_ON(list_empty(&ib_nodev_conns));
  	BUG_ON(list_empty(&ic->ib_node));
  	list_del(&ic->ib_node);
08b48a1ed   Andy Grover   RDS/IB: Implement...
165

aef3ea33e   Dan Carpenter   rds: spin_lock_ir...
166
  	spin_lock(&rds_ibdev->spinlock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
167
  	list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
aef3ea33e   Dan Carpenter   rds: spin_lock_ir...
168
  	spin_unlock(&rds_ibdev->spinlock);
745cbccac   Andy Grover   RDS: Rewrite conn...
169
  	spin_unlock_irq(&ib_nodev_conns_lock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
170
171
  
  	ic->rds_ibdev = rds_ibdev;
3e0249f9c   Zach Brown   RDS/IB: add refco...
172
  	atomic_inc(&rds_ibdev->refcount);
08b48a1ed   Andy Grover   RDS/IB: Implement...
173
  }
745cbccac   Andy Grover   RDS: Rewrite conn...
174
  void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
08b48a1ed   Andy Grover   RDS/IB: Implement...
175
  {
745cbccac   Andy Grover   RDS: Rewrite conn...
176
  	struct rds_ib_connection *ic = conn->c_transport_data;
08b48a1ed   Andy Grover   RDS/IB: Implement...
177

745cbccac   Andy Grover   RDS: Rewrite conn...
178
179
  	/* place conn on nodev_conns_list */
  	spin_lock(&ib_nodev_conns_lock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
180

745cbccac   Andy Grover   RDS: Rewrite conn...
181
182
183
184
185
186
187
188
189
190
  	spin_lock_irq(&rds_ibdev->spinlock);
  	BUG_ON(list_empty(&ic->ib_node));
  	list_del(&ic->ib_node);
  	spin_unlock_irq(&rds_ibdev->spinlock);
  
  	list_add_tail(&ic->ib_node, &ib_nodev_conns);
  
  	spin_unlock(&ib_nodev_conns_lock);
  
  	ic->rds_ibdev = NULL;
3e0249f9c   Zach Brown   RDS/IB: add refco...
191
  	rds_ib_dev_put(rds_ibdev);
08b48a1ed   Andy Grover   RDS/IB: Implement...
192
  }
8aeb1ba66   Zach Brown   RDS/IB: destroy c...
193
  void rds_ib_destroy_nodev_conns(void)
08b48a1ed   Andy Grover   RDS/IB: Implement...
194
195
196
197
198
  {
  	struct rds_ib_connection *ic, *_ic;
  	LIST_HEAD(tmp_list);
  
  	/* avoid calling conn_destroy with irqs off */
8aeb1ba66   Zach Brown   RDS/IB: destroy c...
199
200
201
  	spin_lock_irq(&ib_nodev_conns_lock);
  	list_splice(&ib_nodev_conns, &tmp_list);
  	spin_unlock_irq(&ib_nodev_conns_lock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
202

433d308dd   Andy Grover   RDS: Fix panic on...
203
  	list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
08b48a1ed   Andy Grover   RDS/IB: Implement...
204
  		rds_conn_destroy(ic->conn);
08b48a1ed   Andy Grover   RDS/IB: Implement...
205
206
207
208
209
210
211
212
213
  }
  
  struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
  {
  	struct rds_ib_mr_pool *pool;
  
  	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
  	if (!pool)
  		return ERR_PTR(-ENOMEM);
1bc144b62   Huang Ying   net, rds, Replace...
214
215
216
  	init_llist_head(&pool->free_list);
  	init_llist_head(&pool->drop_list);
  	init_llist_head(&pool->clean_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
217
  	mutex_init(&pool->flush_lock);
6fa70da60   Chris Mason   rds: recycle FMRs...
218
  	init_waitqueue_head(&pool->flush_wait);
7a0ff5dbd   Chris Mason   RDS: use delayed ...
219
  	INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
08b48a1ed   Andy Grover   RDS/IB: Implement...
220
221
222
  
  	pool->fmr_attr.max_pages = fmr_message_size;
  	pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
a870d6272   Andy Grover   RDS/IB: Always us...
223
  	pool->fmr_attr.page_shift = PAGE_SHIFT;
08b48a1ed   Andy Grover   RDS/IB: Implement...
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
  	pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
  
  	/* We never allow more than max_items MRs to be allocated.
  	 * When we exceed more than max_items_soft, we start freeing
  	 * items more aggressively.
  	 * Make sure that max_items > max_items_soft > max_items / 2
  	 */
  	pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
  	pool->max_items = rds_ibdev->max_fmrs;
  
  	return pool;
  }
  
  /*
   * Report the MR pool limits of @rds_ibdev in @iinfo: the maximum
   * number of MRs and the maximum number of pages per MR.
   */
  void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
  {
  	struct rds_ib_mr_pool *mr_pool = rds_ibdev->mr_pool;
  
  	iinfo->rdma_mr_max = mr_pool->max_items;
  	iinfo->rdma_mr_size = mr_pool->fmr_attr.max_pages;
  }
  
  void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
  {
7a0ff5dbd   Chris Mason   RDS: use delayed ...
247
  	cancel_delayed_work_sync(&pool->flush_worker);
6fa70da60   Chris Mason   rds: recycle FMRs...
248
  	rds_ib_flush_mr_pool(pool, 1, NULL);
571c02fa8   Andy Grover   RDS: Workaround f...
249
250
  	WARN_ON(atomic_read(&pool->item_count));
  	WARN_ON(atomic_read(&pool->free_pinned));
08b48a1ed   Andy Grover   RDS/IB: Implement...
251
252
253
254
255
256
  	kfree(pool);
  }
  
  static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
  {
  	struct rds_ib_mr *ibmr = NULL;
1bc144b62   Huang Ying   net, rds, Replace...
257
  	struct llist_node *ret;
6fa70da60   Chris Mason   rds: recycle FMRs...
258
  	unsigned long *flag;
08b48a1ed   Andy Grover   RDS/IB: Implement...
259

6fa70da60   Chris Mason   rds: recycle FMRs...
260
261
262
  	preempt_disable();
  	flag = &__get_cpu_var(clean_list_grace);
  	set_bit(CLEAN_LIST_BUSY_BIT, flag);
1bc144b62   Huang Ying   net, rds, Replace...
263
  	ret = llist_del_first(&pool->clean_list);
6fa70da60   Chris Mason   rds: recycle FMRs...
264
  	if (ret)
1bc144b62   Huang Ying   net, rds, Replace...
265
  		ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
08b48a1ed   Andy Grover   RDS/IB: Implement...
266

6fa70da60   Chris Mason   rds: recycle FMRs...
267
268
  	clear_bit(CLEAN_LIST_BUSY_BIT, flag);
  	preempt_enable();
08b48a1ed   Andy Grover   RDS/IB: Implement...
269
270
  	return ibmr;
  }
6fa70da60   Chris Mason   rds: recycle FMRs...
271
272
273
274
275
276
277
278
279
280
281
  /*
   * Busy-wait until no online CPU has its clean_list_grace busy bit set,
   * i.e. until every rds_ib_reuse_fmr() call that was in progress on
   * some CPU has left its critical section.
   */
  static inline void wait_clean_list_grace(void)
  {
  	int cpu;
  
  	for_each_online_cpu(cpu) {
  		unsigned long *busy = &per_cpu(clean_list_grace, cpu);
  
  		while (test_bit(CLEAN_LIST_BUSY_BIT, busy))
  			cpu_relax();
  	}
  }
08b48a1ed   Andy Grover   RDS/IB: Implement...
282
283
284
285
286
  static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
  {
  	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
  	struct rds_ib_mr *ibmr = NULL;
  	int err = 0, iter = 0;
8576f374a   Chris Mason   RDS: flush fmrs b...
287
  	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
c534a107e   Tejun Heo   rds/ib: use syste...
288
  		schedule_delayed_work(&pool->flush_worker, 10);
8576f374a   Chris Mason   RDS: flush fmrs b...
289

08b48a1ed   Andy Grover   RDS/IB: Implement...
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
  	while (1) {
  		ibmr = rds_ib_reuse_fmr(pool);
  		if (ibmr)
  			return ibmr;
  
  		/* No clean MRs - now we have the choice of either
  		 * allocating a fresh MR up to the limit imposed by the
  		 * driver, or flush any dirty unused MRs.
  		 * We try to avoid stalling in the send path if possible,
  		 * so we allocate as long as we're allowed to.
  		 *
  		 * We're fussy with enforcing the FMR limit, though. If the driver
  		 * tells us we can't use more than N fmrs, we shouldn't start
  		 * arguing with it */
  		if (atomic_inc_return(&pool->item_count) <= pool->max_items)
  			break;
  
  		atomic_dec(&pool->item_count);
  
  		if (++iter > 2) {
  			rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted);
  			return ERR_PTR(-EAGAIN);
  		}
  
  		/* We do have some empty MRs. Flush them out. */
  		rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
6fa70da60   Chris Mason   rds: recycle FMRs...
316
317
318
  		rds_ib_flush_mr_pool(pool, 0, &ibmr);
  		if (ibmr)
  			return ibmr;
08b48a1ed   Andy Grover   RDS/IB: Implement...
319
  	}
e4c52c98e   Andy Grover   RDS/IB: add _to_n...
320
  	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
08b48a1ed   Andy Grover   RDS/IB: Implement...
321
322
323
324
  	if (!ibmr) {
  		err = -ENOMEM;
  		goto out_no_cigar;
  	}
38a4e5e61   Chris Mason   rds: Use RCU for ...
325
  	memset(ibmr, 0, sizeof(*ibmr));
08b48a1ed   Andy Grover   RDS/IB: Implement...
326
327
328
  	ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
  			(IB_ACCESS_LOCAL_WRITE |
  			 IB_ACCESS_REMOTE_READ |
15133f6e6   Andy Grover   RDS: Implement at...
329
330
  			 IB_ACCESS_REMOTE_WRITE|
  			 IB_ACCESS_REMOTE_ATOMIC),
08b48a1ed   Andy Grover   RDS/IB: Implement...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
  			&pool->fmr_attr);
  	if (IS_ERR(ibmr->fmr)) {
  		err = PTR_ERR(ibmr->fmr);
  		ibmr->fmr = NULL;
  		printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)
  ", err);
  		goto out_no_cigar;
  	}
  
  	rds_ib_stats_inc(s_ib_rdma_mr_alloc);
  	return ibmr;
  
  out_no_cigar:
  	if (ibmr) {
  		if (ibmr->fmr)
  			ib_dealloc_fmr(ibmr->fmr);
  		kfree(ibmr);
  	}
  	atomic_dec(&pool->item_count);
  	return ERR_PTR(err);
  }
  
  static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
  	       struct scatterlist *sg, unsigned int nents)
  {
  	struct ib_device *dev = rds_ibdev->dev;
  	struct scatterlist *scat = sg;
  	u64 io_addr = 0;
  	u64 *dma_pages;
  	u32 len;
  	int page_cnt, sg_dma_len;
  	int i, j;
  	int ret;
  
  	sg_dma_len = ib_dma_map_sg(dev, sg, nents,
  				 DMA_BIDIRECTIONAL);
  	if (unlikely(!sg_dma_len)) {
  		printk(KERN_WARNING "RDS/IB: dma_map_sg failed!
  ");
  		return -EBUSY;
  	}
  
  	len = 0;
  	page_cnt = 0;
  
  	for (i = 0; i < sg_dma_len; ++i) {
  		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
  		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
a870d6272   Andy Grover   RDS/IB: Always us...
379
  		if (dma_addr & ~PAGE_MASK) {
08b48a1ed   Andy Grover   RDS/IB: Implement...
380
381
382
383
384
  			if (i > 0)
  				return -EINVAL;
  			else
  				++page_cnt;
  		}
a870d6272   Andy Grover   RDS/IB: Always us...
385
  		if ((dma_addr + dma_len) & ~PAGE_MASK) {
08b48a1ed   Andy Grover   RDS/IB: Implement...
386
387
388
389
390
391
392
393
  			if (i < sg_dma_len - 1)
  				return -EINVAL;
  			else
  				++page_cnt;
  		}
  
  		len += dma_len;
  	}
a870d6272   Andy Grover   RDS/IB: Always us...
394
  	page_cnt += len >> PAGE_SHIFT;
08b48a1ed   Andy Grover   RDS/IB: Implement...
395
396
  	if (page_cnt > fmr_message_size)
  		return -EINVAL;
e4c52c98e   Andy Grover   RDS/IB: add _to_n...
397
398
  	dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
  				 rdsibdev_to_node(rds_ibdev));
08b48a1ed   Andy Grover   RDS/IB: Implement...
399
400
401
402
403
404
405
  	if (!dma_pages)
  		return -ENOMEM;
  
  	page_cnt = 0;
  	for (i = 0; i < sg_dma_len; ++i) {
  		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
  		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
a870d6272   Andy Grover   RDS/IB: Always us...
406
  		for (j = 0; j < dma_len; j += PAGE_SIZE)
08b48a1ed   Andy Grover   RDS/IB: Implement...
407
  			dma_pages[page_cnt++] =
a870d6272   Andy Grover   RDS/IB: Always us...
408
  				(dma_addr & PAGE_MASK) + j;
08b48a1ed   Andy Grover   RDS/IB: Implement...
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
  	}
  
  	ret = ib_map_phys_fmr(ibmr->fmr,
  				   dma_pages, page_cnt, io_addr);
  	if (ret)
  		goto out;
  
  	/* Success - we successfully remapped the MR, so we can
  	 * safely tear down the old mapping. */
  	rds_ib_teardown_mr(ibmr);
  
  	ibmr->sg = scat;
  	ibmr->sg_len = nents;
  	ibmr->sg_dma_len = sg_dma_len;
  	ibmr->remap_count++;
  
  	rds_ib_stats_inc(s_ib_rdma_mr_used);
  	ret = 0;
  
  out:
  	kfree(dma_pages);
  
  	return ret;
  }
  
  /*
   * Synchronise the DMA mapping of an MR before the CPU or the device
   * accesses the buffer.
   *
   * @trans_private is the struct rds_ib_mr.  DMA_FROM_DEVICE syncs for
   * the CPU, DMA_TO_DEVICE syncs for the device; any other @direction is
   * ignored.
   */
  void rds_ib_sync_mr(void *trans_private, int direction)
  {
  	struct rds_ib_mr *mr = trans_private;
  	struct rds_ib_device *owner = mr->device;
  
  	if (direction == DMA_FROM_DEVICE) {
  		ib_dma_sync_sg_for_cpu(owner->dev, mr->sg,
  			mr->sg_dma_len, DMA_BIDIRECTIONAL);
  	} else if (direction == DMA_TO_DEVICE) {
  		ib_dma_sync_sg_for_device(owner->dev, mr->sg,
  			mr->sg_dma_len, DMA_BIDIRECTIONAL);
  	}
  }
  
  /*
   * Release everything @ibmr holds for its current mapping: the DMA
   * mapping of the scatterlist, the page references (each page is marked
   * dirty first) and the scatterlist itself.  The pool's pinned-page
   * accounting is not touched here.
   */
  static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
  {
  	struct rds_ib_device *owner = ibmr->device;
  	unsigned int idx;
  
  	if (ibmr->sg_dma_len) {
  		ib_dma_unmap_sg(owner->dev,
  				ibmr->sg, ibmr->sg_len,
  				DMA_BIDIRECTIONAL);
  		ibmr->sg_dma_len = 0;
  	}
  
  	/* nothing pinned, nothing more to do */
  	if (!ibmr->sg_len)
  		return;
  
  	/* Release the s/g list */
  	for (idx = 0; idx < ibmr->sg_len; ++idx) {
  		struct page *pg = sg_page(&ibmr->sg[idx]);
  
  		/* FIXME we need a way to tell a r/w MR
  		 * from a r/o MR */
  		BUG_ON(irqs_disabled());
  		set_page_dirty(pg);
  		put_page(pg);
  	}
  	kfree(ibmr->sg);
  
  	ibmr->sg = NULL;
  	ibmr->sg_len = 0;
  }
  
  /*
   * Tear down the current mapping of @ibmr and subtract the pages it had
   * pinned from the owning pool's free_pinned counter.
   */
  static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
  {
  	/* sample before the teardown resets sg_len to 0 */
  	unsigned int npinned = ibmr->sg_len;
  	struct rds_ib_mr_pool *mr_pool;
  
  	__rds_ib_teardown_mr(ibmr);
  	if (!npinned)
  		return;
  
  	mr_pool = ibmr->device->mr_pool;
  	atomic_sub(npinned, &mr_pool->free_pinned);
  }
  
  /*
   * Number of MRs a flush should try to free: every MR currently in the
   * pool when @free_all is set, none otherwise.
   */
  static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
  {
  	unsigned int total = atomic_read(&pool->item_count);
  
  	return free_all ? total : 0;
  }
  
  /*
1bc144b62   Huang Ying   net, rds, Replace...
507
   * given an llist of mrs, put them all into the list_head for more processing
6fa70da60   Chris Mason   rds: recycle FMRs...
508
   */
1bc144b62   Huang Ying   net, rds, Replace...
509
  static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
6fa70da60   Chris Mason   rds: recycle FMRs...
510
511
  {
  	struct rds_ib_mr *ibmr;
1bc144b62   Huang Ying   net, rds, Replace...
512
513
514
515
516
517
518
  	struct llist_node *node;
  	struct llist_node *next;
  
  	node = llist_del_all(llist);
  	while (node) {
  		next = node->next;
  		ibmr = llist_entry(node, struct rds_ib_mr, llnode);
6fa70da60   Chris Mason   rds: recycle FMRs...
519
  		list_add_tail(&ibmr->unmap_list, list);
1bc144b62   Huang Ying   net, rds, Replace...
520
  		node = next;
6fa70da60   Chris Mason   rds: recycle FMRs...
521
522
523
524
  	}
  }
  
  /*
1bc144b62   Huang Ying   net, rds, Replace...
525
526
527
   * this takes a list head of mrs and turns it into linked llist nodes
   * of clusters.  Each cluster has linked llist nodes of
   * MR_CLUSTER_SIZE mrs that are ready for reuse.
6fa70da60   Chris Mason   rds: recycle FMRs...
528
   */
1bc144b62   Huang Ying   net, rds, Replace...
529
530
531
532
  static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
  				struct list_head *list,
  				struct llist_node **nodes_head,
  				struct llist_node **nodes_tail)
6fa70da60   Chris Mason   rds: recycle FMRs...
533
534
  {
  	struct rds_ib_mr *ibmr;
1bc144b62   Huang Ying   net, rds, Replace...
535
536
  	struct llist_node *cur = NULL;
  	struct llist_node **next = nodes_head;
6fa70da60   Chris Mason   rds: recycle FMRs...
537
538
  
  	list_for_each_entry(ibmr, list, unmap_list) {
1bc144b62   Huang Ying   net, rds, Replace...
539
540
541
  		cur = &ibmr->llnode;
  		*next = cur;
  		next = &cur->next;
6fa70da60   Chris Mason   rds: recycle FMRs...
542
  	}
1bc144b62   Huang Ying   net, rds, Replace...
543
544
  	*next = NULL;
  	*nodes_tail = cur;
6fa70da60   Chris Mason   rds: recycle FMRs...
545
546
547
  }
  
  /*
08b48a1ed   Andy Grover   RDS/IB: Implement...
548
549
550
551
552
   * Flush our pool of MRs.
   * At a minimum, all currently unused MRs are unmapped.
   * If the number of MRs allocated exceeds the limit, we also try
   * to free as many MRs as needed to get back to this limit.
   */
6fa70da60   Chris Mason   rds: recycle FMRs...
553
554
  static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
  			        int free_all, struct rds_ib_mr **ibmr_ret)
08b48a1ed   Andy Grover   RDS/IB: Implement...
555
556
  {
  	struct rds_ib_mr *ibmr, *next;
1bc144b62   Huang Ying   net, rds, Replace...
557
558
  	struct llist_node *clean_nodes;
  	struct llist_node *clean_tail;
08b48a1ed   Andy Grover   RDS/IB: Implement...
559
560
561
  	LIST_HEAD(unmap_list);
  	LIST_HEAD(fmr_list);
  	unsigned long unpinned = 0;
08b48a1ed   Andy Grover   RDS/IB: Implement...
562
563
564
565
  	unsigned int nfreed = 0, ncleaned = 0, free_goal;
  	int ret = 0;
  
  	rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
6fa70da60   Chris Mason   rds: recycle FMRs...
566
567
568
569
570
571
572
573
574
575
576
577
  	if (ibmr_ret) {
  		DEFINE_WAIT(wait);
  		while(!mutex_trylock(&pool->flush_lock)) {
  			ibmr = rds_ib_reuse_fmr(pool);
  			if (ibmr) {
  				*ibmr_ret = ibmr;
  				finish_wait(&pool->flush_wait, &wait);
  				goto out_nolock;
  			}
  
  			prepare_to_wait(&pool->flush_wait, &wait,
  					TASK_UNINTERRUPTIBLE);
1bc144b62   Huang Ying   net, rds, Replace...
578
  			if (llist_empty(&pool->clean_list))
6fa70da60   Chris Mason   rds: recycle FMRs...
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
  				schedule();
  
  			ibmr = rds_ib_reuse_fmr(pool);
  			if (ibmr) {
  				*ibmr_ret = ibmr;
  				finish_wait(&pool->flush_wait, &wait);
  				goto out_nolock;
  			}
  		}
  		finish_wait(&pool->flush_wait, &wait);
  	} else
  		mutex_lock(&pool->flush_lock);
  
  	if (ibmr_ret) {
  		ibmr = rds_ib_reuse_fmr(pool);
  		if (ibmr) {
  			*ibmr_ret = ibmr;
  			goto out;
  		}
  	}
08b48a1ed   Andy Grover   RDS/IB: Implement...
599

08b48a1ed   Andy Grover   RDS/IB: Implement...
600
  	/* Get the list of all MRs to be dropped. Ordering matters -
6fa70da60   Chris Mason   rds: recycle FMRs...
601
602
  	 * we want to put drop_list ahead of free_list.
  	 */
1bc144b62   Huang Ying   net, rds, Replace...
603
604
  	llist_append_to_list(&pool->drop_list, &unmap_list);
  	llist_append_to_list(&pool->free_list, &unmap_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
605
  	if (free_all)
1bc144b62   Huang Ying   net, rds, Replace...
606
  		llist_append_to_list(&pool->clean_list, &unmap_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
607
608
609
610
611
612
613
  
  	free_goal = rds_ib_flush_goal(pool, free_all);
  
  	if (list_empty(&unmap_list))
  		goto out;
  
  	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
6fa70da60   Chris Mason   rds: recycle FMRs...
614
  	list_for_each_entry(ibmr, &unmap_list, unmap_list)
08b48a1ed   Andy Grover   RDS/IB: Implement...
615
  		list_add(&ibmr->fmr->list, &fmr_list);
6fa70da60   Chris Mason   rds: recycle FMRs...
616

08b48a1ed   Andy Grover   RDS/IB: Implement...
617
618
619
620
621
622
  	ret = ib_unmap_fmr(&fmr_list);
  	if (ret)
  		printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)
  ", ret);
  
  	/* Now we can destroy the DMA mapping and unpin any pages */
6fa70da60   Chris Mason   rds: recycle FMRs...
623
  	list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
08b48a1ed   Andy Grover   RDS/IB: Implement...
624
625
626
627
  		unpinned += ibmr->sg_len;
  		__rds_ib_teardown_mr(ibmr);
  		if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
  			rds_ib_stats_inc(s_ib_rdma_mr_free);
6fa70da60   Chris Mason   rds: recycle FMRs...
628
  			list_del(&ibmr->unmap_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
629
630
631
632
633
634
  			ib_dealloc_fmr(ibmr->fmr);
  			kfree(ibmr);
  			nfreed++;
  		}
  		ncleaned++;
  	}
6fa70da60   Chris Mason   rds: recycle FMRs...
635
636
637
  	if (!list_empty(&unmap_list)) {
  		/* we have to make sure that none of the things we're about
  		 * to put on the clean list would race with other cpus trying
1bc144b62   Huang Ying   net, rds, Replace...
638
  		 * to pull items off.  The llist would explode if we managed to
6fa70da60   Chris Mason   rds: recycle FMRs...
639
  		 * remove something from the clean list and then add it back again
1bc144b62   Huang Ying   net, rds, Replace...
640
  		 * while another CPU was spinning on that same item in llist_del_first.
6fa70da60   Chris Mason   rds: recycle FMRs...
641
  		 *
1bc144b62   Huang Ying   net, rds, Replace...
642
  		 * This is pretty unlikely, but just in case  wait for an llist grace period
6fa70da60   Chris Mason   rds: recycle FMRs...
643
644
645
  		 * here before adding anything back into the clean list.
  		 */
  		wait_clean_list_grace();
1bc144b62   Huang Ying   net, rds, Replace...
646
  		list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
6fa70da60   Chris Mason   rds: recycle FMRs...
647
  		if (ibmr_ret)
1bc144b62   Huang Ying   net, rds, Replace...
648
  			*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
6fa70da60   Chris Mason   rds: recycle FMRs...
649

1bc144b62   Huang Ying   net, rds, Replace...
650
651
652
  		/* more than one entry in llist nodes */
  		if (clean_nodes->next)
  			llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
6fa70da60   Chris Mason   rds: recycle FMRs...
653
654
  
  	}
08b48a1ed   Andy Grover   RDS/IB: Implement...
655
656
657
658
659
660
661
  
  	atomic_sub(unpinned, &pool->free_pinned);
  	atomic_sub(ncleaned, &pool->dirty_count);
  	atomic_sub(nfreed, &pool->item_count);
  
  out:
  	mutex_unlock(&pool->flush_lock);
6fa70da60   Chris Mason   rds: recycle FMRs...
662
663
664
  	if (waitqueue_active(&pool->flush_wait))
  		wake_up(&pool->flush_wait);
  out_nolock:
08b48a1ed   Andy Grover   RDS/IB: Implement...
665
666
667
668
669
  	return ret;
  }
  
  static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
  {
7a0ff5dbd   Chris Mason   RDS: use delayed ...
670
  	struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
08b48a1ed   Andy Grover   RDS/IB: Implement...
671

6fa70da60   Chris Mason   rds: recycle FMRs...
672
  	rds_ib_flush_mr_pool(pool, 0, NULL);
08b48a1ed   Andy Grover   RDS/IB: Implement...
673
674
675
676
677
678
679
  }
  
  void rds_ib_free_mr(void *trans_private, int invalidate)
  {
  	struct rds_ib_mr *ibmr = trans_private;
  	struct rds_ib_device *rds_ibdev = ibmr->device;
  	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
08b48a1ed   Andy Grover   RDS/IB: Implement...
680
681
682
683
684
  
  	rdsdebug("RDS/IB: free_mr nents %u
  ", ibmr->sg_len);
  
  	/* Return it to the pool's free list */
08b48a1ed   Andy Grover   RDS/IB: Implement...
685
  	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
1bc144b62   Huang Ying   net, rds, Replace...
686
  		llist_add(&ibmr->llnode, &pool->drop_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
687
  	else
1bc144b62   Huang Ying   net, rds, Replace...
688
  		llist_add(&ibmr->llnode, &pool->free_list);
08b48a1ed   Andy Grover   RDS/IB: Implement...
689
690
691
  
  	atomic_add(ibmr->sg_len, &pool->free_pinned);
  	atomic_inc(&pool->dirty_count);
08b48a1ed   Andy Grover   RDS/IB: Implement...
692
693
  
  	/* If we've pinned too many pages, request a flush */
f64f9e719   Joe Perches   net: Move && and ...
694
695
  	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
  	    atomic_read(&pool->dirty_count) >= pool->max_items / 10)
c534a107e   Tejun Heo   rds/ib: use syste...
696
  		schedule_delayed_work(&pool->flush_worker, 10);
08b48a1ed   Andy Grover   RDS/IB: Implement...
697
698
699
  
  	if (invalidate) {
  		if (likely(!in_interrupt())) {
6fa70da60   Chris Mason   rds: recycle FMRs...
700
  			rds_ib_flush_mr_pool(pool, 0, NULL);
08b48a1ed   Andy Grover   RDS/IB: Implement...
701
702
703
  		} else {
  			/* We get here if the user created a MR marked
  			 * as use_once and invalidate at the same time. */
c534a107e   Tejun Heo   rds/ib: use syste...
704
  			schedule_delayed_work(&pool->flush_worker, 10);
08b48a1ed   Andy Grover   RDS/IB: Implement...
705
706
  		}
  	}
3e0249f9c   Zach Brown   RDS/IB: add refco...
707
708
  
  	rds_ib_dev_put(rds_ibdev);
08b48a1ed   Andy Grover   RDS/IB: Implement...
709
710
711
712
713
  }
  
  void rds_ib_flush_mrs(void)
  {
  	struct rds_ib_device *rds_ibdev;
ea819867b   Zach Brown   RDS/IB: protect t...
714
  	down_read(&rds_ib_devices_lock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
715
716
717
718
  	list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
  		struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
  
  		if (pool)
6fa70da60   Chris Mason   rds: recycle FMRs...
719
  			rds_ib_flush_mr_pool(pool, 0, NULL);
08b48a1ed   Andy Grover   RDS/IB: Implement...
720
  	}
ea819867b   Zach Brown   RDS/IB: protect t...
721
  	up_read(&rds_ib_devices_lock);
08b48a1ed   Andy Grover   RDS/IB: Implement...
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
  }
  
  /*
   * Register the scatterlist @sg (@nents entries) as an MR on the IB
   * device that owns the address @rs is bound to.
   *
   * On success the MR is returned, *@key_ret holds its rkey, and the MR
   * keeps the device reference taken here until rds_ib_free_mr().
   * On failure an ERR_PTR() is returned and no device reference is left
   * behind:
   *   -ENODEV  no device owns the bound address, or it has no MR pool
   *   other    error from rds_ib_alloc_fmr() or rds_ib_map_fmr()
   */
  void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
  		    struct rds_sock *rs, u32 *key_ret)
  {
  	struct rds_ib_device *rds_ibdev;
  	struct rds_ib_mr *ibmr = NULL;
  	int ret;
  
  	/* takes a device reference on success */
  	rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
  	if (!rds_ibdev) {
  		ret = -ENODEV;
  		goto out;
  	}
  
  	if (!rds_ibdev->mr_pool) {
  		ret = -ENODEV;
  		goto out;
  	}
  
  	ibmr = rds_ib_alloc_fmr(rds_ibdev);
  	if (IS_ERR(ibmr)) {
  		/* no MR to hand the device reference to, drop it here */
  		rds_ib_dev_put(rds_ibdev);
  		return ibmr;
  	}
  
  	ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
  	if (ret == 0)
  		*key_ret = ibmr->fmr->rkey;
  	else
  		printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
  
  	/* from here on the MR owns the device reference */
  	ibmr->device = rds_ibdev;
  	rds_ibdev = NULL;
  
   out:
  	if (ret) {
  		/* rds_ib_free_mr() also drops the reference held by the MR */
  		if (ibmr)
  			rds_ib_free_mr(ibmr, 0);
  		ibmr = ERR_PTR(ret);
  	}
  	if (rds_ibdev)
  		rds_ib_dev_put(rds_ibdev);
  	return ibmr;
  }
6fa70da60   Chris Mason   rds: recycle FMRs...
766