Commit 04ce9ab385dc97eb55299d533cd3af79b8fc7529

Authored by Dan Williams
1 parent a08abd8ca8

async_xor: permit callers to pass in a 'dma/page scribble' region

async_xor() needs space to perform dma and page address conversions.  In
most cases the code can simply reuse the struct page * array because the
size of the native pointer matches the size of a dma/page address.  In
order to support archs where sizeof(dma_addr_t) is larger than
sizeof(struct page *), or to preserve the input parameters, we utilize a
memory region passed in by the caller.
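
For illustration, a minimal caller-side sketch of the new contract, modeled on
the documentation example updated below (the helper name xor_with_scribble is
hypothetical; init_async_submit(), async_xor() and addr_conv_t are used as in
that example and are defined in include/linux/async_tx.h):

    #include <linux/async_tx.h>

    /* Hypothetical helper: the caller owns the 'scribble' region, so
     * async_xor() never has to overwrite the struct page * array when
     * converting to dma addresses.  The region is only read while the
     * descriptor is prepared, so a local array suffices here.
     */
    static struct dma_async_tx_descriptor *
    xor_with_scribble(struct page *dest, struct page **srcs, int src_cnt,
                      size_t len)
    {
            struct async_submit_ctl submit;
            addr_conv_t addr_conv[src_cnt];   /* one scribble slot per source */

            init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
                              addr_conv);
            return async_xor(dest, srcs, 0, src_cnt, len, &submit);
    }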

Since the code is now prepared to handle the case where it cannot
perform address conversions on the stack, we no longer need the
!HIGHMEM64G dependency in drivers/dma/Kconfig.
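
In code terms, the compile-time BUILD_BUG_ON removed below becomes a per-call
decision.  Roughly, condensed from the async_xor() hunk that follows (the
helper name pick_dma_src is hypothetical):

    #include <linux/async_tx.h>

    /* With CONFIG_HIGHMEM64G on 32-bit x86, dma_addr_t is 64 bits while a
     * struct page * is only 32 bits, so casting src_list to dma_addr_t *
     * would overrun the caller's array.  The new code prefers the caller's
     * scribble region and otherwise falls back to the synchronous path.
     */
    static dma_addr_t *pick_dma_src(struct async_submit_ctl *submit,
                                    struct page **src_list)
    {
            if (submit->scribble)
                    return submit->scribble;        /* caller-provided space */
            if (sizeof(dma_addr_t) <= sizeof(struct page *))
                    return (dma_addr_t *) src_list; /* in-place reuse is safe */
            return NULL;                            /* take the sync xor path */
    }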

[ Impact: don't clobber input buffers for address conversions ]

Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Showing 3 changed files with 58 additions and 47 deletions

Documentation/crypto/async-tx-api.txt
... ... @@ -115,29 +115,42 @@
115 115 Perform a xor->copy->xor operation where each operation depends on the
116 116 result from the previous operation:
117 117  
118   -void complete_xor_copy_xor(void *param)
  118 +void callback(void *param)
119 119 {
120   - printk("complete\n");
  120 + struct completion *cmp = param;
  121 +
  122 + complete(cmp);
121 123 }
122 124  
123   -int run_xor_copy_xor(struct page **xor_srcs,
124   - int xor_src_cnt,
125   - struct page *xor_dest,
126   - size_t xor_len,
127   - struct page *copy_src,
128   - struct page *copy_dest,
129   - size_t copy_len)
  125 +void run_xor_copy_xor(struct page **xor_srcs,
  126 + int xor_src_cnt,
  127 + struct page *xor_dest,
  128 + size_t xor_len,
  129 + struct page *copy_src,
  130 + struct page *copy_dest,
  131 + size_t copy_len)
130 132 {
131 133 struct dma_async_tx_descriptor *tx;
  134 + addr_conv_t addr_conv[xor_src_cnt];
  135 + struct async_submit_ctl submit;
  136 + struct completion cmp;
132 138  
133   - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
134   - ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
135   - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL);
136   - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
137   - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK,
138   - tx, complete_xor_copy_xor, NULL);
  139 + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
  140 + addr_conv);
  141 + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
139 142  
  143 + submit.depend_tx = tx;
  144 + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
  145 +
  146 + init_completion(&cmp);
  147 + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
  148 + callback, &cmp, addr_conv);
  149 + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
  150 +
140 151 async_tx_issue_pending_all();
  152 +
  153 + wait_for_completion(&cmp);
141 154 }
142 155  
143 156 See include/linux/async_tx.h for more information on the flags. See the
crypto/async_tx/async_xor.c
... ... @@ -33,11 +33,10 @@
33 33 /* do_async_xor - dma map the pages and perform the xor with an engine */
34 34 static __async_inline struct dma_async_tx_descriptor *
35 35 do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
36   - unsigned int offset, int src_cnt, size_t len,
  36 + unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
37 37 struct async_submit_ctl *submit)
38 38 {
39 39 struct dma_device *dma = chan->device;
40   - dma_addr_t *dma_src = (dma_addr_t *) src_list;
41 40 struct dma_async_tx_descriptor *tx = NULL;
42 41 int src_off = 0;
43 42 int i;
44 43  
... ... @@ -125,9 +124,14 @@
125 124 int xor_src_cnt;
126 125 int src_off = 0;
127 126 void *dest_buf;
128   - void **srcs = (void **) src_list;
  127 + void **srcs;
129 128  
130   - /* reuse the 'src_list' array to convert to buffer pointers */
  129 + if (submit->scribble)
  130 + srcs = submit->scribble;
  131 + else
  132 + srcs = (void **) src_list;
  133 +
  134 + /* convert to buffer pointers */
131 135 for (i = 0; i < src_cnt; i++)
132 136 srcs[i] = page_address(src_list[i]) + offset;
133 137  
134 138  
135 139  
136 140  
... ... @@ -178,17 +182,26 @@
178 182 struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
179 183 &dest, 1, src_list,
180 184 src_cnt, len);
  185 + dma_addr_t *dma_src = NULL;
  186 +
181 187 BUG_ON(src_cnt <= 1);
182 188  
183   - if (chan) {
  189 + if (submit->scribble)
  190 + dma_src = submit->scribble;
  191 + else if (sizeof(dma_addr_t) <= sizeof(struct page *))
  192 + dma_src = (dma_addr_t *) src_list;
  193 +
  194 + if (dma_src && chan) {
184 195 /* run the xor asynchronously */
185 196 pr_debug("%s (async): len: %zu\n", __func__, len);
186 197  
187 198 return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
188   - submit);
  199 + dma_src, submit);
189 200 } else {
190 201 /* run the xor synchronously */
191 202 pr_debug("%s (sync): len: %zu\n", __func__, len);
  203 + WARN_ONCE(chan, "%s: no space for dma address conversion\n",
  204 + __func__);
192 205  
193 206 /* in the sync case the dest is an implied source
194 207 * (assumes the dest is the first source)
195 208  
... ... @@ -241,11 +254,16 @@
241 254 src_cnt, len);
242 255 struct dma_device *device = chan ? chan->device : NULL;
243 256 struct dma_async_tx_descriptor *tx = NULL;
  257 + dma_addr_t *dma_src = NULL;
244 258  
245 259 BUG_ON(src_cnt <= 1);
246 260  
247   - if (device && src_cnt <= device->max_xor) {
248   - dma_addr_t *dma_src = (dma_addr_t *) src_list;
  261 + if (submit->scribble)
  262 + dma_src = submit->scribble;
  263 + else if (sizeof(dma_addr_t) <= sizeof(struct page *))
  264 + dma_src = (dma_addr_t *) src_list;
  265 +
  266 + if (dma_src && device && src_cnt <= device->max_xor) {
249 267 unsigned long dma_prep_flags;
250 268 int i;
251 269  
... ... @@ -275,6 +293,9 @@
275 293 enum async_tx_flags flags_orig = submit->flags;
276 294  
277 295 pr_debug("%s: (sync) len: %zu\n", __func__, len);
  296 + WARN_ONCE(device && src_cnt <= device->max_xor,
  297 + "%s: no space for dma address conversion\n",
  298 + __func__);
278 299  
279 300 submit->flags |= ASYNC_TX_XOR_DROP_DST;
280 301 submit->flags &= ~ASYNC_TX_ACK;
... ... @@ -292,29 +313,6 @@
292 313 return tx;
293 314 }
294 315 EXPORT_SYMBOL_GPL(async_xor_val);
295   -
296   -static int __init async_xor_init(void)
297   -{
298   - #ifdef CONFIG_DMA_ENGINE
299   - /* To conserve stack space the input src_list (array of page pointers)
300   - * is reused to hold the array of dma addresses passed to the driver.
301   - * This conversion is only possible when dma_addr_t is less than the
302   - * the size of a pointer. HIGHMEM64G is known to violate this
303   - * assumption.
304   - */
305   - BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
306   - #endif
307   -
308   - return 0;
309   -}
310   -
311   -static void __exit async_xor_exit(void)
312   -{
313   - do { } while (0);
314   -}
315   -
316   -module_init(async_xor_init);
317   -module_exit(async_xor_exit);
318 316  
319 317 MODULE_AUTHOR("Intel Corporation");
320 318 MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
... ... @@ -4,7 +4,7 @@
4 4  
5 5 menuconfig DMADEVICES
6 6 bool "DMA Engine support"
7   - depends on !HIGHMEM64G && HAS_DMA
  7 + depends on HAS_DMA
8 8 help
9 9 DMA engines can do asynchronous data transfers without
10 10 involving the host CPU. Currently, this framework can be