Commit 7bf1a0d7385fd5b4cddbc623dbea39be5cc1145a

Authored by Peter Ujfalusi
Committed by Tero Kristo
1 parent 64fa8f4c2e

dmaengine: edma: Optimize memcpy operation

If the transfer is shorter than 64K we can complete it with one ACNT burst
by configuring ACNT to the length of the copy; this requires one paRAM slot.
Otherwise we use two paRAM slots for the copy:
slot1: will copy (length / 32767) blocks of 32767 bytes each
slot2: will be configured to copy the remaining data.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Vignesh R <vigneshr@ti.com>

Showing 1 changed file with 70 additions and 20 deletions Side-by-side Diff

... ... @@ -328,18 +328,16 @@
328 328 */
329 329 static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
330 330 dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
331   - enum dma_slave_buswidth dev_width, unsigned int dma_length,
  331 + unsigned int acnt, unsigned int dma_length,
332 332 enum dma_transfer_direction direction)
333 333 {
334 334 struct edma_chan *echan = to_edma_chan(chan);
335 335 struct device *dev = chan->device->dev;
336 336 struct edmacc_param *param = &epset->param;
337   - int acnt, bcnt, ccnt, cidx;
  337 + int bcnt, ccnt, cidx;
338 338 int src_bidx, dst_bidx, src_cidx, dst_cidx;
339 339 int absync;
340 340  
341   - acnt = dev_width;
342   -
343 341 /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
344 342 if (!burst)
345 343 burst = 1;
346 344  
347 345  
348 346  
349 347  
350 348  
351 349  
352 350  
353 351  
... ... @@ -541,41 +539,93 @@
541 539 struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
542 540 size_t len, unsigned long tx_flags)
543 541 {
544   - int ret;
  542 + int ret, nslots;
545 543 struct edma_desc *edesc;
546 544 struct device *dev = chan->device->dev;
547 545 struct edma_chan *echan = to_edma_chan(chan);
548   - unsigned int width;
  546 + unsigned int width, pset_len;
549 547  
550 548 if (unlikely(!echan || !len))
551 549 return NULL;
552 550  
553   - edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC);
  551 + if (len < SZ_64K) {
  552 + /*
  553 + * Transfer size less than 64K can be handled with one paRAM
  554 + * slot. ACNT = length
  555 + */
  556 + width = len;
  557 + pset_len = len;
  558 + nslots = 1;
  559 + } else {
  560 + /*
  561 + * Transfer size bigger than 64K will be handled with maximum of
  562 + * two paRAM slots.
  563 + * slot1: ACNT = 32767, length1: (length / 32767)
  564 + * slot2: the remaining amount of data.
  565 + */
  566 + width = SZ_32K - 1;
  567 + pset_len = rounddown(len, width);
  568 + /* One slot is enough for lengths multiple of (SZ_32K -1) */
  569 + if (unlikely(pset_len == len))
  570 + nslots = 1;
  571 + else
  572 + nslots = 2;
  573 + }
  574 +
  575 + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
  576 + GFP_ATOMIC);
554 577 if (!edesc) {
555 578 dev_dbg(dev, "Failed to allocate a descriptor\n");
556 579 return NULL;
557 580 }
558 581  
559   - edesc->pset_nr = 1;
  582 + edesc->pset_nr = nslots;
  583 + edesc->residue = edesc->residue_stat = len;
  584 + edesc->direction = DMA_MEM_TO_MEM;
  585 + edesc->echan = echan;
560 586  
561   - width = 1 << __ffs((src | dest | len));
562   - if (width > DMA_SLAVE_BUSWIDTH_64_BYTES)
563   - width = DMA_SLAVE_BUSWIDTH_64_BYTES;
564   -
565 587 ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
566   - width, len, DMA_MEM_TO_MEM);
567   - if (ret < 0)
  588 + width, pset_len, DMA_MEM_TO_MEM);
  589 + if (ret < 0) {
  590 + kfree(edesc);
568 591 return NULL;
  592 + }
569 593  
570 594 edesc->absync = ret;
571 595  
572   - /*
573   - * Enable intermediate transfer chaining to re-trigger channel
574   - * on completion of every TR, and enable transfer-completion
575   - * interrupt on completion of the whole transfer.
576   - */
577 596 edesc->pset[0].param.opt |= ITCCHEN;
578   - edesc->pset[0].param.opt |= TCINTEN;
  597 + if (nslots == 1) {
  598 + /* Enable transfer complete interrupt */
  599 + edesc->pset[0].param.opt |= TCINTEN;
  600 + } else {
  601 + /* Enable transfer complete chaining for the first slot */
  602 + edesc->pset[0].param.opt |= TCCHEN;
  603 +
  604 + if (echan->slot[1] < 0) {
  605 + echan->slot[1] =
  606 + edma_alloc_slot(EDMA_CTLR(echan->ch_num),
  607 + EDMA_SLOT_ANY);
  608 + if (echan->slot[1] < 0) {
  609 + kfree(edesc);
  610 + dev_err(dev, "%s: Failed to allocate slot\n",
  611 + __func__);
  612 + return NULL;
  613 + }
  614 + }
  615 + dest += pset_len;
  616 + src += pset_len;
  617 + pset_len = width = len % (SZ_32K - 1);
  618 +
  619 + ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1,
  620 + width, pset_len, DMA_MEM_TO_MEM);
  621 + if (ret < 0) {
  622 + kfree(edesc);
  623 + return NULL;
  624 + }
  625 +
  626 + edesc->pset[1].param.opt |= ITCCHEN;
  627 + edesc->pset[1].param.opt |= TCINTEN;
  628 + }
579 629  
580 630 return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
581 631 }