Here's a snippet of code that uses Maple DMA to implement memcpy()- and memset()-style operations on 32-bit ints. All of the DMA channels support MEM2MEM mode. In testing, copying 1000 32-bit words took 107 us (299 Mbit/s) with DMA versus 348 us (92 Mbit/s) with a for-loop. Setting 1000 32-bit words to a 32-bit value took 94 us (340 Mbit/s) with DMA versus 223 us (144 Mbit/s) with a for-loop. A C++ implementation could use overloaded methods.
/* DMA controller and channel used by the blocking transfers in this file. */
#define DMAn    DMA1
#define DMA_CHn DMA_CH1

/* Completion flag: set to true by dma_isr() and polled by the code that
 * started the transfer. volatile because it is written from interrupt
 * context while the main code spins on it. */
static volatile bool DMADONE = false;
/*
 * DMA interrupt handler for DMAn/DMA_CHn.
 *
 * Detaches itself from the channel, so it runs at most once per
 * dma_attach_interrupt() call, and then raises DMADONE to release the
 * code that is busy-waiting on the transfer.
 */
void dma_isr(void) {
    dma_detach_interrupt(DMAn, DMA_CHn);
    DMADONE = true;
}
/*
 * Copy n 32-bit words from src to dst with a memory-to-memory DMA transfer
 * on DMAn/DMA_CHn, blocking (busy-waiting) until the copy has finished.
 *
 * dst  destination buffer, at least n words long
 * src  source buffer, at least n words long
 * n    number of 32-bit words to copy; 0 is a no-op
 *
 * The DMA channel's transfer counter is 16 bits wide (STM32F1 DMA_CNDTRx),
 * so the copy is issued in chunks of at most 0xFFFF words. A count of zero
 * is never programmed: the channel would not transfer anything, the
 * transfer-complete interrupt would never fire, and the wait loop would
 * spin forever.
 */
void memcpy32(uint32_t *dst, uint32_t *src, uint32_t n) {
    const uint32_t max_words = 0xFFFFu; /* largest count the channel accepts */

    dma_init(DMAn);

    while (n > 0) {
        uint32_t chunk = (n > max_words) ? max_words : n;

        DMADONE = false;
        /* dst goes in the "peripheral" slot and src in the "memory" slot;
         * DMA_FROM_MEM makes the transfer run memory -> peripheral, and
         * both sides auto-increment. */
        dma_setup_transfer(DMAn, DMA_CHn,
                           dst, DMA_SIZE_32BITS,
                           src, DMA_SIZE_32BITS,
                           (DMA_FROM_MEM | DMA_MEM_2_MEM
                            | DMA_MINC_MODE | DMA_PINC_MODE
                            | DMA_TRNS_CMPLT /* interrupt at the end */));
        /* dma_isr() detaches itself, so it is re-attached for every chunk. */
        dma_attach_interrupt(DMAn, DMA_CHn, dma_isr);
        dma_set_priority(DMAn, DMA_CHn, DMA_PRIORITY_VERY_HIGH);
        dma_set_num_transfers(DMAn, DMA_CHn, chunk);
        dma_enable(DMAn, DMA_CHn);

        while (!DMADONE) {
            /* wait for dma_isr() to signal completion */
        }
        dma_disable(DMAn, DMA_CHn);

        dst += chunk;
        src += chunk;
        n -= chunk;
    }
}
/*
 * Fill n 32-bit words starting at dst with the value `word`, using a
 * memory-to-memory DMA transfer on DMAn/DMA_CHn, and block (busy-wait)
 * until the fill has finished.
 *
 * dst   destination buffer, at least n words long
 * word  32-bit value written to every destination word
 * n     number of 32-bit words to set; 0 is a no-op
 *
 * The DMA channel's transfer counter is 16 bits wide (STM32F1 DMA_CNDTRx),
 * so the fill is issued in chunks of at most 0xFFFF words. A count of zero
 * is never programmed: the channel would not transfer anything, the
 * transfer-complete interrupt would never fire, and the wait loop would
 * spin forever.
 */
void memset32(uint32_t *dst, uint32_t word, uint32_t n) {
    const uint32_t max_words = 0xFFFFu; /* largest count the channel accepts */

    dma_init(DMAn);

    while (n > 0) {
        uint32_t chunk = (n > max_words) ? max_words : n;

        DMADONE = false;
        /* Source is the local copy of `word`; it stays valid because this
         * function does not return until the transfer is done. Only the
         * destination ("peripheral" slot) increments, so the same word is
         * written repeatedly. */
        dma_setup_transfer(DMAn, DMA_CHn,
                           dst, DMA_SIZE_32BITS,
                           &word, DMA_SIZE_32BITS,
                           (DMA_FROM_MEM | DMA_MEM_2_MEM
                            | DMA_PINC_MODE
                            | DMA_TRNS_CMPLT /* interrupt at the end */));
        /* dma_isr() detaches itself, so it is re-attached for every chunk. */
        dma_attach_interrupt(DMAn, DMA_CHn, dma_isr);
        dma_set_priority(DMAn, DMA_CHn, DMA_PRIORITY_VERY_HIGH);
        dma_set_num_transfers(DMAn, DMA_CHn, chunk);
        dma_enable(DMAn, DMA_CHn);

        while (!DMADONE) {
            /* wait for dma_isr() to signal completion */
        }
        dma_disable(DMAn, DMA_CHn);

        dst += chunk;
        n -= chunk;
    }
}