Here is example source code for SPI3 with DMA on the 32F417.
/*
 * SPI3_DMA_TransmitReceive - blocking, DMA-driven transfer of Size bytes on SPI3.
 *
 * DMA-only version of HAL_SPI2_TransmitReceive() but fixed for SPI3 and with tx-only / rx-only
 * options added.
 *
 * For use where fast transfers are needed, on the limit of SPI3 speed so with zero gaps. This is
 * impossible to do by polling at 10.5mbps or 21mbps and is probably marginal at 5.25mbps. The
 * 16 bit SPI mode just manages gap-free with polling but works only with even block sizes, and
 * has the "first byte problem" which DMA gets around.
 *
 * ** DMA ONLY, SO NO CCM ACCESS: THE BUFFERS IN USE MUST NOT BE IN CCM (e.g. make them "static") **
 *
 * The function is blocking so the caller can set CS=1 right away (check device data sheet!).
 * A non-blocking version would make sense only if transmitting only (txonly=true) but the caller
 * would then have to tidy up the DMA and SPI3->CR2.
 *
 * _spi3_set_mode() can still be used to set up the clock speed, phase, etc. This function does
 * not modify that configuration: it only toggles SPE in SPI3->CR1 and the two DMA enable bits in
 * SPI3->CR2. It makes sense only if SPI3 is a Master.
 *
 * The DMA1 clock is expected to be enabled already (done in b_main.c).
 *
 * Parameters:
 *   pTxData  Bytes to transmit. Ignored (may be NULL) when rxonly is true.
 *   pRxData  Destination for received bytes. Ignored (may be NULL) when txonly is true.
 *   Size     Number of bytes to transfer; must be non-zero.
 *   txonly   Received data is dumped into an internal byte, so no rx buffer is needed.
 *   rxonly   0x00 is transmitted for every byte, so no tx buffer of "known garbage" is needed.
 *            Superfluous in most cases, but it avoids shifting non-0x00 data to the device while
 *            reading from it. With some devices this can matter. The ADS1118 ADC is one such.
 *   yield    If true, osDelay(1) is called on each poll of the completion flag so equal-priority
 *            RTOS tasks do not have to wait for the DMA to finish. This might be useful for
 *            long/slow transfers. Usually not useful.
 *            31/8/22: if true, worsens corruption of incoming USB VCP data, a lot more than it
 *            is corrupted anyway!
 *
 * txonly and rxonly are intended to be mutually exclusive. If both are set, Size bytes of 0x00
 * are clocked out and everything received is discarded.
 *
 * Returns:
 *   false  Size is 0, a buffer that is actually used is NULL or lies in CCM, or the transfer did
 *          not complete within the poll budget (the DMA streams are then disabled).
 *   true   The RX DMA stream reported transfer complete.
 */

/* Build the DMA_SxCR word for a one-shot, byte-wide, direct-mode SPI3 stream. EN is included,
 * so writing the result to the stream's CR starts the stream. */
static uint32_t spi3_dma_stream_cr(uint32_t priority, bool mem_increment, bool mem_to_periph)
{
    return (0u << 25)                           // CHSEL: ch 0
         | (0u << 23)                           // MBURST: memory burst - single transfer
         | (0u << 21)                           // PBURST: peripheral burst - single transfer
         | (priority << 16)                     // PL: 0 = low ... 3 = highest
         | (0u << 15)                           // PINCOS: no peripheral address increment offset
         | (0u << 13)                           // MSIZE: memory data size: byte
         | (0u << 11)                           // PSIZE: peripheral data size: byte
         | ((mem_increment ? 1u : 0u) << 10)    // MINC: memory address increment
         | (0u << 9)                            // PINC: peripheral address increment: 0
         | (0u << 8)                            // CIRC: no circular mode
         | ((mem_to_periph ? 1u : 0u) << 6)     // DIR: 0 = peripheral to memory, 1 = memory to peripheral
         | (0u << 5)                            // PFCTRL: DMA is flow controller
         | (1u << 0);                           // EN: enable stream
}

/* True if addr lies in CCMRAM (rw) : ORIGIN = 0x10000000, LENGTH = 64K (linkerscript.ld). */
static bool spi3_dma_addr_in_ccm(uint32_t addr)
{
    return (addr >= 0x10000000u) && (addr < 0x10010000u);
}

bool SPI3_DMA_TransmitReceive(uint8_t *pTxData, uint8_t *pRxData, uint16_t Size, bool txonly, bool rxonly, bool yield)
{
    // Dummy DMA endpoints for the one-sided modes.
    static uint8_t txonly_target = 0;   // must not be in CCM
    static uint8_t rxonly_source = 0;   // must not be in CCM

    const uint32_t stream0_flags = 0x3Du << 0;      // DMA1 LIFCR: all stream 0 flags (111101)
    const uint32_t stream7_flags = 0x3Du << 22;     // DMA1 HIFCR: all stream 7 flags (111101)
    const uint32_t stream0_tcif  = 1u << 5;         // DMA1 LISR: TCIF0
    const uint32_t spi_dma_bits  = 3u;              // SPI CR2: TXDMAEN | RXDMAEN

    const bool tx_buf_used = !rxonly;
    const bool rx_buf_used = !txonly;
    const uint32_t txadd = (uint32_t)(uintptr_t)pTxData;
    const uint32_t rxadd = (uint32_t)(uintptr_t)pRxData;

    // Check for invalid inputs. Only a buffer the DMA will really touch has to be valid, so a
    // pointer that is ignored in txonly/rxonly mode is not examined at all.
    if (Size == 0)
    {
        return false;
    }
    if (tx_buf_used && ((pTxData == NULL) || spi3_dma_addr_in_ccm(txadd)))
    {
        return false;
    }
    if (rx_buf_used && ((pRxData == NULL) || spi3_dma_addr_in_ccm(rxadd)))
    {
        return false;
    }

    SPI3->CR1 &= ~SPI_CR1_SPE;                      // disable SPI

    // ---- DMA1 Ch 0 Stream 0 is SPI3 RX ----
    DMA1_Stream0->CR = 0;                           // disable DMA so all regs can be written
    // No wait for EN to read back 0 here: the previous call either saw the transfer complete
    // or disabled the stream itself on timeout.
    DMA1->LIFCR = stream0_flags;                    // clear int flags & transfer complete
    DMA1_Stream0->NDTR = Size;
    DMA1_Stream0->M0AR = txonly ? (uint32_t)(uintptr_t)&txonly_target   // dump rx data here
                                : rxadd;                                // normal mode
    DMA1_Stream0->PAR = (uint32_t)(uintptr_t)&(SPI3->DR);               // peripheral address
    DMA1_Stream0->FCR = 0;                          // direct mode
    // Highest priority, peripheral to memory. Memory address is stepped only when there is a
    // real rx buffer. This single write also enables the stream.
    DMA1_Stream0->CR = spi3_dma_stream_cr(3u, !txonly, false);

    // ---- DMA1 Ch 0 Stream 7 is SPI3 TX ----
    DMA1_Stream7->CR = 0;                           // disable DMA so all regs can be written
    DMA1->HIFCR = stream7_flags;                    // clear int flags & transfer complete
    DMA1_Stream7->NDTR = Size;
    DMA1_Stream7->M0AR = rxonly ? (uint32_t)(uintptr_t)&rxonly_source   // fetch dummy tx data here
                                : txadd;                                // normal mode
    DMA1_Stream7->PAR = (uint32_t)(uintptr_t)&(SPI3->DR);               // peripheral address
    DMA1_Stream7->FCR = 0;                          // direct mode
    // Low priority, memory to peripheral. Memory address is stepped only when there is a
    // real tx buffer. This single write also enables the stream.
    DMA1_Stream7->CR = spi3_dma_stream_cr(0u, !rxonly, true);

    // Config SPI3 to let DMA handle the data. These bits are cleared again below.
    // This starts the transfer.
    SPI3->CR2 |= spi_dma_bits;                      // TXDMAEN, RXDMAEN: 11 - both set in one go
    SPI3->CR1 |= SPI_CR1_SPE;                       // SPE=1 enable SPI

    // Wait for DMA to finish. Blocking is necessary to prevent device CS=1 too early.
    // A failure should be impossible short of duff silicon, because we are a Master and
    // generating the SPI clock; a primitive timeout (20/11/22) is implemented anyway as a
    // budget of 2^32-1 polls of the completion flag.
    //
    // End of transfer is detected via TCIF0 of the RX stream. Polling NDTR==0 or waiting for
    // the DMA to clear EN also work and give the same time from last SPI clock to CS=1
    // (1.8us), but NDTR may be less reliable:
    // http://efton.sk/STM32/gotcha/g20.html
    // https://www.eevblog.com/forum/microcontrollers/32f4-3-ways-to-detect-end-of-dma-transfer/
    bool completed = false;
    for (uint32_t polls_left = 0xFFFFFFFFu; polls_left != 0; polls_left--)
    {
        if ((DMA1->LISR & stream0_tcif) != 0)
        {
            completed = true;
            break;
        }
        if (yield)
        {
            osDelay(1);                             // release to RTOS (see notes in header)
        }
    }

    SPI3->CR2 &= ~spi_dma_bits;                     // TXDMAEN, RXDMAEN: 00 - both cleared in one go

    if (!completed)
    {
        // Timed out: stop both streams so they cannot touch the caller's buffers after we
        // return. On the normal path nothing is written here.
        DMA1_Stream0->CR = 0;
        DMA1_Stream7->CR = 0;
    }

    // Clear int pending flags. They get cleared at the top of this function anyway, but...
    DMA1->LIFCR = stream0_flags;
    DMA1->HIFCR = stream7_flags;

    // Clear any rx data and the overrun flag in case not all received data was read.
    // The short delay lets the very last transfer propagate to DR first. It should be redundant
    // when the RX DMA has read out everything, but it is kept.
    hang_around_us(1);
    (void)SPI3->DR;
    (void)SPI3->DR;
    (void)SPI3->SR;

    SPI3->CR1 &= ~SPI_CR1_SPE;                      // SPE=0 disable SPI

    return completed;
}