From 3165c680d63c1f2307964b3dad1e17e4261a49ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 24 May 2021 20:06:31 +0200 Subject: [PATCH] dma: don't swap buffers * This uses a new closure-based method to the DMA HAL implementation which gives access to the inactive buffer directly. * It removes changing addresses, the third buffer for DBM, the inactive address poisoning, and allows the cancellation of the redundant repeat memory barriers and compiler fences. * This is now around 20 instructions per buffer down from about 100 cycles before. * Also introduces a new `SampleBuffer` type alias. * The required unpacking of the resources structure is a bit annoying but could probably abstraced away. TODO: * Test * Adapt `lockin` --- Cargo.lock | 3 +- Cargo.toml | 4 +- src/bin/dual-iir.rs | 100 ++++++++++++++++++------------ src/hardware/adc.rs | 65 +++++++------------ src/hardware/dac.rs | 43 +++++-------- src/hardware/design_parameters.rs | 2 + 6 files changed, 107 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 274b2bb..49ac680 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -802,8 +802,7 @@ dependencies = [ [[package]] name = "stm32h7xx-hal" version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67034b80041bc33a48df1c1c435b6ae3bb18c35e42aa7e702ce8363b96793398" +source = "git+https://github.com/quartiq/stm32h7xx-hal.git?rev=cca4ecc#cca4ecc3e0cc8cb2f7a9652c4099d50b44977493" dependencies = [ "bare-metal 1.0.0", "cast", diff --git a/Cargo.toml b/Cargo.toml index 790363a..0d84c27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,7 +56,9 @@ rev = "523d71d" [dependencies.stm32h7xx-hal] features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"] -version = "0.9.0" +# version = "0.9.0" +git = "https://github.com/quartiq/stm32h7xx-hal.git" +rev = "cca4ecc" [patch.crates-io.miniconf] git = "https://github.com/quartiq/miniconf.git" diff --git a/src/bin/dual-iir.rs b/src/bin/dual-iir.rs index 29eb474..dec0697 100644 --- a/src/bin/dual-iir.rs +++ b/src/bin/dual-iir.rs @@ -2,6 +2,8 @@ #![no_std] #![no_main] +use core::sync::atomic::{fence, Ordering}; + use stabilizer::{hardware, net}; use miniconf::Miniconf; @@ -120,51 +122,73 @@ const APP: () = { /// Because the ADC and DAC operate at the same rate, these two constraints actually implement /// the same time bounds, meeting one also means the other is also met. #[task(binds=DMA1_STR4, resources=[adcs, digital_inputs, dacs, iir_state, settings, telemetry], priority=2)] - fn process(c: process::Context) { - let adc_samples = [ - c.resources.adcs.0.acquire_buffer(), - c.resources.adcs.1.acquire_buffer(), - ]; + fn process(mut c: process::Context) { + let adc0 = &mut c.resources.adcs.0; + let adc1 = &mut c.resources.adcs.1; + let dac0 = &mut c.resources.dacs.0; + let dac1 = &mut c.resources.dacs.1; + let di = &c.resources.digital_inputs; + let settings = &c.resources.settings; + let iir_state = &mut c.resources.iir_state; + let telemetry = &mut c.resources.telemetry; + adc0.with_buffer(|a0| { + adc1.with_buffer(|a1| { + dac0.with_buffer(|d0| { + dac1.with_buffer(|d1| { + let adc_samples = [a0, a1]; + let dac_samples = [d0, d1]; - let dac_samples = [ - c.resources.dacs.0.acquire_buffer(), - c.resources.dacs.1.acquire_buffer(), - ]; + let digital_inputs = + [di.0.is_high().unwrap(), di.1.is_high().unwrap()]; - let digital_inputs = [ - c.resources.digital_inputs.0.is_high().unwrap(), - c.resources.digital_inputs.1.is_high().unwrap(), - ]; + let hold = settings.force_hold + || (digital_inputs[1] && settings.allow_hold); - let hold = c.resources.settings.force_hold - || (digital_inputs[1] && c.resources.settings.allow_hold); + // Preserve instruction and data ordering w.r.t. DMA flag access. + fence(Ordering::SeqCst); - for channel in 0..adc_samples.len() { - for sample in 0..adc_samples[0].len() { - let mut y = f32::from(adc_samples[channel][sample] as i16); - for i in 0..c.resources.iir_state[channel].len() { - y = c.resources.settings.iir_ch[channel][i].update( - &mut c.resources.iir_state[channel][i], - y, - hold, - ); - } - // Note(unsafe): The filter limits ensure that the value is in range. - // The truncation introduces 1/2 LSB distortion. - let y = unsafe { y.to_int_unchecked::() }; - // Convert to DAC code - dac_samples[channel][sample] = DacCode::from(y).0; - } - } + for channel in 0..adc_samples.len() { + adc_samples[channel] + .iter() + .zip(dac_samples[channel].iter_mut()) + .map(|(ai, di)| { + let x = f32::from(*ai as i16); + let y = settings.iir_ch[channel] + .iter() + .zip(iir_state[channel].iter_mut()) + .fold(x, |yi, (ch, state)| { + ch.update(state, yi, hold) + }); + // Note(unsafe): The filter limits must ensure that + // the value is in range. + // The truncation introduces 1/2 LSB distortion. + let y: i16 = + unsafe { y.to_int_unchecked() }; + // Convert to DAC code + *di = DacCode::from(y).0; + }) + .last(); + } - // Update telemetry measurements. - c.resources.telemetry.adcs = - [AdcCode(adc_samples[0][0]), AdcCode(adc_samples[1][0])]; + // Update telemetry measurements. + telemetry.adcs = [ + AdcCode(adc_samples[0][0]), + AdcCode(adc_samples[1][0]), + ]; - c.resources.telemetry.dacs = - [DacCode(dac_samples[0][0]), DacCode(dac_samples[1][0])]; + telemetry.dacs = [ + DacCode(dac_samples[0][0]), + DacCode(dac_samples[1][0]), + ]; - c.resources.telemetry.digital_inputs = digital_inputs; + telemetry.digital_inputs = digital_inputs; + + // Preserve instruction and data ordering w.r.t. DMA flag access. + fence(Ordering::SeqCst); + }) + }) + }) + }); } #[idle(resources=[network], spawn=[settings_update])] diff --git a/src/hardware/adc.rs b/src/hardware/adc.rs index 9289097..101d762 100644 --- a/src/hardware/adc.rs +++ b/src/hardware/adc.rs @@ -29,15 +29,9 @@ ///! available. When enough samples have been collected, a transfer-complete interrupt is generated ///! and the ADC samples are available for processing. ///! -///! The SPI peripheral internally has an 8- or 16-byte TX and RX FIFO, which corresponds to a 4- or -///! 8-sample buffer for incoming ADC samples. During the handling of the DMA transfer completion, -///! there is a small window where buffers are swapped over where it's possible that a sample could -///! be lost. In order to avoid this, the SPI RX FIFO is effectively used as a "sample overflow" -///! region and can buffer a number of samples until the next DMA transfer is configured. If a DMA -///! transfer is still not set in time, the SPI peripheral will generate an input-overrun interrupt. -///! This interrupt then serves as a means of detecting if samples have been lost, which will occur -///! whenever data processing takes longer than the collection period. -///! +///! After a complete transfer of a batch of samples, the inactive buffer is available to the +///! user for processing. The processing must complete before the DMA transfer of the next batch +///! completes. ///! ///! ## Starting Data Collection ///! @@ -68,13 +62,12 @@ ///! sample DMA requests, which can be completed by setting e.g. ADC0's comparison to a counter ///! value of 0 and ADC1's comparison to a counter value of 1. ///! -///! In this implementation, single buffer mode DMA transfers are used because the SPI RX FIFO can -///! be used as a means to both detect and buffer ADC samples during the buffer swap-over. Because -///! of this, double-buffered mode does not offer any advantages over single-buffered mode (unless -///! double-buffered mode offers less overhead due to the DMA disable/enable procedure). +///! In this implementation, double buffer mode DMA transfers are used because the SPI RX FIFOs +///! have finite depth, FIFO access is slower than AXISRAM access, and because the single +///! buffer mode DMA disable/enable and buffer update sequence is slow. use stm32h7xx_hal as hal; -use super::design_parameters::SAMPLE_BUFFER_SIZE; +use super::design_parameters::{SampleBuffer, SAMPLE_BUFFER_SIZE}; use super::timers; use hal::dma::{ @@ -119,8 +112,7 @@ static mut SPI_EOT_CLEAR: [u32; 1] = [0x00]; // processed). Note that the contents of AXI SRAM is uninitialized, so the buffer contents on // startup are undefined. The dimensions are `ADC_BUF[adc_index][ping_pong_index][sample_index]`. #[link_section = ".axisram.buffers"] -static mut ADC_BUF: [[[u16; SAMPLE_BUFFER_SIZE]; 2]; 2] = - [[[0; SAMPLE_BUFFER_SIZE]; 2]; 2]; +static mut ADC_BUF: [[SampleBuffer; 2]; 2] = [[[0; SAMPLE_BUFFER_SIZE]; 2]; 2]; macro_rules! adc_input { ($name:ident, $index:literal, $trigger_stream:ident, $data_stream:ident, $clear_stream:ident, @@ -192,12 +184,11 @@ macro_rules! adc_input { /// Represents data associated with ADC. pub struct $name { - next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>, transfer: Transfer< hal::dma::dma::$data_stream, hal::spi::Spi, PeripheralToMemory, - &'static mut [u16; SAMPLE_BUFFER_SIZE], + &'static mut SampleBuffer, hal::dma::DBTransfer, >, trigger_transfer: Transfer< @@ -316,6 +307,7 @@ macro_rules! adc_input { // data stream is used to trigger a transfer completion interrupt. let data_config = DmaConfig::default() .memory_increment(true) + .double_buffer(true) .transfer_complete_interrupt($index == 1) .priority(Priority::VeryHigh); @@ -333,17 +325,14 @@ macro_rules! adc_input { Transfer::init( data_stream, spi, - // Note(unsafe): The ADC_BUF[$index][0] is "owned" by this peripheral. + // Note(unsafe): The ADC_BUF[$index] is "owned" by this peripheral. // It shall not be used anywhere else in the module. unsafe { &mut ADC_BUF[$index][0] }, - None, + unsafe { Some(&mut ADC_BUF[$index][1]) }, data_config, ); Self { - // Note(unsafe): The ADC_BUF[$index][1] is "owned" by this peripheral. It - // shall not be used anywhere else in the module. - next_buffer: unsafe { Some(&mut ADC_BUF[$index][1]) }, transfer: data_transfer, trigger_transfer, clear_transfer, @@ -364,27 +353,17 @@ macro_rules! adc_input { } - /// Obtain a buffer filled with ADC samples. + /// Wait for the transfer of the currently active buffer to complete, + /// then call a function on the now inactive buffer and acknowledge the + /// transfer complete flag. /// - /// # Returns - /// A reference to the underlying buffer that has been filled with ADC samples. - pub fn acquire_buffer(&mut self) -> &[u16; SAMPLE_BUFFER_SIZE] { - // Wait for the transfer to fully complete before continuing. Note: If a device - // hangs up, check that this conditional is passing correctly, as there is no - // time-out checks here in the interest of execution speed. - while !self.transfer.get_transfer_complete_flag() {} - - let next_buffer = self.next_buffer.take().unwrap(); - - // Start the next transfer. - self.transfer.clear_interrupts(); - let (prev_buffer, _, _) = - self.transfer.next_transfer(next_buffer).unwrap(); - - // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 - self.next_buffer.replace(prev_buffer); - - self.next_buffer.as_ref().unwrap() + /// NOTE(unsafe): Memory safety and access ordering is not guaranteed + /// (see the HAL DMA docs). + pub fn with_buffer(&mut self, f: F) -> R + where + F: FnOnce(&mut SampleBuffer) -> R, + { + unsafe { self.transfer.next_dbm_transfer_with(|buf, _current| f(buf)) } } } } diff --git a/src/hardware/dac.rs b/src/hardware/dac.rs index 56f3033..91f8610 100644 --- a/src/hardware/dac.rs +++ b/src/hardware/dac.rs @@ -52,7 +52,7 @@ ///! served promptly after the transfer completes. use stm32h7xx_hal as hal; -use super::design_parameters::SAMPLE_BUFFER_SIZE; +use super::design_parameters::{SampleBuffer, SAMPLE_BUFFER_SIZE}; use super::timers; use hal::dma::{ @@ -66,8 +66,7 @@ use hal::dma::{ // processed). Note that the contents of AXI SRAM is uninitialized, so the buffer contents on // startup are undefined. The dimensions are `ADC_BUF[adc_index][ping_pong_index][sample_index]`. #[link_section = ".axisram.buffers"] -static mut DAC_BUF: [[[u16; SAMPLE_BUFFER_SIZE]; 3]; 2] = - [[[0; SAMPLE_BUFFER_SIZE]; 3]; 2]; +static mut DAC_BUF: [[SampleBuffer; 2]; 2] = [[[0; SAMPLE_BUFFER_SIZE]; 2]; 2]; /// Custom type for referencing DAC output codes. /// The internal integer is the raw code written to the DAC output register. @@ -137,13 +136,12 @@ macro_rules! dac_output { /// Represents data associated with DAC. pub struct $name { - next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>, // Note: SPI TX functionality may not be used from this structure to ensure safety with DMA. transfer: Transfer< hal::dma::dma::$data_stream, $spi, MemoryToPeripheral, - &'static mut [u16; SAMPLE_BUFFER_SIZE], + &'static mut SampleBuffer, hal::dma::DBTransfer, >, } @@ -198,33 +196,26 @@ macro_rules! dac_output { trigger_config, ); - Self { - transfer, - // Note(unsafe): This buffer is only used once and provided for the next DMA transfer. - next_buffer: unsafe { Some(&mut DAC_BUF[$index][2]) }, - } + Self { transfer } } pub fn start(&mut self) { self.transfer.start(|spi| spi.start_dma()); } - /// Acquire the next output buffer to populate it with DAC codes. - pub fn acquire_buffer(&mut self) -> &mut [u16; SAMPLE_BUFFER_SIZE] { - // Note: If a device hangs up, check that this conditional is passing correctly, as - // there is no time-out checks here in the interest of execution speed. - while !self.transfer.get_transfer_complete_flag() {} - - let next_buffer = self.next_buffer.take().unwrap(); - - // Start the next transfer. - let (prev_buffer, _, _) = - self.transfer.next_transfer(next_buffer).unwrap(); - - // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 - self.next_buffer.replace(prev_buffer); - - self.next_buffer.as_mut().unwrap() + /// Wait for the transfer of the currently active buffer to complete, + /// then call a function on the now inactive buffer and acknowledge the + /// transfer complete flag. + /// + /// NOTE(unsafe): Memory safety and access ordering is not guaranteed + /// (see the HAL DMA docs). + pub fn with_buffer(&mut self, f: F) -> R + where + F: FnOnce(&mut SampleBuffer) -> R, + { + unsafe { + self.transfer.next_dbm_transfer_with(|buf, _current| f(buf)) + } } } }; diff --git a/src/hardware/design_parameters.rs b/src/hardware/design_parameters.rs index 9a4279b..d04cc75 100644 --- a/src/hardware/design_parameters.rs +++ b/src/hardware/design_parameters.rs @@ -50,5 +50,7 @@ pub const ADC_SAMPLE_TICKS: u16 = 1 << ADC_SAMPLE_TICKS_LOG2; pub const SAMPLE_BUFFER_SIZE_LOG2: u8 = 3; pub const SAMPLE_BUFFER_SIZE: usize = 1 << SAMPLE_BUFFER_SIZE_LOG2; +pub type SampleBuffer = [u16; SAMPLE_BUFFER_SIZE]; + // The MQTT broker IPv4 address pub const MQTT_BROKER: [u8; 4] = [10, 34, 16, 10];