From 3a59f3e989c125d886f4a7d9b29ed1eb60aa71c8 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Wed, 11 Nov 2020 18:28:48 +0100 Subject: [PATCH 01/44] Adding WIP updates for digital input stamping --- Cargo.lock | 1 - Cargo.toml | 5 +- src/adc.rs | 22 +++++-- src/digital_input_stamper.rs | 84 ++++++++++++++++++++++++++ src/hrtimer.rs | 4 +- src/main.rs | 77 ++++++++++++++++-------- src/pounder/mod.rs | 6 +- src/sampling_timer.rs | 112 +++++++++++++++++++++++++++++++++++ 8 files changed, 273 insertions(+), 38 deletions(-) create mode 100644 src/digital_input_stamper.rs create mode 100644 src/sampling_timer.rs diff --git a/Cargo.lock b/Cargo.lock index f247a6c..c6c3f58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -501,7 +501,6 @@ dependencies = [ [[package]] name = "stm32h7xx-hal" version = "0.8.0" -source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/dma-rtic-example#d8cb6fa5099282665f5e5068a9dcdc9ebaa63240" dependencies = [ "bare-metal 1.0.0", "cast", diff --git a/Cargo.toml b/Cargo.toml index 049e61c..5b41667 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,8 +54,9 @@ path = "ad9959" [dependencies.stm32h7xx-hal] features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"] -git = "https://github.com/quartiq/stm32h7xx-hal" -branch = "feature/dma-rtic-example" +# git = "https://github.com/quartiq/stm32h7xx-hal" +# branch = "feature/dma-rtic-example" +path = "../stm32h7xx-hal" [features] semihosting = ["panic-semihosting", "cortex-m-log/semihosting"] diff --git a/src/adc.rs b/src/adc.rs index e3310f4..9971fbb 100644 --- a/src/adc.rs +++ b/src/adc.rs @@ -14,8 +14,8 @@ ///! both transfers are completed before reading the data. This is usually not significant for ///! busy-waiting because the transfers should complete at approximately the same time. 
use super::{ - hal, DMAReq, DmaConfig, MemoryToPeripheral, PeripheralToMemory, Priority, - TargetAddress, Transfer, + hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, + PeripheralToMemory, Priority, TargetAddress, Transfer, }; // The desired ADC input buffer size. This is use configurable. @@ -142,11 +142,18 @@ impl Adc0Input { /// * `trigger_stream` - The DMA stream used to trigger each ADC transfer by writing a word into /// the SPI TX FIFO. /// * `data_stream` - The DMA stream used to read samples received over SPI into a data buffer. + /// * `_trigger_channel` - The ADC sampling timer output compare channel for read triggers. pub fn new( spi: hal::spi::Spi, trigger_stream: hal::dma::dma::Stream0, data_stream: hal::dma::dma::Stream1, + trigger_channel: sampling_timer::Timer2Channel1, ) -> Self { + // Generate DMA events when an output compare of the timer hitting zero (timer roll over) + // occurs. + trigger_channel.listen_dma(); + trigger_channel.to_output_compare(0); + // The trigger stream constantly writes to the TX FIFO using a static word (dont-care // contents). Thus, neither the memory or peripheral address ever change. This is run in // circular mode to be completed at every DMA request. @@ -224,7 +231,7 @@ impl Adc0Input { // Start the next transfer. self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); self.next_buffer.replace(prev_buffer); @@ -256,11 +263,18 @@ impl Adc1Input { /// * `spi` - The SPI interface connected to ADC1. /// * `trigger_stream` - The DMA stream used to trigger ADC conversions on the SPI interface. /// * `data_stream` - The DMA stream used to read ADC samples from the SPI RX FIFO. + /// * `trigger_channel` - The ADC sampling timer output compare channel for read triggers. 
pub fn new( spi: hal::spi::Spi, trigger_stream: hal::dma::dma::Stream2, data_stream: hal::dma::dma::Stream3, + trigger_channel: sampling_timer::Timer2Channel2, ) -> Self { + // Generate DMA events when an output compare of the timer hitting zero (timer roll over) + // occurs. + trigger_channel.listen_dma(); + trigger_channel.to_output_compare(0); + // The trigger stream constantly writes to the TX FIFO using a static word (dont-care // contents). Thus, neither the memory or peripheral address ever change. This is run in // circular mode to be completed at every DMA request. @@ -339,7 +353,7 @@ impl Adc1Input { // Start the next transfer. self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); self.next_buffer.replace(prev_buffer); diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs new file mode 100644 index 0000000..88b1c58 --- /dev/null +++ b/src/digital_input_stamper.rs @@ -0,0 +1,84 @@ +use super::{hal, sampling_timer, DmaConfig, PeripheralToMemory, Transfer}; + +const INPUT_BUFFER_SIZE: usize = 1; + +#[link_section = ".axisram.buffers"] +static mut BUF0: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE]; + +#[link_section = ".axisram.buffers"] +static mut BUF1: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE]; + +pub struct InputStamper { + _di0_trigger: hal::gpio::gpioa::PA3>, + timestamp_buffer: heapless::Vec, + next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>, + transfer: Transfer< + hal::dma::dma::Stream4, + sampling_timer::Timer2Channel4, + PeripheralToMemory, + &'static mut [u16; INPUT_BUFFER_SIZE], + >, +} + +impl InputStamper { + pub fn new( + trigger: hal::gpio::gpioa::PA3>, + stream: hal::dma::dma::Stream4, + timer_channel: sampling_timer::Timer2Channel4, + ) -> Self { + // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the + // capture source. 
+ timer_channel.listen_dma(); + timer_channel.to_input_capture(sampling_timer::CC4S_A::TI4); + + // Set up the DMA transfer. + let dma_config = DmaConfig::default() + .memory_increment(true) + .peripheral_increment(false); + + let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = + Transfer::init( + stream, + timer_channel, + unsafe { &mut BUF0 }, + None, + dma_config, + ); + + timestamp_transfer.start(|_| {}); + + Self { + timestamp_buffer: heapless::Vec::new(), + next_buffer: unsafe { Some(&mut BUF1) }, + transfer: timestamp_transfer, + _di0_trigger: trigger, + } + } + + pub fn transfer_complete_handler(&mut self) { + let next_buffer = self.next_buffer.take().unwrap(); + self.transfer.clear_interrupts(); + let (prev_buffer, _, remaining_transfers) = + self.transfer.next_transfer(next_buffer).unwrap(); + + let valid_count = prev_buffer.len() - remaining_transfers; + self.timestamp_buffer + .extend_from_slice(&prev_buffer[..valid_count]) + .unwrap(); + + self.next_buffer.replace(prev_buffer); + } + + pub fn with_timestamps(&mut self, f: F) + where + F: FnOnce(&[u16]), + { + // First, run the transfer complete handler to retrieve any timestamps that are pending in + // the DMA transfer. + self.transfer_complete_handler(); + + f(self.timestamp_buffer.as_ref()); + + self.timestamp_buffer.clear(); + } +} diff --git a/src/hrtimer.rs b/src/hrtimer.rs index d344396..47ea5c2 100644 --- a/src/hrtimer.rs +++ b/src/hrtimer.rs @@ -47,7 +47,8 @@ impl HighResTimerE { let minimum_duration = set_duration + set_offset; let source_frequency: u32 = self.clocks.timy_ker_ck().0; - let source_cycles = (minimum_duration * source_frequency as f32) as u32 + 1; + let source_cycles = + (minimum_duration * source_frequency as f32) as u32 + 1; // Determine the clock divider, which may be 1, 2, or 4. We will choose a clock divider that // allows us the highest resolution per tick, so lower dividers are favored. 
@@ -92,7 +93,6 @@ impl HighResTimerE { } } - // Enable the timer now that it is configured. self.master.mcr.modify(|_, w| w.tecen().set_bit()); } diff --git a/src/main.rs b/src/main.rs index f1d6541..d8a5d7e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -59,10 +59,12 @@ static mut DES_RING: ethernet::DesRing = ethernet::DesRing::new(); mod adc; mod afe; mod dac; +mod digital_input_stamper; mod eeprom; mod hrtimer; mod iir; mod pounder; +mod sampling_timer; mod server; use adc::{Adc0Input, Adc1Input, AdcInputs}; @@ -185,11 +187,10 @@ const APP: () = { adcs: AdcInputs, dacs: DacOutputs, + input_stamper: digital_input_stamper::InputStamper, eeprom_i2c: hal::i2c::I2c, - timer: hal::timer::Timer, - profiles: heapless::spsc::Queue<[u32; 4], heapless::consts::U32>, // Note: It appears that rustfmt generates a format that GDB cannot recognize, which @@ -267,6 +268,16 @@ const APP: () = { let dma_streams = hal::dma::dma::StreamsTuple::new(dp.DMA1, ccdr.peripheral.DMA1); + // Configure timer 2 to trigger conversions for the ADC + let timer2 = dp.TIM2.timer( + SAMPLE_FREQUENCY_KHZ.khz(), + ccdr.peripheral.TIM2, + &ccdr.clocks, + ); + + let mut sampling_timer = sampling_timer::SamplingTimer::new(timer2); + let sampling_timer_channels = sampling_timer.channels(); + // Configure the SPI interfaces to the ADCs and DACs. 
let adcs = { let adc0 = { @@ -299,7 +310,12 @@ const APP: () = { &ccdr.clocks, ); - Adc0Input::new(spi, dma_streams.0, dma_streams.1) + Adc0Input::new( + spi, + dma_streams.0, + dma_streams.1, + sampling_timer_channels.ch1, + ) }; let adc1 = { @@ -332,7 +348,12 @@ const APP: () = { &ccdr.clocks, ); - Adc1Input::new(spi, dma_streams.2, dma_streams.3) + Adc1Input::new( + spi, + dma_streams.2, + dma_streams.3, + sampling_timer_channels.ch2, + ) }; AdcInputs::new(adc0, adc1) @@ -478,9 +499,7 @@ const APP: () = { }; let mut reset_pin = gpioa.pa0.into_push_pull_output(); - let mut io_update = gpiog - .pg7 - .into_push_pull_output(); + let mut io_update = gpiog.pg7.into_push_pull_output(); let ad9959 = ad9959::Ad9959::new( qspi_interface, @@ -736,22 +755,17 @@ const APP: () = { // Utilize the cycle counter for RTIC scheduling. cp.DWT.enable_cycle_counter(); - // Configure timer 2 to trigger conversions for the ADC - let timer2 = dp.TIM2.timer( - SAMPLE_FREQUENCY_KHZ.khz(), - ccdr.peripheral.TIM2, - &ccdr.clocks, - ); - { - // Listen to the CH1 and CH2 comparison events. These channels should have a value of - // zero loaded into them, so the event should occur whenever the timer overflows. Note - // that we use channels instead of timer updates because each SPI DMA transfer needs a - // unique request line. - let t2_regs = unsafe { &*hal::stm32::TIM2::ptr() }; - t2_regs - .dier - .modify(|_, w| w.cc1de().set_bit().cc2de().set_bit()); - } + let input_stamper = { + let trigger = gpioa.pa3.into_alternate_af1(); + digital_input_stamper::InputStamper::new( + trigger, + dma_streams.4, + sampling_timer_channels.ch4, + ) + }; + + // Start sampling ADCs. 
+ sampling_timer.start(); init::LateResources { afe0: afe0, @@ -760,7 +774,8 @@ const APP: () = { adcs, dacs, - timer: timer2, + input_stamper, + pounder: pounder_devices, eeprom_i2c, @@ -772,6 +787,11 @@ const APP: () = { } } + #[task(binds=DMA1_STR4, resources=[input_stamper], priority = 2)] + fn digital_stamper(c: digital_stamper::Context) { + let _timestamps = c.resources.input_stamper.transfer_complete_handler(); + } + #[task(binds = TIM3, resources=[dacs, profiles, pounder], priority = 3)] fn dac_update(c: dac_update::Context) { c.resources.dacs.update(); @@ -812,10 +832,15 @@ const APP: () = { c.resources.pounder.lock(|pounder| { if let Some(pounder) = pounder { profiles.lock(|profiles| { - let profile = pounder.ad9959.serialize_profile(pounder::Channel::Out0.into(), + let profile = pounder + .ad9959 + .serialize_profile( + pounder::Channel::Out0.into(), 100_000_000_f32, 0.0_f32, - *adc0 as f32 / 0xFFFF as f32).unwrap(); + *adc0 as f32 / 0xFFFF as f32, + ) + .unwrap(); profiles.enqueue(profile).unwrap(); }); diff --git a/src/pounder/mod.rs b/src/pounder/mod.rs index 0e32a22..c19096d 100644 --- a/src/pounder/mod.rs +++ b/src/pounder/mod.rs @@ -124,9 +124,9 @@ impl QspiInterface { unsafe { qspi_regs.dlr.write(|w| w.dl().bits(0xFFFF_FFFF)); - qspi_regs - .ccr - .modify(|_, w| w.imode().bits(0).fmode().bits(0).admode().bits(0)); + qspi_regs.ccr.modify(|_, w| { + w.imode().bits(0).fmode().bits(0).admode().bits(0) + }); } self.streaming = true; diff --git a/src/sampling_timer.rs b/src/sampling_timer.rs new file mode 100644 index 0000000..f299ad6 --- /dev/null +++ b/src/sampling_timer.rs @@ -0,0 +1,112 @@ +use super::hal; + +use hal::dma::{dma::DMAReq, traits::TargetAddress, PeripheralToMemory}; +pub use hal::stm32::tim2::ccmr2_input::CC4S_A; + +pub struct SamplingTimer { + timer: hal::timer::Timer, + channels: Option, +} + +impl SamplingTimer { + pub fn new(mut timer: hal::timer::Timer) -> Self { + timer.pause(); + + Self { + timer, + channels: 
Some(TimerChannels::new()), + } + } + + pub fn channels(&mut self) -> TimerChannels { + self.channels.take().unwrap() + } + + pub fn start(&mut self) { + self.timer.reset_counter(); + self.timer.resume(); + } +} + +pub struct TimerChannels { + pub ch1: Timer2Channel1, + pub ch2: Timer2Channel2, + pub ch3: Timer2Channel3, + pub ch4: Timer2Channel4, +} + +impl TimerChannels { + fn new() -> Self { + Self { + ch1: Timer2Channel1 {}, + ch2: Timer2Channel2 {}, + ch3: Timer2Channel3 {}, + ch4: Timer2Channel4 {}, + } + } +} + +pub struct Timer2Channel1 {} + +impl Timer2Channel1 { + pub fn listen_dma(&self) { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + regs.dier.modify(|_, w| w.cc1de().set_bit()); + } + + pub fn to_output_compare(&self, value: u32) { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + assert!(value <= regs.arr.read().bits()); + regs.ccr1.write(|w| w.ccr().bits(value)); + regs.ccmr1_output() + .modify(|_, w| unsafe { w.cc1s().bits(0) }); + } +} + +pub struct Timer2Channel2 {} + +impl Timer2Channel2 { + pub fn listen_dma(&self) { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + regs.dier.modify(|_, w| w.cc2de().set_bit()); + } + + pub fn to_output_compare(&self, value: u32) { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + assert!(value <= regs.arr.read().bits()); + regs.ccr2.write(|w| w.ccr().bits(value)); + regs.ccmr1_output() + .modify(|_, w| unsafe { w.cc2s().bits(0) }); + } +} + +pub struct Timer2Channel3 {} + +pub struct Timer2Channel4 {} + +unsafe impl TargetAddress for Timer2Channel4 { + type MemSize = u16; + + const REQUEST_LINE: Option = Some(DMAReq::TIM2_CH4 as u8); + + fn address(&self) -> u32 { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + ®s.dmar as *const _ as u32 + } +} + +impl Timer2Channel4 { + pub fn listen_dma(&self) { + let regs = unsafe { &*hal::stm32::TIM2::ptr() }; + regs.dier.modify(|_, w| w.cc4de().set_bit()); + } + + pub fn to_input_capture(&self, trig: CC4S_A) { + let regs = unsafe { 
&*hal::stm32::TIM2::ptr() }; + regs.ccmr2_input().modify(|_, w| w.cc4s().variant(trig)); + + // Update the DMA control burst regs to point to CCR4. + regs.dcr + .modify(|_, w| unsafe { w.dbl().bits(1).dba().bits(16) }); + } +} From fc81c8b5d8b0d9ee63844cd2b1f8b58c3f6b6a72 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 7 Dec 2020 17:29:36 +0100 Subject: [PATCH 02/44] Updating API --- Cargo.lock | 2 +- Cargo.toml | 4 +- src/adc.rs | 2 +- src/dac.rs | 2 +- src/digital_input_stamper.rs | 12 ++-- src/main.rs | 93 ++---------------------- src/sampling_timer.rs | 134 ++++++++++++++++++++--------------- 7 files changed, 91 insertions(+), 158 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9cf3a58..edc2864 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -517,7 +517,7 @@ dependencies = [ [[package]] name = "stm32h7xx-hal" version = "0.8.0" -source = "git+https://github.com/stm32-rs/stm32h7xx-hal?branch=dma#0bfeeca4ce120c1b7c6d140a7da73a4372b874d8" +source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/number-of-transfers#e70a78788e74be5281321213b53e8cd1d213550e" dependencies = [ "bare-metal 1.0.0", "cast", diff --git a/Cargo.toml b/Cargo.toml index 7217589..f1acbe0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,8 +53,8 @@ default-features = false [dependencies.stm32h7xx-hal] features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"] -git = "https://github.com/stm32-rs/stm32h7xx-hal" -branch = "dma" +git = "https://github.com/quartiq/stm32h7xx-hal" +branch = "feature/number-of-transfers" [features] semihosting = ["panic-semihosting", "cortex-m-log/semihosting"] diff --git a/src/adc.rs b/src/adc.rs index e9120aa..4017f7f 100644 --- a/src/adc.rs +++ b/src/adc.rs @@ -195,7 +195,7 @@ macro_rules! adc_input { // Start the next transfer. 
self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); self.next_buffer.replace(prev_buffer); // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 diff --git a/src/dac.rs b/src/dac.rs index d96109c..00c24d4 100644 --- a/src/dac.rs +++ b/src/dac.rs @@ -143,7 +143,7 @@ macro_rules! dac_output { // Start the next transfer. self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 88b1c58..71cd5b1 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -13,8 +13,8 @@ pub struct InputStamper { timestamp_buffer: heapless::Vec, next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>, transfer: Transfer< - hal::dma::dma::Stream4, - sampling_timer::Timer2Channel4, + hal::dma::dma::Stream6, + sampling_timer::tim2::Channel4InputCapture, PeripheralToMemory, &'static mut [u16; INPUT_BUFFER_SIZE], >, @@ -23,13 +23,13 @@ pub struct InputStamper { impl InputStamper { pub fn new( trigger: hal::gpio::gpioa::PA3>, - stream: hal::dma::dma::Stream4, - timer_channel: sampling_timer::Timer2Channel4, + stream: hal::dma::dma::Stream6, + timer_channel: sampling_timer::tim2::Channel4, ) -> Self { // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the // capture source. timer_channel.listen_dma(); - timer_channel.to_input_capture(sampling_timer::CC4S_A::TI4); + let input_capture = timer_channel.to_input_capture(sampling_timer::tim2::CC4S_A::TI4); // Set up the DMA transfer. 
let dma_config = DmaConfig::default() @@ -39,7 +39,7 @@ impl InputStamper { let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = Transfer::init( stream, - timer_channel, + input_capture, unsafe { &mut BUF0 }, None, dma_config, diff --git a/src/main.rs b/src/main.rs index 453e8ab..f168c95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -70,9 +70,7 @@ mod adc; mod afe; mod dac; mod digital_input_stamper; -mod eeprom; mod hrtimer; -mod iir; mod design_parameters; mod eeprom; mod pounder; @@ -775,7 +773,7 @@ const APP: () = { let trigger = gpioa.pa3.into_alternate_af1(); digital_input_stamper::InputStamper::new( trigger, - dma_streams.4, + dma_streams.6, sampling_timer_channels.ch4, ) }; @@ -796,14 +794,12 @@ const APP: () = { net_interface: network_interface, eth_mac, mac_addr, - - profiles: heapless::spsc::Queue::new(), } } - #[task(binds=DMA1_STR4, resources=[input_stamper], priority = 2)] + #[task(binds=DMA1_STR6, resources=[input_stamper], priority = 2)] fn digital_stamper(c: digital_stamper::Context) { - let _timestamps = c.resources.input_stamper.transfer_complete_handler(); + panic!("Timestamp overflow") } #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)] @@ -911,44 +907,7 @@ const APP: () = { Ok::(state) }), "stabilizer/afe0/gain": (|| c.resources.afes.0.get_gain()), - "stabilizer/afe1/gain": (|| c.resources.afes.1.get_gain()), - "pounder/in0": (|| { - match c.resources.pounder { - Some(pounder) => - pounder.get_input_channel_state(pounder::Channel::In0), - _ => Err(pounder::Error::Access), - } - }), - "pounder/in1": (|| { - match c.resources.pounder { - Some(pounder) => - pounder.get_input_channel_state(pounder::Channel::In1), - _ => Err(pounder::Error::Access), - } - }), - "pounder/out0": (|| { - match c.resources.pounder { - Some(pounder) => - pounder.get_output_channel_state(pounder::Channel::Out0), - _ => Err(pounder::Error::Access), - } - }), - "pounder/out1": (|| { - match c.resources.pounder { - Some(pounder) 
=> - pounder.get_output_channel_state(pounder::Channel::Out1), - _ => Err(pounder::Error::Access), - } - }), ->>>>>>> master - "pounder/dds/clock": (|| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => pounder.get_dds_clock_config(), - _ => Err(pounder::Error::Access), - } - }) - }) + "stabilizer/afe1/gain": (|| c.resources.afes.1.get_gain()) ], modifiable_attributes: [ @@ -996,50 +955,6 @@ const APP: () = { Ok::(req) }) }), - "pounder/in0": pounder::ChannelState, (|state| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => - pounder.set_channel_state(pounder::Channel::In0, state), - _ => Err(pounder::Error::Access), - } - }) - }), - "pounder/in1": pounder::ChannelState, (|state| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => - pounder.set_channel_state(pounder::Channel::In1, state), - _ => Err(pounder::Error::Access), - } - }) - }), - "pounder/out0": pounder::ChannelState, (|state| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => - pounder.set_channel_state(pounder::Channel::Out0, state), - _ => Err(pounder::Error::Access), - } - }) - }), - "pounder/out1": pounder::ChannelState, (|state| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => - pounder.set_channel_state(pounder::Channel::Out1, state), - _ => Err(pounder::Error::Access), - } - }) - }), - "pounder/dds/clock": pounder::DdsClockConfig, (|config| { - c.resources.pounder.lock(|pounder| { - match pounder { - Some(pounder) => pounder.configure_dds_clock(config), - _ => Err(pounder::Error::Access), - } - }) - }), "stabilizer/afe0/gain": afe::Gain, (|gain| { c.resources.afes.0.set_gain(gain); Ok::<(), ()>(()) diff --git a/src/sampling_timer.rs b/src/sampling_timer.rs index b7f9480..3218996 100644 --- a/src/sampling_timer.rs +++ b/src/sampling_timer.rs @@ -1,6 +1,5 @@ ///! The sampling timer is used for managing ADC sampling and external reference timestamping. 
use super::hal; -pub use hal::stm32::tim2::ccmr2_input::CC4S_A; /// The timer used for managing ADC sampling. pub struct SamplingTimer { @@ -36,12 +35,54 @@ impl SamplingTimer { } } -macro_rules! timer_channel { - ($name:ident, $TY:ty, ($ccxde:expr, $ccrx:expr, $ccmrx_output:expr, $ccxs:expr)) => { - pub struct $name {} - +macro_rules! timer_channels { + ($TY:ty) => { paste::paste! { - impl $name { + pub mod [< $TY:lower >] { + pub use hal::stm32::[< $TY:lower >]::ccmr1_input::{CC1S_A, CC2S_A}; + pub use hal::stm32::[< $TY:lower >]::ccmr2_input::{CC3S_A, CC4S_A}; + + use stm32h7xx_hal as hal; + use hal::dma::{traits::TargetAddress, PeripheralToMemory, dma::DMAReq}; + use hal::stm32::TIM2; + + /// The channels representing the timer. + pub struct Channels { + pub ch1: Channel1, + pub ch2: Channel2, + pub ch3: Channel3, + pub ch4: Channel4, + } + + impl Channels { + /// Construct a new set of channels. + /// + /// Note(unsafe): This is only safe to call once. + pub unsafe fn new() -> Self { + Self { + ch1: Channel1::new(), + ch2: Channel2::new(), + ch3: Channel3::new(), + ch4: Channel4::new(), + } + } + } + + timer_channels!(1, $TY, ccmr1); + timer_channels!(2, $TY, ccmr1); + timer_channels!(3, $TY, ccmr2); + timer_channels!(4, $TY, ccmr2); + } + } + }; + + ($index:expr, $TY:ty, $ccmrx:expr) => { + paste::paste! { + pub struct [< Channel $index >] {} + + pub struct [< Channel $index InputCapture>] {} + + impl [< Channel $index >] { /// Construct a new timer channel. /// /// Note(unsafe): This function must only be called once. Once constructed, the @@ -50,71 +91,48 @@ macro_rules! timer_channel { Self {} } - /// Allow CH4 to generate DMA requests. + /// Allow the channel to generate DMA requests. pub fn listen_dma(&self) { let regs = unsafe { &*<$TY>::ptr() }; - regs.dier.modify(|_, w| w.[< $ccxde >]().set_bit()); + regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); } - /// Operate CH2 as an output-compare. + /// Operate the channel as an output-compare. 
/// /// # Args /// * `value` - The value to compare the sampling timer's counter against. pub fn to_output_compare(&self, value: u32) { let regs = unsafe { &*<$TY>::ptr() }; assert!(value <= regs.arr.read().bits()); - regs.[< $ccrx >].write(|w| w.ccr().bits(value)); - regs.[< $ccmrx_output >]() - .modify(|_, w| unsafe { w.[< $ccxs >]().bits(0) }); + regs.[< ccr $index >].write(|w| w.ccr().bits(value)); + regs.[< $ccmrx _output >]() + .modify(|_, w| unsafe { w.[< cc $index s >]().bits(0) }); + } + + /// Operate the channel in input-capture mode. + /// + /// # Args + /// * `input` - The input source for the input capture event. + pub fn to_input_capture(self, input: hal::stm32::[<$TY:lower>]::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ + let regs = unsafe { &*<$TY>::ptr() }; + regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input)); + + [< Channel $index InputCapture >] {} + } + } + + unsafe impl TargetAddress for [< Channel $index InputCapture >] { + type MemSize = u16; + + const REQUEST_LINE: Option = Some(DMAReq::[< $TY _CH $index >]as u8); + + fn address(&self) -> u32 { + let regs = unsafe { &*<$TY>::ptr() }; + ®s.[] as *const _ as u32 } } } }; } -pub mod tim2 { - use stm32h7xx_hal as hal; - - /// The channels representing the timer. - pub struct Channels { - pub ch1: Channel1, - pub ch2: Channel2, - pub ch3: Channel3, - pub ch4: Channel4, - } - - impl Channels { - /// Construct a new set of channels. - /// - /// Note(unsafe): This is only safe to call once. 
- pub unsafe fn new() -> Self { - Self { - ch1: Channel1::new(), - ch2: Channel2::new(), - ch3: Channel3::new(), - ch4: Channel4::new(), - } - } - } - - timer_channel!( - Channel1, - hal::stm32::TIM2, - (cc1de, ccr1, ccmr1_output, cc1s) - ); - timer_channel!( - Channel2, - hal::stm32::TIM2, - (cc2de, ccr2, ccmr1_output, cc1s) - ); - timer_channel!( - Channel3, - hal::stm32::TIM2, - (cc3de, ccr3, ccmr2_output, cc3s) - ); - timer_channel!( - Channel4, - hal::stm32::TIM2, - (cc4de, ccr4, ccmr2_output, cc4s) - ); -} +timer_channels!(TIM2); From ec046bc42d3490f7e788d92d76107cb2e3d58058 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 7 Dec 2020 17:58:36 +0100 Subject: [PATCH 03/44] Refactoring timer timestamping --- src/adc.rs | 12 ++-- src/dac.rs | 8 +-- src/digital_input_stamper.rs | 25 ++++---- src/main.rs | 36 ++++++++---- src/{sampling_timer.rs => timers.rs} | 85 +++++++++++++++------------- 5 files changed, 91 insertions(+), 75 deletions(-) rename src/{sampling_timer.rs => timers.rs} (64%) diff --git a/src/adc.rs b/src/adc.rs index 4017f7f..8d2b61a 100644 --- a/src/adc.rs +++ b/src/adc.rs @@ -14,8 +14,8 @@ ///! both transfers are completed before reading the data. This is usually not significant for ///! busy-waiting because the transfers should complete at approximately the same time. use super::{ - hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, - PeripheralToMemory, Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, + hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, PeripheralToMemory, + Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, }; // The following data is written by the timer ADC sample trigger into each of the SPI TXFIFOs. Note @@ -38,12 +38,10 @@ macro_rules! adc_input { /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO /// whenever the tim2 update dma request occurs. 
struct $spi { - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, } impl $spi { - pub fn new( - _channel: sampling_timer::tim2::$trigger_channel, - ) -> Self { + pub fn new(_channel: timers::tim2::$trigger_channel) -> Self { Self { _channel } } } @@ -100,7 +98,7 @@ macro_rules! adc_input { hal::stm32::DMA1, >, data_stream: hal::dma::dma::$data_stream, - trigger_channel: sampling_timer::tim2::$trigger_channel, + trigger_channel: timers::tim2::$trigger_channel, ) -> Self { // Generate DMA events when an output compare of the timer hitting zero (timer roll over) // occurs. diff --git a/src/dac.rs b/src/dac.rs index 00c24d4..06a6362 100644 --- a/src/dac.rs +++ b/src/dac.rs @@ -4,7 +4,7 @@ ///! configured to generate a DMA write into the SPI TXFIFO, which initiates a SPI transfer and ///! results in DAC update for both channels. use super::{ - hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress, + hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, }; @@ -22,12 +22,12 @@ macro_rules! dac_output { /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO struct $spi { spi: hal::spi::Spi, - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, } impl $spi { pub fn new( - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, spi: hal::spi::Spi, ) -> Self { Self { _channel, spi } @@ -73,7 +73,7 @@ macro_rules! dac_output { pub fn new( spi: hal::spi::Spi, stream: hal::dma::dma::$data_stream, - trigger_channel: sampling_timer::tim2::$trigger_channel, + trigger_channel: timers::tim2::$trigger_channel, ) -> Self { // Generate DMA events when an output compare of the timer hitting zero (timer roll over) // occurs. 
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 71cd5b1..8b8f3af 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -1,20 +1,17 @@ -use super::{hal, sampling_timer, DmaConfig, PeripheralToMemory, Transfer}; +use super::{hal, timers, DmaConfig, PeripheralToMemory, Transfer}; const INPUT_BUFFER_SIZE: usize = 1; #[link_section = ".axisram.buffers"] -static mut BUF0: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE]; - -#[link_section = ".axisram.buffers"] -static mut BUF1: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE]; +static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2]; pub struct InputStamper { - _di0_trigger: hal::gpio::gpioa::PA3>, + _di0_trigger: hal::gpio::gpioa::PA3>, timestamp_buffer: heapless::Vec, next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>, transfer: Transfer< hal::dma::dma::Stream6, - sampling_timer::tim2::Channel4InputCapture, + timers::tim5::Channel4InputCapture, PeripheralToMemory, &'static mut [u16; INPUT_BUFFER_SIZE], >, @@ -22,17 +19,19 @@ pub struct InputStamper { impl InputStamper { pub fn new( - trigger: hal::gpio::gpioa::PA3>, + trigger: hal::gpio::gpioa::PA3>, stream: hal::dma::dma::Stream6, - timer_channel: sampling_timer::tim2::Channel4, + timer_channel: timers::tim5::Channel4, ) -> Self { - // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the + // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the // capture source. timer_channel.listen_dma(); - let input_capture = timer_channel.to_input_capture(sampling_timer::tim2::CC4S_A::TI4); + let input_capture = + timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4); // Set up the DMA transfer. 
let dma_config = DmaConfig::default() + .transfer_complete_interrupt(true) .memory_increment(true) .peripheral_increment(false); @@ -40,7 +39,7 @@ impl InputStamper { Transfer::init( stream, input_capture, - unsafe { &mut BUF0 }, + unsafe { &mut BUF[0] }, None, dma_config, ); @@ -49,7 +48,7 @@ impl InputStamper { Self { timestamp_buffer: heapless::Vec::new(), - next_buffer: unsafe { Some(&mut BUF1) }, + next_buffer: unsafe { Some(&mut BUF[1]) }, transfer: timestamp_transfer, _di0_trigger: trigger, } diff --git a/src/main.rs b/src/main.rs index f168c95..425b401 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,13 +69,13 @@ static mut DES_RING: ethernet::DesRing = ethernet::DesRing::new(); mod adc; mod afe; mod dac; -mod digital_input_stamper; -mod hrtimer; mod design_parameters; +mod digital_input_stamper; mod eeprom; +mod hrtimer; mod pounder; -mod sampling_timer; mod server; +mod timers; use adc::{Adc0Input, Adc1Input}; use dac::{Dac0Output, Dac1Output}; @@ -285,9 +285,22 @@ const APP: () = { &ccdr.clocks, ); - let mut sampling_timer = sampling_timer::SamplingTimer::new(timer2); + let mut sampling_timer = timers::SamplingTimer::new(timer2); let sampling_timer_channels = sampling_timer.channels(); + let mut timestamp_timer = { + // TODO: This needs to be precisely controlled via the prescaler of the timer. + let timer5 = dp.TIM5.timer( + (SAMPLE_FREQUENCY_KHZ / SAMPLE_BUFFER_SIZE as u32).khz(), + ccdr.peripheral.TIM5, + &ccdr.clocks, + ); + + timers::TimestampTimer::new(timer5) + }; + + let timestamp_timer_channels = timestamp_timer.channels(); + // Configure the SPI interfaces to the ADCs and DACs. let adcs = { let adc0 = { @@ -770,16 +783,17 @@ const APP: () = { cp.DWT.enable_cycle_counter(); let input_stamper = { - let trigger = gpioa.pa3.into_alternate_af1(); + let trigger = gpioa.pa3.into_alternate_af2(); digital_input_stamper::InputStamper::new( trigger, dma_streams.6, - sampling_timer_channels.ch4, + timestamp_timer_channels.ch4, ) }; // Start sampling ADCs. 
sampling_timer.start(); + timestamp_timer.start(); init::LateResources { afes: (afe0, afe1), @@ -797,11 +811,6 @@ const APP: () = { } } - #[task(binds=DMA1_STR6, resources=[input_stamper], priority = 2)] - fn digital_stamper(c: digital_stamper::Context) { - panic!("Timestamp overflow") - } - #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)] fn process(c: process::Context) { let adc_samples = [ @@ -833,6 +842,11 @@ const APP: () = { c.resources.dacs.1.release_buffer(dac1); } + #[task(binds=DMA1_STR6, priority = 2)] + fn digital_stamper(_: digital_stamper::Context) { + panic!("Timestamp overflow") + } + #[idle(resources=[net_interface, pounder, mac_addr, eth_mac, iir_state, iir_ch, afes])] fn idle(mut c: idle::Context) -> ! { let mut socket_set_entries: [_; 8] = Default::default(); diff --git a/src/sampling_timer.rs b/src/timers.rs similarity index 64% rename from src/sampling_timer.rs rename to src/timers.rs index 3218996..36f8c01 100644 --- a/src/sampling_timer.rs +++ b/src/timers.rs @@ -1,50 +1,51 @@ ///! The sampling timer is used for managing ADC sampling and external reference timestamping. use super::hal; -/// The timer used for managing ADC sampling. -pub struct SamplingTimer { - timer: hal::timer::Timer, - channels: Option, -} - -impl SamplingTimer { - /// Construct the sampling timer. - pub fn new(mut timer: hal::timer::Timer) -> Self { - timer.pause(); - - Self { - timer, - // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any - // of the underlying timer channel registers, as ownership of the channels is now - // provided through the associated channel structures. We additionally guarantee this - // can only be called once because there is only one Timer2 and this resource takes - // ownership of it once instantiated. - channels: unsafe { Some(tim2::Channels::new()) }, - } - } - - /// Get the timer capture/compare channels. 
- pub fn channels(&mut self) -> tim2::Channels { - self.channels.take().unwrap() - } - - /// Start the sampling timer. - pub fn start(&mut self) { - self.timer.reset_counter(); - self.timer.resume(); - } -} - macro_rules! timer_channels { - ($TY:ty) => { + ($name:ident, $TY:ident) => { paste::paste! { + + /// The timer used for managing ADC sampling. + pub struct $name { + timer: hal::timer::Timer]>, + channels: Option<[< $TY:lower >]::Channels>, + } + + impl $name { + /// Construct the sampling timer. + pub fn new(mut timer: hal::timer::Timer]>) -> Self { + timer.pause(); + + Self { + timer, + // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any + // of the underlying timer channel registers, as ownership of the channels is now + // provided through the associated channel structures. We additionally guarantee this + // can only be called once because there is only one Timer2 and this resource takes + // ownership of it once instantiated. + channels: unsafe { Some([< $TY:lower >]::Channels::new()) }, + } + } + + /// Get the timer capture/compare channels. + pub fn channels(&mut self) -> [< $TY:lower >]::Channels { + self.channels.take().unwrap() + } + + /// Start the sampling timer. + pub fn start(&mut self) { + self.timer.reset_counter(); + self.timer.resume(); + } + } + pub mod [< $TY:lower >] { - pub use hal::stm32::[< $TY:lower >]::ccmr1_input::{CC1S_A, CC2S_A}; - pub use hal::stm32::[< $TY:lower >]::ccmr2_input::{CC3S_A, CC4S_A}; + pub use hal::stm32::tim2::ccmr1_input::{CC1S_A, CC2S_A}; + pub use hal::stm32::tim2::ccmr2_input::{CC3S_A, CC4S_A}; use stm32h7xx_hal as hal; use hal::dma::{traits::TargetAddress, PeripheralToMemory, dma::DMAReq}; - use hal::stm32::TIM2; + use hal::stm32::$TY; /// The channels representing the timer. pub struct Channels { @@ -92,6 +93,7 @@ macro_rules! timer_channels { } /// Allow the channel to generate DMA requests. 
+ #[allow(dead_code)] pub fn listen_dma(&self) { let regs = unsafe { &*<$TY>::ptr() }; regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); @@ -101,6 +103,7 @@ macro_rules! timer_channels { /// /// # Args /// * `value` - The value to compare the sampling timer's counter against. + #[allow(dead_code)] pub fn to_output_compare(&self, value: u32) { let regs = unsafe { &*<$TY>::ptr() }; assert!(value <= regs.arr.read().bits()); @@ -113,7 +116,8 @@ macro_rules! timer_channels { /// /// # Args /// * `input` - The input source for the input capture event. - pub fn to_input_capture(self, input: hal::stm32::[<$TY:lower>]::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ + #[allow(dead_code)] + pub fn to_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ let regs = unsafe { &*<$TY>::ptr() }; regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input)); @@ -135,4 +139,5 @@ macro_rules! timer_channels { }; } -timer_channels!(TIM2); +timer_channels!(SamplingTimer, TIM2); +timer_channels!(TimestampTimer, TIM5); From b191a3f01df139ef17ac3a9cfe951a2b8b7d47c8 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 7 Dec 2020 18:11:46 +0100 Subject: [PATCH 04/44] Updating timestamp timer to be more precise --- src/main.rs | 19 +++++++++++++------ src/timers.rs | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index 425b401..2520ad9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -289,14 +289,21 @@ const APP: () = { let sampling_timer_channels = sampling_timer.channels(); let mut timestamp_timer = { - // TODO: This needs to be precisely controlled via the prescaler of the timer. 
- let timer5 = dp.TIM5.timer( - (SAMPLE_FREQUENCY_KHZ / SAMPLE_BUFFER_SIZE as u32).khz(), - ccdr.peripheral.TIM5, - &ccdr.clocks, + // The timer frequency is manually adjusted below, so the 1KHz setting here is a + // dont-care. + let timer5 = + dp.TIM5.timer(1.khz(), ccdr.peripheral.TIM5, &ccdr.clocks); + + // The time stamp timer must run at exactly a multiple of the sample timer based on the + // batch size. To accomodate this, we manually set the period identical to the sample + // timer, but use a prescaler that is `BATCH_SIZE` longer. + let mut timer = timers::TimestampTimer::new(timer5); + timer.set_period(sampling_timer.get_period()); + timer.set_prescaler( + sampling_timer.get_prescaler() * SAMPLE_BUFFER_SIZE as u16, ); - timers::TimestampTimer::new(timer5) + timer }; let timestamp_timer_channels = timestamp_timer.channels(); diff --git a/src/timers.rs b/src/timers.rs index 36f8c01..a3c2dce 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -32,8 +32,36 @@ macro_rules! timer_channels { self.channels.take().unwrap() } - /// Start the sampling timer. - pub fn start(&mut self) { + #[allow(dead_code)] + pub fn get_prescaler(&self) -> u16 { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + regs.psc.read().psc().bits() + 1 + } + + #[allow(dead_code)] + pub fn set_prescaler(&mut self, prescaler: u16) { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + assert!(prescaler >= 1); + regs.psc.write(|w| w.psc().bits(prescaler - 1)); + } + + #[allow(dead_code)] + pub fn get_period(&self) -> u32 { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + regs.arr.read().arr().bits() + } + + #[allow(dead_code)] + pub fn set_period(&mut self, period: u32) { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + regs.arr.write(|w| w.arr().bits(period)); + } + + /// Start the timer. + pub fn start(mut self) { + // Force a refresh of the frequency settings. 
+ self.timer.apply_freq(); + self.timer.reset_counter(); self.timer.resume(); } From 6eaf2cc073f58ac6e5a001248170af89d5348810 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 7 Dec 2020 18:19:20 +0100 Subject: [PATCH 05/44] Updating timestamp buffer logic --- src/digital_input_stamper.rs | 21 ++------------------- src/main.rs | 14 ++++++++------ 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 8b8f3af..a8ce1d9 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -7,7 +7,6 @@ static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2]; pub struct InputStamper { _di0_trigger: hal::gpio::gpioa::PA3>, - timestamp_buffer: heapless::Vec, next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>, transfer: Transfer< hal::dma::dma::Stream6, @@ -47,37 +46,21 @@ impl InputStamper { timestamp_transfer.start(|_| {}); Self { - timestamp_buffer: heapless::Vec::new(), next_buffer: unsafe { Some(&mut BUF[1]) }, transfer: timestamp_transfer, _di0_trigger: trigger, } } - pub fn transfer_complete_handler(&mut self) { + pub fn acquire_buffer(&mut self) -> &[u16] { let next_buffer = self.next_buffer.take().unwrap(); - self.transfer.clear_interrupts(); let (prev_buffer, _, remaining_transfers) = self.transfer.next_transfer(next_buffer).unwrap(); let valid_count = prev_buffer.len() - remaining_transfers; - self.timestamp_buffer - .extend_from_slice(&prev_buffer[..valid_count]) - .unwrap(); self.next_buffer.replace(prev_buffer); - } - pub fn with_timestamps(&mut self, f: F) - where - F: FnOnce(&[u16]), - { - // First, run the transfer complete handler to retrieve any timestamps that are pending in - // the DMA transfer. 
- self.transfer_complete_handler(); - - f(self.timestamp_buffer.as_ref()); - - self.timestamp_buffer.clear(); + &self.next_buffer.as_ref().unwrap()[..valid_count] } } diff --git a/src/main.rs b/src/main.rs index 2520ad9..2c07284 100644 --- a/src/main.rs +++ b/src/main.rs @@ -818,7 +818,7 @@ const APP: () = { } } - #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)] + #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch, input_stamper], priority=2)] fn process(c: process::Context) { let adc_samples = [ c.resources.adcs.0.acquire_buffer(), @@ -829,6 +829,8 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; + let _timestamps = c.resources.input_stamper.acquire_buffer(); + for channel in 0..adc_samples.len() { for sample in 0..adc_samples[0].len() { let x = f32::from(adc_samples[channel][sample] as i16); @@ -849,11 +851,6 @@ const APP: () = { c.resources.dacs.1.release_buffer(dac1); } - #[task(binds=DMA1_STR6, priority = 2)] - fn digital_stamper(_: digital_stamper::Context) { - panic!("Timestamp overflow") - } - #[idle(resources=[net_interface, pounder, mac_addr, eth_mac, iir_state, iir_ch, afes])] fn idle(mut c: idle::Context) -> ! 
{ let mut socket_set_entries: [_; 8] = Default::default(); @@ -1008,6 +1005,11 @@ const APP: () = { } } + #[task(binds=DMA1_STR6, priority = 2)] + fn di0_timestamp(_: di0_timestamp::Context) { + panic!("DI0 Timestamp overflow") + } + #[task(binds = ETH, priority = 1)] fn eth(_: eth::Context) { unsafe { ethernet::interrupt_handler() } From 551cc5d7421e7865497e96d345791e6c00561771 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 7 Dec 2020 18:44:45 +0100 Subject: [PATCH 06/44] Adding support for DBM --- src/digital_input_stamper.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index a8ce1d9..ba65b66 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -1,18 +1,16 @@ -use super::{hal, timers, DmaConfig, PeripheralToMemory, Transfer}; - -const INPUT_BUFFER_SIZE: usize = 1; +use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer}; #[link_section = ".axisram.buffers"] -static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2]; +static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 3] = [[0; SAMPLE_BUFFER_SIZE]; 3]; pub struct InputStamper { _di0_trigger: hal::gpio::gpioa::PA3>, - next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>, + next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>, transfer: Transfer< hal::dma::dma::Stream6, timers::tim5::Channel4InputCapture, PeripheralToMemory, - &'static mut [u16; INPUT_BUFFER_SIZE], + &'static mut [u16; SAMPLE_BUFFER_SIZE], >, } @@ -32,21 +30,25 @@ impl InputStamper { let dma_config = DmaConfig::default() .transfer_complete_interrupt(true) .memory_increment(true) + .circular_buffer(true) + .double_buffer(true) .peripheral_increment(false); + // This needs to operate in double-buffer+circular mode so that we don't potentially drop + // input timestamps. 
let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = Transfer::init( stream, input_capture, unsafe { &mut BUF[0] }, - None, + unsafe { Some(&mut BUF[1]) }, dma_config, ); timestamp_transfer.start(|_| {}); Self { - next_buffer: unsafe { Some(&mut BUF[1]) }, + next_buffer: unsafe { Some(&mut BUF[2]) }, transfer: timestamp_transfer, _di0_trigger: trigger, } From f2e4f497fa96d2e38dfb59b946f6385efeab9109 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 11:29:59 +0100 Subject: [PATCH 07/44] Removing DBM from input stamper --- src/digital_input_stamper.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index ba65b66..3df8eee 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -1,7 +1,7 @@ use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer}; #[link_section = ".axisram.buffers"] -static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 3] = [[0; SAMPLE_BUFFER_SIZE]; 3]; +static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2]; pub struct InputStamper { _di0_trigger: hal::gpio::gpioa::PA3>, @@ -30,25 +30,23 @@ impl InputStamper { let dma_config = DmaConfig::default() .transfer_complete_interrupt(true) .memory_increment(true) - .circular_buffer(true) - .double_buffer(true) .peripheral_increment(false); - // This needs to operate in double-buffer+circular mode so that we don't potentially drop - // input timestamps. + // TODO: This needs to operate in double-buffer+circular mode so that we don't potentially + // drop input timestamps. 
let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = Transfer::init( stream, input_capture, unsafe { &mut BUF[0] }, - unsafe { Some(&mut BUF[1]) }, + None, dma_config, ); timestamp_transfer.start(|_| {}); Self { - next_buffer: unsafe { Some(&mut BUF[2]) }, + next_buffer: unsafe { Some(&mut BUF[1]) }, transfer: timestamp_transfer, _di0_trigger: trigger, } From a134340726f94fd1b6df3f894aae503518e7c3fa Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 13:53:34 +0100 Subject: [PATCH 08/44] Adding direct and DMA collection support for DI0 timestamps --- src/digital_input_stamper.rs | 146 +++++++++++++++++++++++++++-------- src/main.rs | 11 +-- src/timers.rs | 58 +++++++++++++- 3 files changed, 173 insertions(+), 42 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 3df8eee..fe8c86b 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -1,66 +1,146 @@ -use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer}; +///! Digital Input 0 (DI0) reference clock timestamper +///! +///! This module provides a means of timestamping the rising edges of an external reference clock on +///! the DI0 with a timer value from TIM5. +///! +///! This module only supports input clocks on DI0 and may or may not utilize DMA to collect +///! timestamps. +///! +///! # Design +///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is +///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0 +///! triggers, the current TIM5 capture value is recorded as a timestamp. This timestamp can be +///! either directly read from the timer channel or can be collected asynchronously via DMA +///! collection. +///! +///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many +///! 
timestamps as there are samples, but it is intended that this DMA transfer should never +///! complete. Instead, when all samples are collected, the module pauses the DMA transfer and +///! checks to see how many timestamps were collected. These collected timestamps are then returned +///! for further processing. +///! +///! To prevent silently discarding timestamps, the TIm5 input capture over-capture interrupt is +///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR +///! which handles the over-capture. +///! +///! # Tradeoffs +///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they +///! are being prematurely stopped (such is the case here). As such, for a sample batch size of 1, +///! this can take up a significant amount of the total available processing time for the samples. +///! To avoid this, the module does not use DMA when the sample batch size is one. Instead, the +///! module manually checks for any captured timestamps from the timer capture channel manually. In +///! this mode, the maximum input clock frequency supported is equal to the configured sample rate. +///! +///! There is a small window while the DMA buffers are swapped where a timestamp could potentially +///! be lost. To prevent this, the `acuire_buffer()` method should not be pre-empted. Any lost +///! timestamp will trigger an over-capture interrupt. +use super::{ + hal, timers, DmaConfig, PeripheralToMemory, Transfer, SAMPLE_BUFFER_SIZE, +}; +// The DMA buffers must exist in a location where DMA can access. By default, RAM uses DTCM, which +// is off-limits to the normal DMA peripheral. Instead, we use AXISRAM. #[link_section = ".axisram.buffers"] -static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2]; +static mut BUF: [[u32; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2]; +/// The timestamper for DI0 reference clock inputs. 
pub struct InputStamper { _di0_trigger: hal::gpio::gpioa::PA3>, - next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>, - transfer: Transfer< - hal::dma::dma::Stream6, - timers::tim5::Channel4InputCapture, - PeripheralToMemory, - &'static mut [u16; SAMPLE_BUFFER_SIZE], + next_buffer: Option<&'static mut [u32; SAMPLE_BUFFER_SIZE]>, + transfer: Option< + Transfer< + hal::dma::dma::Stream6, + timers::tim5::Channel4InputCapture, + PeripheralToMemory, + &'static mut [u32; SAMPLE_BUFFER_SIZE], + >, >, + capture_channel: Option, } impl InputStamper { + /// Construct the DI0 input timestamper. + /// + /// # Args + /// * `trigger` - The capture trigger input pin. + /// * `stream` - The DMA stream to use for collecting timestamps. + /// * `timer_channel - The timer channel used for capturing timestamps. + /// * `batch_size` - The number of samples collected per processing batch. pub fn new( trigger: hal::gpio::gpioa::PA3>, stream: hal::dma::dma::Stream6, timer_channel: timers::tim5::Channel4, + batch_size: usize, ) -> Self { // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the // capture source. - timer_channel.listen_dma(); let input_capture = timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4); - // Set up the DMA transfer. - let dma_config = DmaConfig::default() - .transfer_complete_interrupt(true) - .memory_increment(true) - .peripheral_increment(false); + // Listen for over-capture events, which indicates an over-run of DI0 timestamps. + input_capture.listen_overcapture(); - // TODO: This needs to operate in double-buffer+circular mode so that we don't potentially - // drop input timestamps. - let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = - Transfer::init( - stream, - input_capture, - unsafe { &mut BUF[0] }, - None, - dma_config, - ); + // For small batch sizes, the overhead of DMA can become burdensome to the point where + // timing is not met. 
The DMA requires 500ns overhead, whereas a direct register read only + // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach. + let (transfer, input_capture) = if batch_size >= 2 { + input_capture.listen_dma(); - timestamp_transfer.start(|_| {}); + // Set up the DMA transfer. + let dma_config = DmaConfig::default().memory_increment(true); + + let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = + Transfer::init( + stream, + input_capture, + unsafe { &mut BUF[0] }, + None, + dma_config, + ); + + timestamp_transfer.start(|_| {}); + (Some(timestamp_transfer), None) + } else { + (None, Some(input_capture)) + }; Self { next_buffer: unsafe { Some(&mut BUF[1]) }, - transfer: timestamp_transfer, + transfer, + capture_channel: input_capture, _di0_trigger: trigger, } } - pub fn acquire_buffer(&mut self) -> &[u16] { - let next_buffer = self.next_buffer.take().unwrap(); - let (prev_buffer, _, remaining_transfers) = - self.transfer.next_transfer(next_buffer).unwrap(); + /// Get all of the timestamps that have occurred during the last processing cycle. + pub fn acquire_buffer(&mut self) -> &[u32] { + // If we are using DMA, finish the transfer and swap over buffers. + if self.transfer.is_some() { + let next_buffer = self.next_buffer.take().unwrap(); - let valid_count = prev_buffer.len() - remaining_transfers; + let (prev_buffer, _, remaining_transfers) = self + .transfer + .as_mut() + .unwrap() + .next_transfer(next_buffer) + .unwrap(); + let valid_count = prev_buffer.len() - remaining_transfers; - self.next_buffer.replace(prev_buffer); + self.next_buffer.replace(prev_buffer); - &self.next_buffer.as_ref().unwrap()[..valid_count] + // Note that we likely didn't finish the transfer, so only return the number of + // timestamps actually collected. + &self.next_buffer.as_ref().unwrap()[..valid_count] + } else { + // If we aren't using DMA, just manually check the input capture channel for a + // timestamp. 
+ match self.capture_channel.as_mut().unwrap().latest_capture() { + Some(stamp) => { + self.next_buffer.as_mut().unwrap()[0] = stamp; + &self.next_buffer.as_ref().unwrap()[..1] + } + None => &[], + } + } } } diff --git a/src/main.rs b/src/main.rs index e798294..469f114 100644 --- a/src/main.rs +++ b/src/main.rs @@ -808,6 +808,7 @@ const APP: () = { trigger, dma_streams.6, timestamp_timer_channels.ch4, + SAMPLE_BUFFER_SIZE, ) }; @@ -1030,11 +1031,6 @@ const APP: () = { } } - #[task(binds=DMA1_STR6, priority = 2)] - fn di0_timestamp(_: di0_timestamp::Context) { - panic!("DI0 Timestamp overflow") - } - #[task(binds = ETH, priority = 1)] fn eth(_: eth::Context) { unsafe { ethernet::interrupt_handler() } @@ -1060,6 +1056,11 @@ const APP: () = { panic!("DAC1 output error"); } + #[task(binds = TIM5, priority = 3)] + fn di0(_: di0::Context) { + panic!("DI0 timestamp overrun"); + } + extern "C" { // hw interrupt handlers for RTIC to use for scheduling tasks // one per priority diff --git a/src/timers.rs b/src/timers.rs index a3c2dce..74b4731 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -2,7 +2,7 @@ use super::hal; macro_rules! timer_channels { - ($name:ident, $TY:ident) => { + ($name:ident, $TY:ident, u32) => { paste::paste! { /// The timer used for managing ADC sampling. @@ -32,12 +32,14 @@ macro_rules! timer_channels { self.channels.take().unwrap() } + /// Get the prescaler of a timer. #[allow(dead_code)] pub fn get_prescaler(&self) -> u16 { let regs = unsafe { &*hal::stm32::$TY::ptr() }; regs.psc.read().psc().bits() + 1 } + /// Manually set the prescaler of the timer. #[allow(dead_code)] pub fn set_prescaler(&mut self, prescaler: u16) { let regs = unsafe { &*hal::stm32::$TY::ptr() }; @@ -45,12 +47,14 @@ macro_rules! timer_channels { regs.psc.write(|w| w.psc().bits(prescaler - 1)); } + /// Get the period of the timer. 
#[allow(dead_code)] pub fn get_period(&self) -> u32 { let regs = unsafe { &*hal::stm32::$TY::ptr() }; regs.arr.read().arr().bits() } + /// Manually set the period of the timer. #[allow(dead_code)] pub fn set_period(&mut self, period: u32) { let regs = unsafe { &*hal::stm32::$TY::ptr() }; @@ -107,8 +111,10 @@ macro_rules! timer_channels { ($index:expr, $TY:ty, $ccmrx:expr) => { paste::paste! { + /// A capture/compare channel of the timer. pub struct [< Channel $index >] {} + /// A capture channel of the timer. pub struct [< Channel $index InputCapture>] {} impl [< Channel $index >] { @@ -153,8 +159,52 @@ macro_rules! timer_channels { } } + impl [< Channel $index InputCapture >] { + /// Get the latest capture from the channel. + #[allow(dead_code)] + pub fn latest_capture(&mut self) -> Option { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations on completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + let sr = regs.sr.read(); + let ccx = regs.[< ccr $index >].read(); + if sr.[< cc $index if >]().bit_is_set() { + regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit()); + Some(ccx.ccr().bits()) + } else { + None + } + } + + /// Listen for over-capture events on the timer channel. + /// + /// # Note + /// An over-capture event is when a previous capture was lost due to a new capture. + /// + /// "Listening" is equivalent to enabling the interrupt for the event. + #[allow(dead_code)] + pub fn listen_overcapture(&self) { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations on completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.dier.modify(|_, w| w.[]().set_bit()); + } + + /// Allow the channel to generate DMA requests. + #[allow(dead_code)] + pub fn listen_dma(&self) { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations on completed on the timer registers. 
+ let regs = unsafe { &*<$TY>::ptr() }; + regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); + } + } + + // Note(unsafe): This manually implements DMA support for input-capture channels. This + // is safe as it is only completed once per channel and each DMA request is allocated to + // each channel as the owner. unsafe impl TargetAddress for [< Channel $index InputCapture >] { - type MemSize = u16; + type MemSize = u32; const REQUEST_LINE: Option = Some(DMAReq::[< $TY _CH $index >]as u8); @@ -167,5 +217,5 @@ macro_rules! timer_channels { }; } -timer_channels!(SamplingTimer, TIM2); -timer_channels!(TimestampTimer, TIM5); +timer_channels!(SamplingTimer, TIM2, u32); +timer_channels!(TimestampTimer, TIM5, u32); From 1a1d8fd8b93c5527298837903fc160f50ab1cd9b Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 14:25:46 +0100 Subject: [PATCH 09/44] Adding precise control of timer periods and prescalers --- src/design_parameters.rs | 3 ++ src/main.rs | 63 +++++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/src/design_parameters.rs b/src/design_parameters.rs index 9835568..414a9e2 100644 --- a/src/design_parameters.rs +++ b/src/design_parameters.rs @@ -4,3 +4,6 @@ pub const ADC_SETUP_TIME: f32 = 220e-9; /// The maximum DAC/ADC serial clock line frequency. This is a hardware limit. pub const ADC_DAC_SCK_MHZ_MAX: u32 = 50; + +/// The optimal counting frequency of the hardware timers used for timestamping and sampling. 
+pub const TIMER_FREQUENCY_MHZ: u32 = 100; diff --git a/src/main.rs b/src/main.rs index 469f114..14505e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,6 +30,8 @@ extern crate panic_halt; #[macro_use] extern crate log; +use core::convert::TryInto; + // use core::sync::atomic::{AtomicU32, AtomicBool, Ordering}; use cortex_m_rt::exception; use rtic::cyccnt::{Instant, U32Ext}; @@ -54,11 +56,12 @@ use smoltcp::wire::Ipv4Address; use heapless::{consts::*, String}; -// The desired sampling frequency of the ADCs. -const SAMPLE_FREQUENCY_KHZ: u32 = 500; +// The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is +// equal to 10ns per tick. +const ADC_SAMPLE_TICKS: u32 = 128; // The desired ADC sample processing buffer size. -const SAMPLE_BUFFER_SIZE: usize = 1; +const SAMPLE_BUFFER_SIZE: usize = 8; // The number of cascaded IIR biquads per channel. Select 1 or 2! const IIR_CASCADE_LENGTH: usize = 1; @@ -282,29 +285,51 @@ const APP: () = { hal::dma::dma::StreamsTuple::new(dp.DMA1, ccdr.peripheral.DMA1); // Configure timer 2 to trigger conversions for the ADC - let timer2 = dp.TIM2.timer( - SAMPLE_FREQUENCY_KHZ.khz(), - ccdr.peripheral.TIM2, - &ccdr.clocks, - ); + let mut sampling_timer = { + // The timer frequency is manually adjusted below, so the 1KHz setting here is a + // dont-care. + let mut timer2 = + dp.TIM2.timer(1.khz(), ccdr.peripheral.TIM2, &ccdr.clocks); + + // Configure the timer to count at the designed tick rate. We will manually set the + // period below. + timer2.pause(); + timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz()); + + let mut sampling_timer = timers::SamplingTimer::new(timer2); + sampling_timer.set_period(ADC_SAMPLE_TICKS - 1); + + sampling_timer + }; - let mut sampling_timer = timers::SamplingTimer::new(timer2); let sampling_timer_channels = sampling_timer.channels(); let mut timestamp_timer = { // The timer frequency is manually adjusted below, so the 1KHz setting here is a // dont-care. 
- let timer5 = + let mut timer5 = dp.TIM5.timer(1.khz(), ccdr.peripheral.TIM5, &ccdr.clocks); + // Configure the timer to count at the designed tick rate. We will manually set the + // period below. + timer5.pause(); + timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz()); + // The time stamp timer must run at exactly a multiple of the sample timer based on the // batch size. To accomodate this, we manually set the period identical to the sample // timer, but use a prescaler that is `BATCH_SIZE` longer. let mut timer = timers::TimestampTimer::new(timer5); - timer.set_period(sampling_timer.get_period()); - timer.set_prescaler( - sampling_timer.get_prescaler() * SAMPLE_BUFFER_SIZE as u16, - ); + + let period: u32 = { + let batch_duration: u64 = + SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; + let batches_per_overflow: u64 = + (1u64 + u32::MAX as u64) / batch_duration; + let period: u64 = batch_duration * batches_per_overflow - 1u64; + period.try_into().unwrap() + }; + + timer.set_period(period); timer }; @@ -667,9 +692,13 @@ const APP: () = { ); // Ensure that we have enough time for an IO-update every sample. 
- assert!( - 1.0 / (1000 * SAMPLE_FREQUENCY_KHZ) as f32 > 900_e-9 - ); + let sample_frequency = + (design_parameters::TIMER_FREQUENCY_MHZ as f32 + * 1_000_000.0) + / ADC_SAMPLE_TICKS as f32; + + let sample_period = 1.0 / sample_frequency; + assert!(sample_period > 900_e-9); hrtimer }; From 4da892b8a27c743ac59a2cdf1244145325db5830 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 15:15:51 +0100 Subject: [PATCH 10/44] Update src/digital_input_stamper.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Robert Jördens --- src/digital_input_stamper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index fe8c86b..eaf9683 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -9,7 +9,7 @@ ///! # Design ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is ///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0 -///! triggers, the current TIM5 capture value is recorded as a timestamp. This timestamp can be +///! triggers, the current TIM5 counter value is captured and recorded as a timestamp. This timestamp can be ///! either directly read from the timer channel or can be collected asynchronously via DMA ///! collection. ///! From 3886dab961a7b2bc41edc1a336af81485db490a7 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 15:15:58 +0100 Subject: [PATCH 11/44] Update src/digital_input_stamper.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Robert Jördens --- src/digital_input_stamper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index eaf9683..621264d 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -19,7 +19,7 @@ ///! 
checks to see how many timestamps were collected. These collected timestamps are then returned ///! for further processing. ///! -///! To prevent silently discarding timestamps, the TIm5 input capture over-capture interrupt is +///! To prevent silently discarding timestamps, the TIM5 input capture over-capture interrupt is ///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR ///! which handles the over-capture. ///! From 645a1cd83256ec650e69e2827c9a5987362bb457 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 16:14:27 +0100 Subject: [PATCH 12/44] Updating timestamper after testing --- src/digital_input_stamper.rs | 42 ++++++++++++++++++++++---------- src/main.rs | 12 +++------ src/timers.rs | 47 ++++++++++++++---------------------- 3 files changed, 51 insertions(+), 50 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 621264d..d4204f3 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -8,10 +8,10 @@ ///! ///! # Design ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is -///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0 -///! triggers, the current TIM5 counter value is captured and recorded as a timestamp. This timestamp can be -///! either directly read from the timer channel or can be collected asynchronously via DMA -///! collection. +///! then run in a free-running mode with a configured tick rate (PSC) and maximum count value +///! (ARR). Whenever an edge on DI0 triggers, the current TIM5 counter value is captured and +///! recorded as a timestamp. This timestamp can be either directly read from the timer channel or +///! can be collected asynchronously via DMA collection. ///! ///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many ///! 
timestamps as there are samples, but it is intended that this DMA transfer should never @@ -19,9 +19,9 @@ ///! checks to see how many timestamps were collected. These collected timestamps are then returned ///! for further processing. ///! -///! To prevent silently discarding timestamps, the TIM5 input capture over-capture interrupt is -///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR -///! which handles the over-capture. +///! To prevent silently discarding timestamps, the TIM5 input capture over-capture flag is +///! continually checked. Any over-capture event (which indicates an overwritten timestamp) then +///! triggers a panic to indicate the dropped timestamp so that design parameters can be adjusted. ///! ///! # Tradeoffs ///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they @@ -77,9 +77,6 @@ impl InputStamper { let input_capture = timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4); - // Listen for over-capture events, which indicates an over-run of DI0 timestamps. - input_capture.listen_overcapture(); - // For small batch sizes, the overhead of DMA can become burdensome to the point where // timing is not met. The DMA requires 500ns overhead, whereas a direct register read only // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach. @@ -89,7 +86,7 @@ impl InputStamper { // Set up the DMA transfer. let dma_config = DmaConfig::default().memory_increment(true); - let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = + let timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = Transfer::init( stream, input_capture, @@ -97,8 +94,6 @@ impl InputStamper { None, dma_config, ); - - timestamp_transfer.start(|_| {}); (Some(timestamp_transfer), None) } else { (None, Some(input_capture)) @@ -112,12 +107,29 @@ impl InputStamper { } } + /// Start capture timestamps on DI0. 
+ pub fn start(&mut self) { + if let Some(transfer) = &mut self.transfer { + transfer.start(|capture_channel| { + capture_channel.enable(); + }); + } else { + self.capture_channel.as_mut().unwrap().enable(); + } + } + /// Get all of the timestamps that have occurred during the last processing cycle. pub fn acquire_buffer(&mut self) -> &[u32] { // If we are using DMA, finish the transfer and swap over buffers. if self.transfer.is_some() { let next_buffer = self.next_buffer.take().unwrap(); + self.transfer.as_mut().unwrap().pause(|channel| { + if channel.check_overcapture() { + panic!("DI0 timestamp overrun"); + } + }); + let (prev_buffer, _, remaining_transfers) = self .transfer .as_mut() @@ -132,6 +144,10 @@ impl InputStamper { // timestamps actually collected. &self.next_buffer.as_ref().unwrap()[..valid_count] } else { + if self.capture_channel.as_ref().unwrap().check_overcapture() { + panic!("DI0 timestamp overrun"); + } + // If we aren't using DMA, just manually check the input capture channel for a // timestamp. match self.capture_channel.as_mut().unwrap().latest_capture() { diff --git a/src/main.rs b/src/main.rs index 14505e8..a22fe1c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -58,10 +58,10 @@ use heapless::{consts::*, String}; // The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is // equal to 10ns per tick. -const ADC_SAMPLE_TICKS: u32 = 128; +const ADC_SAMPLE_TICKS: u32 = 256; // The desired ADC sample processing buffer size. -const SAMPLE_BUFFER_SIZE: usize = 8; +const SAMPLE_BUFFER_SIZE: usize = 1; // The number of cascaded IIR biquads per channel. Select 1 or 2! const IIR_CASCADE_LENGTH: usize = 1; @@ -831,7 +831,7 @@ const APP: () = { // Utilize the cycle counter for RTIC scheduling. 
cp.DWT.enable_cycle_counter(); - let input_stamper = { + let mut input_stamper = { let trigger = gpioa.pa3.into_alternate_af2(); digital_input_stamper::InputStamper::new( trigger, @@ -844,6 +844,7 @@ const APP: () = { // Start sampling ADCs. sampling_timer.start(); timestamp_timer.start(); + input_stamper.start(); init::LateResources { afes: (afe0, afe1), @@ -1085,11 +1086,6 @@ const APP: () = { panic!("DAC1 output error"); } - #[task(binds = TIM5, priority = 3)] - fn di0(_: di0::Context) { - panic!("DI0 timestamp overrun"); - } - extern "C" { // hw interrupt handlers for RTIC to use for scheduling tasks // one per priority diff --git a/src/timers.rs b/src/timers.rs index 74b4731..03bc0aa 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -32,21 +32,6 @@ macro_rules! timer_channels { self.channels.take().unwrap() } - /// Get the prescaler of a timer. - #[allow(dead_code)] - pub fn get_prescaler(&self) -> u16 { - let regs = unsafe { &*hal::stm32::$TY::ptr() }; - regs.psc.read().psc().bits() + 1 - } - - /// Manually set the prescaler of the timer. - #[allow(dead_code)] - pub fn set_prescaler(&mut self, prescaler: u16) { - let regs = unsafe { &*hal::stm32::$TY::ptr() }; - assert!(prescaler >= 1); - regs.psc.write(|w| w.psc().bits(prescaler - 1)); - } - /// Get the period of the timer. #[allow(dead_code)] pub fn get_period(&self) -> u32 { @@ -176,20 +161,6 @@ macro_rules! timer_channels { } } - /// Listen for over-capture events on the timer channel. - /// - /// # Note - /// An over-capture event is when a previous capture was lost due to a new capture. - /// - /// "Listening" is equivalent to enabling the interrupt for the event. - #[allow(dead_code)] - pub fn listen_overcapture(&self) { - // Note(unsafe): This channel owns all access to the specific timer channel. - // Only atomic operations on completed on the timer registers. - let regs = unsafe { &*<$TY>::ptr() }; - regs.dier.modify(|_, w| w.[]().set_bit()); - } - /// Allow the channel to generate DMA requests. 
#[allow(dead_code)] pub fn listen_dma(&self) { @@ -198,6 +169,24 @@ macro_rules! timer_channels { let regs = unsafe { &*<$TY>::ptr() }; regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); } + + /// Enable the input capture to begin capturing timer values. + #[allow(dead_code)] + pub fn enable(&mut self) { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations on completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.ccer.modify(|_, w| w.[< cc $index e >]().set_bit()); + } + + /// Check if an over-capture event has occurred. + #[allow(dead_code)] + pub fn check_overcapture(&self) -> bool { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations on completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.sr.read().[< cc $index of >]().bit_is_set() + } } // Note(unsafe): This manually implements DMA support for input-capture channels. This From 2e0681ebccc3c0c0cb8e3388b5fc4f0688e19d01 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 8 Dec 2020 16:38:07 +0100 Subject: [PATCH 13/44] Fixing power-of-two calculation --- src/main.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index a22fe1c..308a4f0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -325,7 +325,16 @@ const APP: () = { SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; let batches_per_overflow: u64 = (1u64 + u32::MAX as u64) / batch_duration; - let period: u64 = batch_duration * batches_per_overflow - 1u64; + + // Calculate the largest power-of-two that is less than `batches_per_overflow`. + // This is completed by eliminating the least significant bits of the value until + // only the msb remains, which is always a power of two. 
+ let mut j = batches_per_overflow; + while (j & (j - 1)) != 0 { + j = j & (j - 1); + } + + let period: u64 = batch_duration * j - 1u64; period.try_into().unwrap() }; From fc81f3d55db88f954dd64225cb6bc96a84cf563c Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Tue, 15 Dec 2020 14:34:14 +0100 Subject: [PATCH 14/44] Removing DMA support from DI0 timestamping --- src/digital_input_stamper.rs | 130 ++++++----------------------------- src/main.rs | 4 +- 2 files changed, 23 insertions(+), 111 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index d4204f3..5843528 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -3,9 +3,6 @@ ///! This module provides a means of timestamping the rising edges of an external reference clock on ///! the DI0 with a timer value from TIM5. ///! -///! This module only supports input clocks on DI0 and may or may not utilize DMA to collect -///! timestamps. -///! ///! # Design ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is ///! then run in a free-running mode with a configured tick rate (PSC) and maximum count value @@ -13,12 +10,6 @@ ///! recorded as a timestamp. This timestamp can be either directly read from the timer channel or ///! can be collected asynchronously via DMA collection. ///! -///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many -///! timestamps as there are samples, but it is intended that this DMA transfer should never -///! complete. Instead, when all samples are collected, the module pauses the DMA transfer and -///! checks to see how many timestamps were collected. These collected timestamps are then returned -///! for further processing. -///! ///! To prevent silently discarding timestamps, the TIM5 input capture over-capture flag is ///! continually checked. Any over-capture event (which indicates an overwritten timestamp) then ///! 
triggers a panic to indicate the dropped timestamp so that design parameters can be adjusted. @@ -27,35 +18,18 @@ ///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they ///! are being prematurely stopped (such is the case here). As such, for a sample batch size of 1, ///! this can take up a significant amount of the total available processing time for the samples. -///! To avoid this, the module does not use DMA when the sample batch size is one. Instead, the -///! module manually checks for any captured timestamps from the timer capture channel manually. In -///! this mode, the maximum input clock frequency supported is equal to the configured sample rate. +///! This module checks for any captured timestamps from the timer capture channel manually. In +///! this mode, the maximum input clock frequency supported is dependant on the sampling rate and +///! batch size. ///! -///! There is a small window while the DMA buffers are swapped where a timestamp could potentially -///! be lost. To prevent this, the `acuire_buffer()` method should not be pre-empted. Any lost -///! timestamp will trigger an over-capture interrupt. -use super::{ - hal, timers, DmaConfig, PeripheralToMemory, Transfer, SAMPLE_BUFFER_SIZE, -}; - -// The DMA buffers must exist in a location where DMA can access. By default, RAM uses DTCM, which -// is off-limits to the normal DMA peripheral. Instead, we use AXISRAM. -#[link_section = ".axisram.buffers"] -static mut BUF: [[u32; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2]; +///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If +///! timestamping is desired in DI1, a separate timer + capture channel will be necessary. +use super::{hal, timers}; /// The timestamper for DI0 reference clock inputs. 
pub struct InputStamper { _di0_trigger: hal::gpio::gpioa::PA3>, - next_buffer: Option<&'static mut [u32; SAMPLE_BUFFER_SIZE]>, - transfer: Option< - Transfer< - hal::dma::dma::Stream6, - timers::tim5::Channel4InputCapture, - PeripheralToMemory, - &'static mut [u32; SAMPLE_BUFFER_SIZE], - >, - >, - capture_channel: Option, + capture_channel: timers::tim5::Channel4InputCapture, } impl InputStamper { @@ -63,100 +37,40 @@ impl InputStamper { /// /// # Args /// * `trigger` - The capture trigger input pin. - /// * `stream` - The DMA stream to use for collecting timestamps. /// * `timer_channel - The timer channel used for capturing timestamps. - /// * `batch_size` - The number of samples collected per processing batch. pub fn new( trigger: hal::gpio::gpioa::PA3>, - stream: hal::dma::dma::Stream6, timer_channel: timers::tim5::Channel4, - batch_size: usize, ) -> Self { // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the // capture source. let input_capture = timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4); - // For small batch sizes, the overhead of DMA can become burdensome to the point where - // timing is not met. The DMA requires 500ns overhead, whereas a direct register read only - // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach. - let (transfer, input_capture) = if batch_size >= 2 { - input_capture.listen_dma(); - - // Set up the DMA transfer. - let dma_config = DmaConfig::default().memory_increment(true); - - let timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> = - Transfer::init( - stream, - input_capture, - unsafe { &mut BUF[0] }, - None, - dma_config, - ); - (Some(timestamp_transfer), None) - } else { - (None, Some(input_capture)) - }; - Self { - next_buffer: unsafe { Some(&mut BUF[1]) }, - transfer, capture_channel: input_capture, _di0_trigger: trigger, } } - /// Start capture timestamps on DI0. + /// Start to capture timestamps on DI0. 
pub fn start(&mut self) { - if let Some(transfer) = &mut self.transfer { - transfer.start(|capture_channel| { - capture_channel.enable(); - }); - } else { - self.capture_channel.as_mut().unwrap().enable(); - } + self.capture_channel.enable(); } - /// Get all of the timestamps that have occurred during the last processing cycle. - pub fn acquire_buffer(&mut self) -> &[u32] { - // If we are using DMA, finish the transfer and swap over buffers. - if self.transfer.is_some() { - let next_buffer = self.next_buffer.take().unwrap(); - - self.transfer.as_mut().unwrap().pause(|channel| { - if channel.check_overcapture() { - panic!("DI0 timestamp overrun"); - } - }); - - let (prev_buffer, _, remaining_transfers) = self - .transfer - .as_mut() - .unwrap() - .next_transfer(next_buffer) - .unwrap(); - let valid_count = prev_buffer.len() - remaining_transfers; - - self.next_buffer.replace(prev_buffer); - - // Note that we likely didn't finish the transfer, so only return the number of - // timestamps actually collected. - &self.next_buffer.as_ref().unwrap()[..valid_count] - } else { - if self.capture_channel.as_ref().unwrap().check_overcapture() { - panic!("DI0 timestamp overrun"); - } - - // If we aren't using DMA, just manually check the input capture channel for a - // timestamp. - match self.capture_channel.as_mut().unwrap().latest_capture() { - Some(stamp) => { - self.next_buffer.as_mut().unwrap()[0] = stamp; - &self.next_buffer.as_ref().unwrap()[..1] - } - None => &[], - } + /// Get the latest timestamp that has occurred. + /// + /// # Note + /// This function must be called sufficiently often. If an over-capture event occurs, this + /// function will panic, as this indicates a timestamp was inadvertently dropped. + /// + /// To prevent timestamp loss, the batch size and sampling rate must be adjusted such that at + /// most one timestamp will occur in each data processing cycle. 
+ pub fn latest_timestamp(&mut self) -> Option { + if self.capture_channel.check_overcapture() { + panic!("DI0 timestamp overrun"); } + + self.capture_channel.latest_capture() } } diff --git a/src/main.rs b/src/main.rs index 308a4f0..5ae2b0f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -844,9 +844,7 @@ const APP: () = { let trigger = gpioa.pa3.into_alternate_af2(); digital_input_stamper::InputStamper::new( trigger, - dma_streams.6, timestamp_timer_channels.ch4, - SAMPLE_BUFFER_SIZE, ) }; @@ -882,7 +880,7 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; - let _timestamps = c.resources.input_stamper.acquire_buffer(); + let _timestamp = c.resources.input_stamper.latest_timestamp(); for channel in 0..adc_samples.len() { for sample in 0..adc_samples[0].len() { From e89db65722672bbbbc9178b5ba1214126854f17a Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 15:25:31 -0800 Subject: [PATCH 15/44] rename trig.rs -> cossin.rs --- dsp/src/{trig.rs => cossin.rs} | 0 dsp/src/lib.rs | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename dsp/src/{trig.rs => cossin.rs} (100%) diff --git a/dsp/src/trig.rs b/dsp/src/cossin.rs similarity index 100% rename from dsp/src/trig.rs rename to dsp/src/cossin.rs diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 6dd20f7..ef0c131 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -18,10 +18,11 @@ pub fn shift_round(x: i32, shift: usize) -> i32 { (x + (1 << (shift - 1))) >> shift } +pub mod atan2; +pub mod cossin; pub mod iir; pub mod lockin; pub mod pll; -pub mod trig; pub mod unwrap; #[cfg(test)] From 17f9f0750eee1ec0fff98d46720e8a45b8702148 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:01:50 -0800 Subject: [PATCH 16/44] dsp: move abs to lib.rs --- dsp/src/iir.rs | 12 +----------- dsp/src/lib.rs | 13 +++++++++++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index c6f2100..48c92e9 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ 
-2,23 +2,13 @@ use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; use core::f32; +use super::abs; // These are implemented here because core::f32 doesn't have them (yet). // They are naive and don't handle inf/nan. // `compiler-intrinsics`/llvm should have better (robust, universal, and // faster) implementations. -fn abs(x: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if x >= T::default() { - x - } else { - -x - } -} - fn copysign(x: T, y: T) -> T where T: PartialOrd + Default + Neg, diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index ef0c131..2fbd121 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,6 +1,8 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] +use core::ops::Neg; + pub type Complex = (T, T); /// Round up half. @@ -18,6 +20,17 @@ pub fn shift_round(x: i32, shift: usize) -> i32 { (x + (1 << (shift - 1))) >> shift } +fn abs(x: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if x >= T::default() { + x + } else { + -x + } +} + pub mod atan2; pub mod cossin; pub mod iir; From 6d651da758f44d0a8b6702cb4e364da3fad276fa Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:02:17 -0800 Subject: [PATCH 17/44] dsp: add f64 isclose testing function --- dsp/src/testing.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs index 1a8e109..098ec87 100644 --- a/dsp/src/testing.rs +++ b/dsp/src/testing.rs @@ -1,6 +1,10 @@ use super::Complex; -pub fn isclose(a: f32, b: f32, rtol: f32, atol: f32) -> bool { +pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool { + (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol +} + +pub fn isclosef(a: f32, b: f32, rtol: f32, atol: f32) -> bool { (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol } @@ -10,7 +14,7 @@ pub fn complex_isclose( rtol: f32, atol: f32, ) -> bool { - isclose(a.0, b.0, rtol, atol) && isclose(a.1, b.1, rtol, atol) + isclosef(a.0, b.0, rtol, 
atol) && isclosef(a.1, b.1, rtol, atol) } pub fn complex_allclose( From 5d055b01a03f31b0950ce5e36c214a1fc6289a96 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:02:42 -0800 Subject: [PATCH 18/44] dsp: add atan2 --- dsp/src/atan2.rs | 126 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 dsp/src/atan2.rs diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs new file mode 100644 index 0000000..a5f4d3e --- /dev/null +++ b/dsp/src/atan2.rs @@ -0,0 +1,126 @@ +use super::{abs, shift_round}; + +/// 2-argument arctangent function. +/// +/// This implementation uses all integer arithmetic for fast +/// computation. It is designed to have high accuracy near the axes +/// and lower away from the axes. It is additionally designed so that +/// the error changes slowly with respect to the angle. +/// +/// # Arguments +/// +/// * `y` - Y-axis component. +/// * `x` - X-axis component. +/// +/// # Returns +/// +/// The angle between the x-axis and the ray to the point (x,y). The +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// corresponds to an angle of -pi and i32::MAX corresponds to an +/// angle of +pi. +pub fn atan2(y: i32, x: i32) -> i32 { + let y = y >> 16; + let x = x >> 16; + + let ux = abs::(x); + let uy = abs::(y); + + // Uses the general procedure described in the following + // Mathematics stack exchange answer: + // + // https://math.stackexchange.com/a/1105038/583981 + // + // The atan approximation method has been modified to be cheaper + // to compute and to be more compatible with integer + // arithmetic. The approximation technique used here is + // + // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // + // which is taken from Rajan 2006: Efficient Approximations for + // the Arctangent Function. 
+ let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + + if max == 0 { + return 0; + } + + let ratio = (min << 15) / max; + + let mut angle = { + // pi/4, referenced to i16::MAX + const PI_4_FACTOR: i32 = 25735; + // 0.285, referenced to i16::MAX + const FACTOR_0285: i32 = 9339; + // 1/pi, referenced to u16::MAX + const PI_INVERTED_FACTOR: i32 = 20861; + + let r1 = shift_round(ratio * PI_4_FACTOR, 15); + let r2 = shift_round( + (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), + 15, + ); + (r1 + r2) * PI_INVERTED_FACTOR + }; + + if uy > ux { + angle = (i32::MAX >> 1) - angle; + } + + if x < 0 { + angle = i32::MAX - angle; + } + + if y < 0 { + angle *= -1; + } + + angle +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::consts::PI; + use crate::testing::isclose; + + fn angle_to_axis(angle: f64) -> f64 { + let angle = angle % (PI / 2.); + (PI / 2. - angle).min(angle) + } + + #[test] + fn absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } +} From e257545321cb27fd808c35f5c7d8ddedca632fbb Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 
16:14:11 -0800 Subject: [PATCH 19/44] fix formatting --- dsp/src/atan2.rs | 2 +- dsp/src/iir.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs index a5f4d3e..2643d19 100644 --- a/dsp/src/atan2.rs +++ b/dsp/src/atan2.rs @@ -80,8 +80,8 @@ pub fn atan2(y: i32, x: i32) -> i32 { #[cfg(test)] mod tests { use super::*; - use core::f64::consts::PI; use crate::testing::isclose; + use core::f64::consts::PI; fn angle_to_axis(angle: f64) -> f64 { let angle = angle % (PI / 2.); diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 48c92e9..5ff0970 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,8 +1,8 @@ use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; -use core::f32; use super::abs; +use core::f32; // These are implemented here because core::f32 doesn't have them (yet). // They are naive and don't handle inf/nan. From 7c4f6082068d8ae3bda2896f29b00c7b4ab50ee4 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:26:44 -0800 Subject: [PATCH 20/44] move cossin and atan2 into the same trig file --- dsp/benches/cossin.rs | 2 +- dsp/src/atan2.rs | 126 --------------------------------- dsp/src/lib.rs | 3 +- dsp/src/{cossin.rs => trig.rs} | 123 +++++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 130 deletions(-) delete mode 100644 dsp/src/atan2.rs rename dsp/src/{cossin.rs => trig.rs} (57%) diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs index 4e23774..9f88e1b 100644 --- a/dsp/benches/cossin.rs +++ b/dsp/benches/cossin.rs @@ -1,6 +1,6 @@ use core::f32::consts::PI; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::trig::cossin; +use dsp::cossin::cossin; fn cossin_bench(c: &mut Criterion) { let zi = -0x7304_2531_i32; diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs deleted file mode 100644 index 2643d19..0000000 --- a/dsp/src/atan2.rs +++ /dev/null @@ -1,126 +0,0 @@ -use super::{abs, shift_round}; - -/// 2-argument arctangent 
function. -/// -/// This implementation uses all integer arithmetic for fast -/// computation. It is designed to have high accuracy near the axes -/// and lower away from the axes. It is additionally designed so that -/// the error changes slowly with respect to the angle. -/// -/// # Arguments -/// -/// * `y` - Y-axis component. -/// * `x` - X-axis component. -/// -/// # Returns -/// -/// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN to i32::MAX, where i32::MIN -/// corresponds to an angle of -pi and i32::MAX corresponds to an -/// angle of +pi. -pub fn atan2(y: i32, x: i32) -> i32 { - let y = y >> 16; - let x = x >> 16; - - let ux = abs::(x); - let uy = abs::(y); - - // Uses the general procedure described in the following - // Mathematics stack exchange answer: - // - // https://math.stackexchange.com/a/1105038/583981 - // - // The atan approximation method has been modified to be cheaper - // to compute and to be more compatible with integer - // arithmetic. The approximation technique used here is - // - // pi / 4 * x + 0.285 * x * (1 - abs(x)) - // - // which is taken from Rajan 2006: Efficient Approximations for - // the Arctangent Function. 
- let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; - - if max == 0 { - return 0; - } - - let ratio = (min << 15) / max; - - let mut angle = { - // pi/4, referenced to i16::MAX - const PI_4_FACTOR: i32 = 25735; - // 0.285, referenced to i16::MAX - const FACTOR_0285: i32 = 9339; - // 1/pi, referenced to u16::MAX - const PI_INVERTED_FACTOR: i32 = 20861; - - let r1 = shift_round(ratio * PI_4_FACTOR, 15); - let r2 = shift_round( - (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), - 15, - ); - (r1 + r2) * PI_INVERTED_FACTOR - }; - - if uy > ux { - angle = (i32::MAX >> 1) - angle; - } - - if x < 0 { - angle = i32::MAX - angle; - } - - if y < 0 { - angle *= -1; - } - - angle -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::testing::isclose; - use core::f64::consts::PI; - - fn angle_to_axis(angle: f64) -> f64 { - let angle = angle % (PI / 2.); - (PI / 2. - angle).min(angle) - } - - #[test] - fn absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; - } - - for &x in test_vals.iter() { - for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } - } - } -} diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 2fbd121..90f62f6 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs 
@@ -31,11 +31,10 @@ where } } -pub mod atan2; -pub mod cossin; pub mod iir; pub mod lockin; pub mod pll; +pub mod trig; pub mod unwrap; #[cfg(test)] diff --git a/dsp/src/cossin.rs b/dsp/src/trig.rs similarity index 57% rename from dsp/src/cossin.rs rename to dsp/src/trig.rs index 5a99232..72435e0 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/trig.rs @@ -1,8 +1,85 @@ -use super::Complex; +use super::{abs, shift_round, Complex}; use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); +/// 2-argument arctangent function. +/// +/// This implementation uses all integer arithmetic for fast +/// computation. It is designed to have high accuracy near the axes +/// and lower away from the axes. It is additionally designed so that +/// the error changes slowly with respect to the angle. +/// +/// # Arguments +/// +/// * `y` - Y-axis component. +/// * `x` - X-axis component. +/// +/// # Returns +/// +/// The angle between the x-axis and the ray to the point (x,y). The +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// corresponds to an angle of -pi and i32::MAX corresponds to an +/// angle of +pi. +pub fn atan2(y: i32, x: i32) -> i32 { + let y = y >> 16; + let x = x >> 16; + + let ux = abs::(x); + let uy = abs::(y); + + // Uses the general procedure described in the following + // Mathematics stack exchange answer: + // + // https://math.stackexchange.com/a/1105038/583981 + // + // The atan approximation method has been modified to be cheaper + // to compute and to be more compatible with integer + // arithmetic. The approximation technique used here is + // + // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // + // which is taken from Rajan 2006: Efficient Approximations for + // the Arctangent Function. 
+ let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + + if max == 0 { + return 0; + } + + let ratio = (min << 15) / max; + + let mut angle = { + // pi/4, referenced to i16::MAX + const PI_4_FACTOR: i32 = 25735; + // 0.285, referenced to i16::MAX + const FACTOR_0285: i32 = 9339; + // 1/pi, referenced to u16::MAX + const PI_INVERTED_FACTOR: i32 = 20861; + + let r1 = shift_round(ratio * PI_4_FACTOR, 15); + let r2 = shift_round( + (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), + 15, + ); + (r1 + r2) * PI_INVERTED_FACTOR + }; + + if uy > ux { + angle = (i32::MAX >> 1) - angle; + } + + if x < 0 { + angle = i32::MAX - angle; + } + + if y < 0 { + angle *= -1; + } + + angle +} + /// Compute the cosine and sine of an angle. /// This is ported from the MiSoC cossin core. /// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py) @@ -75,6 +152,14 @@ pub fn cossin(phase: i32) -> Complex { #[cfg(test)] mod tests { use super::*; + use crate::testing::isclose; + use core::f64::consts::PI; + + fn angle_to_axis(angle: f64) -> f64 { + let angle = angle % (PI / 2.); + (PI / 2. - angle).min(angle) + } + #[test] fn error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. 
@@ -143,4 +228,40 @@ mod tests { assert!(max_err.0 < 1.1e-5); assert!(max_err.1 < 1.1e-5); } + + #[test] + fn absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } } From 85ae70fe6205ce25e6b80840c92aaf1e4221d2a5 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:28:49 -0800 Subject: [PATCH 21/44] rename trig tests to delineate between cossin and atan2 --- dsp/src/trig.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 72435e0..4dc26be 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -161,7 +161,7 @@ mod tests { } #[test] - fn error_max_rms_all_phase() { + fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. 
const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64; const MAX_PHASE: f64 = (1i64 << 32) as f64; @@ -230,7 +230,7 @@ mod tests { } #[test] - fn absolute_error() { + fn atan2_absolute_error() { const NUM_VALS: usize = 1_001; let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; let val_bounds: (f64, f64) = (-1., 1.); From 2ddaab8fae34b4f01d2f2029eb18f3676e41ab56 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:57:18 -0800 Subject: [PATCH 22/44] dsp: fix bench import path --- dsp/benches/cossin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs index 9f88e1b..4e23774 100644 --- a/dsp/benches/cossin.rs +++ b/dsp/benches/cossin.rs @@ -1,6 +1,6 @@ use core::f32::consts::PI; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::cossin::cossin; +use dsp::trig::cossin; fn cossin_bench(c: &mut Criterion) { let zi = -0x7304_2531_i32; From d9d500743f41aa150c055263aedfdba36c4ffbd0 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 08:02:54 -0800 Subject: [PATCH 23/44] simplify atan initial angle expression --- dsp/src/trig.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 4dc26be..e306356 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -50,19 +50,13 @@ pub fn atan2(y: i32, x: i32) -> i32 { let ratio = (min << 15) / max; let mut angle = { - // pi/4, referenced to i16::MAX - const PI_4_FACTOR: i32 = 25735; - // 0.285, referenced to i16::MAX - const FACTOR_0285: i32 = 9339; - // 1/pi, referenced to u16::MAX - const PI_INVERTED_FACTOR: i32 = 20861; + const K1: i32 = + ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32; + const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32; - let r1 = shift_round(ratio * PI_4_FACTOR, 15); - let r2 = shift_round( - (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), - 15, - ); - (r1 + r2) * 
PI_INVERTED_FACTOR + let ratio_squared = shift_round(ratio * ratio, 15); + + ratio * K1 - K2 * ratio_squared }; if uy > ux { From d7111a3aa811deed34f01c0682ee6fada8978c61 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 08:04:53 -0800 Subject: [PATCH 24/44] dsp/trig: let compiler infer type parameter in atan2 abs call --- dsp/src/trig.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index e306356..51e8b2e 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -25,8 +25,8 @@ pub fn atan2(y: i32, x: i32) -> i32 { let y = y >> 16; let x = x >> 16; - let ux = abs::(x); - let uy = abs::(y); + let ux = abs(x); + let uy = abs(y); // Uses the general procedure described in the following // Mathematics stack exchange answer: From 5717991ada1847549b3154a20803f21c5688c3e5 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:31:18 -0800 Subject: [PATCH 25/44] atan2: result range is from i32::MIN+1 to i32::MAX --- dsp/src/trig.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 51e8b2e..57e4fec 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -18,7 +18,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// # Returns /// /// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1 /// corresponds to an angle of -pi and i32::MAX corresponds to an /// angle of +pi. 
pub fn atan2(y: i32, x: i32) -> i32 { From cb38c3e3bd3f28f32bfba3ed2abf048772af45a9 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:31:38 -0800 Subject: [PATCH 26/44] atan2: clarify sharing bits between atan argument and constant factors --- dsp/src/trig.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 57e4fec..d25d50b 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -47,16 +47,21 @@ pub fn atan2(y: i32, x: i32) -> i32 { return 0; } - let ratio = (min << 15) / max; + // We need to share the 31 available non-sign bits between the + // atan argument and constant factors used in the atan + // approximation. Sharing the bits roughly equally between them + // gives good accuracy. + const ATAN_ARGUMENT_BITS: usize = 15; + let ratio = (min << ATAN_ARGUMENT_BITS) / max; let mut angle = { - const K1: i32 = - ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32; - const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32; + const K1: i32 = ((1. / 4. 
+ 0.285 / PI) + * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) + as i32; + const K2: i32 = + ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32; - let ratio_squared = shift_round(ratio * ratio, 15); - - ratio * K1 - K2 * ratio_squared + ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) }; if uy > ux { From 1f28949bc5d859144a6fc96cdf28cd6959dd2e29 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:47:39 -0800 Subject: [PATCH 27/44] atan2: store sign bits and greater of |x| and |y| --- dsp/src/trig.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index d25d50b..53aecfa 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -1,4 +1,4 @@ -use super::{abs, shift_round, Complex}; +use super::{shift_round, Complex}; use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); @@ -22,11 +22,18 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// corresponds to an angle of -pi and i32::MAX corresponds to an /// angle of +pi. pub fn atan2(y: i32, x: i32) -> i32 { - let y = y >> 16; - let x = x >> 16; + let mut y = y >> 16; + let mut x = x >> 16; - let ux = abs(x); - let uy = abs(y); + let sign = ((y >> 14) & 2) | ((x >> 15) & 1); + if sign & 1 == 1 { + x *= -1; + } + if sign & 2 == 2 { + y *= -1; + } + + let y_greater = y > x; // Uses the general procedure described in the following // Mathematics stack exchange answer: @@ -41,7 +48,7 @@ pub fn atan2(y: i32, x: i32) -> i32 { // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. 
- let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + let (min, max) = if y_greater { (x, y) } else { (y, x) }; if max == 0 { return 0; @@ -64,15 +71,15 @@ pub fn atan2(y: i32, x: i32) -> i32 { ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) }; - if uy > ux { + if y_greater { angle = (i32::MAX >> 1) - angle; } - if x < 0 { + if sign & 1 == 1 { angle = i32::MAX - angle; } - if y < 0 { + if sign & 2 == 2 { angle *= -1; } From 56641d5838cba59fa55d82af2e0d495185b227cc Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:02:35 -0800 Subject: [PATCH 28/44] atan2: specify why we cannot use more than 15 bits for the atan argument --- dsp/src/trig.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 53aecfa..6d0acff 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -57,7 +57,9 @@ pub fn atan2(y: i32, x: i32) -> i32 { // We need to share the 31 available non-sign bits between the // atan argument and constant factors used in the atan // approximation. Sharing the bits roughly equally between them - // gives good accuracy. + // gives good accuracy. Additionally, we cannot increase the + // number of atan argument bits beyond 15 because we must square + // it. 
const ATAN_ARGUMENT_BITS: usize = 15; let ratio = (min << ATAN_ARGUMENT_BITS) / max; From 09a744f59c5771fa044acd1694809e4d055ef157 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:03:16 -0800 Subject: [PATCH 29/44] dsp: move iir generic math functions to top-level module scope --- dsp/src/iir.rs | 69 +------------------------------------------------- dsp/src/lib.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 69 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 5ff0970..dbec8d8 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,75 +1,8 @@ -use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; -use super::abs; +use super::{abs, copysign, macc, max, min}; use core::f32; -// These are implemented here because core::f32 doesn't have them (yet). -// They are naive and don't handle inf/nan. -// `compiler-intrinsics`/llvm should have better (robust, universal, and -// faster) implementations. - -fn copysign(x: T, y: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if (x >= T::default() && y >= T::default()) - || (x <= T::default() && y <= T::default()) - { - x - } else { - -x - } -} - -#[cfg(not(feature = "nightly"))] -fn max(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x > y { - x - } else { - y - } -} - -#[cfg(not(feature = "nightly"))] -fn min(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x < y { - x - } else { - y - } -} - -#[cfg(feature = "nightly")] -fn max(x: f32, y: f32) -> f32 { - core::intrinsics::maxnumf32(x, y) -} - -#[cfg(feature = "nightly")] -fn min(x: f32, y: f32) -> f32 { - core::intrinsics::minnumf32(x, y) -} - -// Multiply-accumulate vectors `x` and `a`. -// -// A.k.a. dot product. -// Rust/LLVM optimize this nicely. -fn macc(y0: T, x: &[T], a: &[T]) -> T -where - T: Add + Mul + Copy, -{ - x.iter() - .zip(a) - .map(|(x, a)| *x * *a) - .fold(y0, |y, xa| y + xa) -} - /// IIR state and coefficients type. 
/// /// To represent the IIR state (input and output memory) during the filter update diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 90f62f6..67b1882 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] -use core::ops::Neg; +use core::ops::{Add, Mul, Neg}; pub type Complex = (T, T); @@ -31,6 +31,72 @@ where } } +// These are implemented here because core::f32 doesn't have them (yet). +// They are naive and don't handle inf/nan. +// `compiler-intrinsics`/llvm should have better (robust, universal, and +// faster) implementations. + +fn copysign(x: T, y: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if (x >= T::default() && y >= T::default()) + || (x <= T::default() && y <= T::default()) + { + x + } else { + -x + } +} + +#[cfg(not(feature = "nightly"))] +fn max(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x > y { + x + } else { + y + } +} + +#[cfg(not(feature = "nightly"))] +fn min(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x < y { + x + } else { + y + } +} + +#[cfg(feature = "nightly")] +fn max(x: f32, y: f32) -> f32 { + core::intrinsics::maxnumf32(x, y) +} + +#[cfg(feature = "nightly")] +fn min(x: f32, y: f32) -> f32 { + core::intrinsics::minnumf32(x, y) +} + +// Multiply-accumulate vectors `x` and `a`. +// +// A.k.a. dot product. +// Rust/LLVM optimize this nicely. 
+fn macc(y0: T, x: &[T], a: &[T]) -> T +where + T: Add + Mul + Copy, +{ + x.iter() + .zip(a) + .map(|(x, a)| *x * *a) + .fold(y0, |y, xa| y + xa) +} + pub mod iir; pub mod lockin; pub mod pll; From 6ffc42021edbcf34e4eabd2a3213b8d191e22e5e Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:09:12 -0800 Subject: [PATCH 30/44] move atan2 test before cossin test to mimic function order --- dsp/src/trig.rs | 72 ++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 6d0acff..ccf9292 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -168,6 +168,42 @@ mod tests { (PI / 2. - angle).min(angle) } + #[test] + fn atan2_absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } + #[test] fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. 
@@ -236,40 +272,4 @@ mod tests { assert!(max_err.0 < 1.1e-5); assert!(max_err.1 < 1.1e-5); } - - #[test] - fn atan2_absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; - } - - for &x in test_vals.iter() { - for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } - } - } } From 9c5e68ceea82c8f9096473cfb74d0ef930d34843 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 11:34:39 -0800 Subject: [PATCH 31/44] atan2: test min and max angle inputs --- dsp/src/trig.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index ccf9292..5d73846 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -179,10 +179,10 @@ mod tests { test_vals[i] = val_bounds.0 + i as f64 * val_delta; } + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; for &x in test_vals.iter() { for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() / i16::MAX as f64; let tol = atol + rtol * angle_to_axis(actual).abs(); @@ -202,6 +202,29 @@ mod tests { } } } + + // test min and max explicitly + for (x, y) in [ + ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32), + ((i16::MIN 
as i32 + 1) << 16, (1 << 16) as i32), + ] + .iter() + { + let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64; + let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64; + let actual = + (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64; + let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI; + let tol = atol + rtol * angle_to_axis(actual).abs(); + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", *x, *y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } } #[test] From 17cf71f22bc3978cb50f21c50367f108aa8f9ee9 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 11:39:32 -0800 Subject: [PATCH 32/44] atan2: replace min, max with x, y --- dsp/src/trig.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 5d73846..13ce844 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -48,9 +48,11 @@ pub fn atan2(y: i32, x: i32) -> i32 { // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. - let (min, max) = if y_greater { (x, y) } else { (y, x) }; + if y_greater { + core::mem::swap(&mut x, &mut y); + } - if max == 0 { + if x == 0 { return 0; } @@ -61,7 +63,7 @@ pub fn atan2(y: i32, x: i32) -> i32 { // number of atan argument bits beyond 15 because we must square // it. const ATAN_ARGUMENT_BITS: usize = 15; - let ratio = (min << ATAN_ARGUMENT_BITS) / max; + let ratio = (y << ATAN_ARGUMENT_BITS) / x; let mut angle = { const K1: i32 = ((1. / 4. 
+ 0.285 / PI) From 3125365a1580731d104d74f3023f7c8fb5e1ff9e Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 14:01:57 -0800 Subject: [PATCH 33/44] add atan2 host benchmark --- dsp/Cargo.toml | 2 +- dsp/benches/cossin.rs | 13 ------------- dsp/benches/trig.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 14 deletions(-) delete mode 100644 dsp/benches/cossin.rs create mode 100644 dsp/benches/trig.rs diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml index 8313a49..548e64f 100644 --- a/dsp/Cargo.toml +++ b/dsp/Cargo.toml @@ -12,7 +12,7 @@ serde = { version = "1.0", features = ["derive"], default-features = false } criterion = "0.3" [[bench]] -name = "cossin" +name = "trig" harness = false [features] diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs deleted file mode 100644 index 4e23774..0000000 --- a/dsp/benches/cossin.rs +++ /dev/null @@ -1,13 +0,0 @@ -use core::f32::consts::PI; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::trig::cossin; - -fn cossin_bench(c: &mut Criterion) { - let zi = -0x7304_2531_i32; - let zf = zi as f32 / i32::MAX as f32 * PI; - c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); - c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); -} - -criterion_group!(benches, cossin_bench); -criterion_main!(benches); diff --git a/dsp/benches/trig.rs b/dsp/benches/trig.rs new file mode 100644 index 0000000..19b6cce --- /dev/null +++ b/dsp/benches/trig.rs @@ -0,0 +1,28 @@ +use core::f32::consts::PI; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use dsp::trig::{atan2, cossin}; + +fn atan2_bench(c: &mut Criterion) { + let xi = (10 << 16) as i32; + let xf = xi as f32 / i32::MAX as f32; + + let yi = (-26_328 << 16) as i32; + let yf = yi as f32 / i32::MAX as f32; + + c.bench_function("atan2(y, x)", |b| { + b.iter(|| atan2(black_box(yi), black_box(xi))) + }); + c.bench_function("y.atan2(x)", |b| { + b.iter(|| 
black_box(yf).atan2(black_box(xf))) + }); +} + +fn cossin_bench(c: &mut Criterion) { + let zi = -0x7304_2531_i32; + let zf = zi as f32 / i32::MAX as f32 * PI; + c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); + c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); +} + +criterion_group!(benches, atan2_bench, cossin_bench); +criterion_main!(benches); From 7e794373f45c942cd3f108b32afe743cf52cf777 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 14:21:39 -0800 Subject: [PATCH 34/44] atan2: fix output range description --- dsp/src/trig.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 13ce844..9873d7e 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -18,9 +18,9 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// # Returns /// /// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1 -/// corresponds to an angle of -pi and i32::MAX corresponds to an -/// angle of +pi. +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// represents -pi and, equivalently, +pi. i32::MAX represents one +/// count less than +pi. 
pub fn atan2(y: i32, x: i32) -> i32 { let mut y = y >> 16; let mut x = x >> 16; From 12d5945d811062cfa06b83cc4d51b8962d0646a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Fri, 18 Dec 2020 15:46:21 +0100 Subject: [PATCH 35/44] dsp/testing: simplify --- dsp/src/testing.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs index 098ec87..4a14f22 100644 --- a/dsp/src/testing.rs +++ b/dsp/src/testing.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] use super::Complex; pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool { @@ -23,9 +24,7 @@ pub fn complex_allclose( rtol: f32, atol: f32, ) -> bool { - let mut result: bool = true; - a.iter().zip(b.iter()).for_each(|(i, j)| { - result &= complex_isclose(*i, *j, rtol, atol); - }); - result + a.iter() + .zip(b) + .all(|(&i, &j)| complex_isclose(i, j, rtol, atol)) } From 8d9af70c19d2606a99b68a70cb03b2834f152f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sun, 20 Dec 2020 20:35:26 +0100 Subject: [PATCH 36/44] trig/atan2: refine * use dynamic scaling of the inputs to get accurate ratios (effectively floating point) to maintain accuracy for small arguments * this also allows shifting later and keep more bits * use u32 ratio to keep one more bit * merge the corner case unittests into the big test value list * print rms, absolute and axis-relative angle * simplify the correction expression to get rid of one multiplication * use 5 bit for the correction constant and 15 bits for r * least squares optimal correction constant, this lowers the max error below 5e-5 --- dsp/src/trig.rs | 154 +++++++++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 88 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 9873d7e..3f96609 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -1,4 +1,4 @@ -use super::{shift_round, Complex}; +use super::Complex; use core::f64::consts::PI; 
include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); @@ -22,18 +22,25 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// represents -pi and, equivalently, +pi. i32::MAX represents one /// count less than +pi. pub fn atan2(y: i32, x: i32) -> i32 { - let mut y = y >> 16; - let mut x = x >> 16; + let sign = (x < 0, y < 0); - let sign = ((y >> 14) & 2) | ((x >> 15) & 1); - if sign & 1 == 1 { - x *= -1; - } - if sign & 2 == 2 { - y *= -1; - } + let mut y = y.wrapping_abs() as u32; + let mut x = x.wrapping_abs() as u32; let y_greater = y > x; + if y_greater { + core::mem::swap(&mut y, &mut x); + } + + let z = (16 - y.leading_zeros() as i32).max(0); + + x >>= z; + if x == 0 { + return 0; + } + y >>= z; + let r = (y << 16) / x; + debug_assert!(r <= 1 << 16); // Uses the general procedure described in the following // Mathematics stack exchange answer: @@ -44,47 +51,37 @@ pub fn atan2(y: i32, x: i32) -> i32 { // to compute and to be more compatible with integer // arithmetic. The approximation technique used here is // - // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // pi / 4 * r + C * r * (1 - abs(r)) // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. - if y_greater { - core::mem::swap(&mut x, &mut y); - } + // + // The least mean squared error solution is C = 0.279 (not the 0.285 that + // Rajan uses). K = C*4/pi. + // Q5 for K provides sufficient correction accuracy while preserving + // as much smoothness of the quadratic correction as possible. + const FP_K: usize = 5; + const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32; + // debug_assert!(K == 11); - if x == 0 { - return 0; - } - - // We need to share the 31 available non-sign bits between the - // atan argument and constant factors used in the atan - // approximation. Sharing the bits roughly equally between them - // gives good accuracy. Additionally, we cannot increase the - // number of atan argument bits beyond 15 because we must square - // it.
- const ATAN_ARGUMENT_BITS: usize = 15; - let ratio = (y << ATAN_ARGUMENT_BITS) / x; - - let mut angle = { - const K1: i32 = ((1. / 4. + 0.285 / PI) - * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) - as i32; - const K2: i32 = - ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32; - - ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) - }; + // `r` is unsigned Q16.16 and <= 1 + // `angle` is signed Q1.31 with 1 << 31 == +- pi + // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use + // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range. + let mut angle = ((r << 13) + + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1))) + as i32; if y_greater { - angle = (i32::MAX >> 1) - angle; + angle = (1 << 30) - angle; } - if sign & 1 == 1 { + if sign.0 { angle = i32::MAX - angle; } - if sign & 2 == 2 { - angle *= -1; + if sign.1 { + angle = angle.wrapping_neg(); } angle @@ -162,7 +159,6 @@ pub fn cossin(phase: i32) -> Complex { #[cfg(test)] mod tests { use super::*; - use crate::testing::isclose; use core::f64::consts::PI; fn angle_to_axis(angle: f64) -> f64 { @@ -172,61 +168,43 @@ mod tests { #[test] fn atan2_absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; + const N: usize = 321; + let mut test_vals = [0i32; N + 4]; + let scale = (1i64 << 31) as f64; + for i in 0..N { + test_vals[i] = (scale * (-1. + 2. 
* i as f64 / N as f64)) as i32; } - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; + assert!(test_vals.contains(&i32::MIN)); + test_vals[N] = i32::MAX; + test_vals[N + 1] = 0; + test_vals[N + 2] = -1; + test_vals[N + 3] = 1; + + let mut rms_err = 0f64; + let mut abs_err = 0f64; + let mut rel_err = 0f64; + for &x in test_vals.iter() { for &y in test_vals.iter() { - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; + let want = (y as f64 / scale).atan2(x as f64 / scale); + let have = atan2(y, x) as f64 * PI / scale; - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); + let err = (have - want).abs(); + abs_err = abs_err.max(err); + rms_err += err * err; + if err > 3e-5 { + rel_err = rel_err.max(err / angle_to_axis(want)); } } } - - // test min and max explicitly - for (x, y) in [ - ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32), - ((i16::MIN as i32 + 1) << 16, (1 << 16) as i32), - ] - .iter() - { - let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64; - let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64; - let actual = - (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64; - let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI; - let tol = atol + rtol * angle_to_axis(actual).abs(); - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", *x, *y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } + rms_err = rms_err.sqrt() / test_vals.len() as f64; + println!("max abs err: {:.2e}", abs_err); + println!("rms abs err: {:.2e}", rms_err); + println!("max rel 
err: {:.2e}", rel_err); + assert!(abs_err < 5e-3); + assert!(rms_err < 3e-3); + assert!(rel_err < 0.6); } #[test] From cc42c0c477c03293994692ff8f072c643b1f2f11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 22 Dec 2020 16:49:12 +0100 Subject: [PATCH 37/44] iir_int: add optimized integer iir implementation --- dsp/src/iir_int.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++ dsp/src/lib.rs | 1 + 2 files changed, 59 insertions(+) create mode 100644 dsp/src/iir_int.rs diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs new file mode 100644 index 0000000..1a4a6a9 --- /dev/null +++ b/dsp/src/iir_int.rs @@ -0,0 +1,58 @@ +use serde::{Deserialize, Serialize}; + +pub type IIRState = [i32; 5]; + +fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { + // Rounding bias, half up + let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); + let y = x + .iter() + .zip(a) + .map(|(x, a)| *x as i64 * *a as i64) + .fold(y0, |y, xa| y + xa); + (y >> shift) as i32 +} + +/// Integer biquad IIR +/// +/// See `dsp::iir::IIR` for general implementation details. +/// Offset and limiting disabled to suit lowpass applications. +/// Coefficient scaling fixed and optimized. +#[derive(Copy, Clone, Deserialize, Serialize)] +pub struct IIR { + pub ba: IIRState, + // pub y_offset: i32, + // pub y_min: i32, + // pub y_max: i32, +} + +impl IIR { + /// Coefficient fixed point: signed Q2.30. + /// Tailored to low-passes PI, II etc. + const SHIFT: u32 = 30; + + /// Feed a new input value into the filter, update the filter state, and + /// return the new output. Only the state `xy` is modified. + /// + /// # Arguments + /// * `xy` - Current filter state. + /// * `x0` - New input. 
+ pub fn update(&self, xy: &mut IIRState, x0: i32) -> i32 { + let n = self.ba.len(); + debug_assert!(xy.len() == n); + // `xy` contains x0 x1 y0 y1 y2 + // Increment time x1 x2 y1 y2 y3 + // Shift x1 x1 x2 y1 y2 + // This unrolls better than xy.rotate_right(1) + xy.copy_within(0..n - 1, 1); + // Store x0 x0 x1 x2 y1 y2 + xy[0] = x0; + // Compute y0 by multiply-accumulate + let y0 = macc(0, xy, &self.ba, IIR::SHIFT); + // Limit y0 + // let y0 = y0.max(self.y_min).min(self.y_max); + // Store y0 x0 x1 y0 y1 y2 + xy[n / 2] = y0; + y0 + } +} diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 67b1882..fb189fa 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -98,6 +98,7 @@ where } pub mod iir; +pub mod iir_int; pub mod lockin; pub mod pll; pub mod trig; From 67b6990fc027d5e575d1cb1476405f9cd7dc6e2d Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 4 Jan 2021 17:12:24 +0100 Subject: [PATCH 38/44] Addressing PR review --- Cargo.lock | 2 +- Cargo.toml | 4 ++-- openocd.gdb | 3 +++ src/design_parameters.rs | 6 +++-- src/digital_input_stamper.rs | 38 +++++++++++++++++++++++++++++-- src/main.rs | 43 ++++++++++-------------------------- src/timers.rs | 4 ++-- 7 files changed, 60 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index edc2864..f7082af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -517,7 +517,7 @@ dependencies = [ [[package]] name = "stm32h7xx-hal" version = "0.8.0" -source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/number-of-transfers#e70a78788e74be5281321213b53e8cd1d213550e" +source = "git+https://github.com/stm32-rs/stm32h7xx-hal?branch=dma#25ee0f3a9ae27d1fd6bb390d6045aa312f29f096" dependencies = [ "bare-metal 1.0.0", "cast", diff --git a/Cargo.toml b/Cargo.toml index f1acbe0..7217589 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,8 +53,8 @@ default-features = false [dependencies.stm32h7xx-hal] features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"] -git = "https://github.com/quartiq/stm32h7xx-hal" -branch 
= "feature/number-of-transfers" +git = "https://github.com/stm32-rs/stm32h7xx-hal" +branch = "dma" [features] semihosting = ["panic-semihosting", "cortex-m-log/semihosting"] diff --git a/openocd.gdb b/openocd.gdb index e903a33..a96f8d4 100644 --- a/openocd.gdb +++ b/openocd.gdb @@ -18,6 +18,9 @@ load # tbreak cortex_m_rt::reset_handler monitor reset halt +source ../../PyCortexMDebug/cmdebug/svd_gdb.py +svd_load ~/Downloads/STM32H743x.svd + # cycle counter delta tool, place two bkpts around the section set var $cc=0xe0001004 define qq diff --git a/src/design_parameters.rs b/src/design_parameters.rs index 414a9e2..40be7b6 100644 --- a/src/design_parameters.rs +++ b/src/design_parameters.rs @@ -1,9 +1,11 @@ +use super::hal::time::MegaHertz; + /// The ADC setup time is the number of seconds after the CSn line goes low before the serial clock /// may begin. This is used for performing the internal ADC conversion. pub const ADC_SETUP_TIME: f32 = 220e-9; /// The maximum DAC/ADC serial clock line frequency. This is a hardware limit. -pub const ADC_DAC_SCK_MHZ_MAX: u32 = 50; +pub const ADC_DAC_SCK_MAX: MegaHertz = MegaHertz(50); /// The optimal counting frequency of the hardware timers used for timestamping and sampling. -pub const TIMER_FREQUENCY_MHZ: u32 = 100; +pub const TIMER_FREQUENCY: MegaHertz = MegaHertz(100); diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 5843528..43ff9c5 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -24,7 +24,41 @@ ///! ///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If ///! timestamping is desired in DI1, a separate timer + capture channel will be necessary. -use super::{hal, timers}; +use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS}; + +/// Calculate the period of the digital input timestampe timer. +/// +/// # Note +/// The period returned will be 1 less than the required period in timer ticks. 
The value returned +/// can be immediately programmed into a hardware timer period register. +/// +/// The period is calcualted to be some power-of-two multiple of the batch size, such that N batches +/// will occur between each timestamp timer overflow. +/// +/// # Returns +/// A 32-bit value that can be programmed into a hardware timer period register. +pub fn calculate_timestamp_timer_period() -> u32 { + // Calculate how long a single batch requires in timer ticks. + let batch_duration_ticks: u64 = SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; + + // Calculate the largest power-of-two that is less than or equal to + // `batches_per_overflow`. This is completed by eliminating the least significant + // bits of the value until only the msb remains, which is always a power of two. + let batches_per_overflow: u64 = + (1u64 + u32::MAX as u64) / batch_duration_ticks; + let mut j = batches_per_overflow; + while (j & (j - 1)) != 0 { + j = j & (j - 1); + } + + // Once the number of batches per timestamp overflow is calculated, we can figure out the final + // period of the timestamp timer. The period is always 1 larger than the value configured in the + // register. + let period: u64 = batch_duration_ticks * j - 1u64; + assert!(period < u32::MAX as u64); + + period as u32 +} /// The timestamper for DI0 reference clock inputs. pub struct InputStamper { @@ -45,7 +79,7 @@ impl InputStamper { // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the // capture source. 
let input_capture = - timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4); + timer_channel.into_input_capture(timers::tim5::CC4S_A::TI4); Self { capture_channel: input_capture, diff --git a/src/main.rs b/src/main.rs index 5ae2b0f..52c5650 100644 --- a/src/main.rs +++ b/src/main.rs @@ -30,8 +30,6 @@ extern crate panic_halt; #[macro_use] extern crate log; -use core::convert::TryInto; - // use core::sync::atomic::{AtomicU32, AtomicBool, Ordering}; use cortex_m_rt::exception; use rtic::cyccnt::{Instant, U32Ext}; @@ -294,10 +292,10 @@ const APP: () = { // Configure the timer to count at the designed tick rate. We will manually set the // period below. timer2.pause(); - timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz()); + timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY); let mut sampling_timer = timers::SamplingTimer::new(timer2); - sampling_timer.set_period(ADC_SAMPLE_TICKS - 1); + sampling_timer.set_period_ticks(ADC_SAMPLE_TICKS - 1); sampling_timer }; @@ -313,32 +311,15 @@ const APP: () = { // Configure the timer to count at the designed tick rate. We will manually set the // period below. timer5.pause(); - timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz()); + timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY); // The time stamp timer must run at exactly a multiple of the sample timer based on the - // batch size. To accomodate this, we manually set the period identical to the sample - // timer, but use a prescaler that is `BATCH_SIZE` longer. + // batch size. To accomodate this, we manually set the prescaler identical to the sample + // timer, but use a period that is longer. let mut timer = timers::TimestampTimer::new(timer5); - let period: u32 = { - let batch_duration: u64 = - SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; - let batches_per_overflow: u64 = - (1u64 + u32::MAX as u64) / batch_duration; - - // Calculate the largest power-of-two that is less than `batches_per_overflow`. 
- // This is completed by eliminating the least significant bits of the value until - // only the msb remains, which is always a power of two. - let mut j = batches_per_overflow; - while (j & (j - 1)) != 0 { - j = j & (j - 1); - } - - let period: u64 = batch_duration * j - 1u64; - period.try_into().unwrap() - }; - - timer.set_period(period); + let period = digital_input_stamper::calculate_timestamp_timer_period(); + timer.set_period_ticks(period); timer }; @@ -372,7 +353,7 @@ const APP: () = { let spi: hal::spi::Spi<_, _, u16> = dp.SPI2.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI2, &ccdr.clocks, ); @@ -410,7 +391,7 @@ const APP: () = { let spi: hal::spi::Spi<_, _, u16> = dp.SPI3.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI3, &ccdr.clocks, ); @@ -460,7 +441,7 @@ const APP: () = { dp.SPI4.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI4, &ccdr.clocks, ) @@ -492,7 +473,7 @@ const APP: () = { dp.SPI5.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI5, &ccdr.clocks, ) @@ -702,7 +683,7 @@ const APP: () = { // Ensure that we have enough time for an IO-update every sample. let sample_frequency = - (design_parameters::TIMER_FREQUENCY_MHZ as f32 + (design_parameters::TIMER_FREQUENCY.0 as f32 * 1_000_000.0) / ADC_SAMPLE_TICKS as f32; diff --git a/src/timers.rs b/src/timers.rs index 03bc0aa..8afa5cd 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -41,7 +41,7 @@ macro_rules! timer_channels { /// Manually set the period of the timer. 
#[allow(dead_code)] - pub fn set_period(&mut self, period: u32) { + pub fn set_period_ticks(&mut self, period: u32) { let regs = unsafe { &*hal::stm32::$TY::ptr() }; regs.arr.write(|w| w.arr().bits(period)); } @@ -136,7 +136,7 @@ macro_rules! timer_channels { /// # Args /// * `input` - The input source for the input capture event. #[allow(dead_code)] - pub fn to_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ + pub fn into_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ let regs = unsafe { &*<$TY>::ptr() }; regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input)); From 7ecd08d86bfca28725f8a22ff02f960032b7103e Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 4 Jan 2021 18:04:01 +0100 Subject: [PATCH 39/44] More updates after PR review --- src/design_parameters.rs | 15 +++++++++++++++ src/digital_input_stamper.rs | 13 ++++++------- src/main.rs | 35 ++++++++++++++++------------------- src/timers.rs | 12 ++++++++++-- 4 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/design_parameters.rs b/src/design_parameters.rs index 40be7b6..125e133 100644 --- a/src/design_parameters.rs +++ b/src/design_parameters.rs @@ -9,3 +9,18 @@ pub const ADC_DAC_SCK_MAX: MegaHertz = MegaHertz(50); /// The optimal counting frequency of the hardware timers used for timestamping and sampling. pub const TIMER_FREQUENCY: MegaHertz = MegaHertz(100); + +/// The QSPI frequency for communicating with the pounder DDS. +pub const POUNDER_QSPI_FREQUENCY: MegaHertz = MegaHertz(40); + +/// The delay after initiating a QSPI transfer before asserting the IO_Update for the pounder DDS. +// Pounder Profile writes are always 16 bytes, with 2 cycles required per byte, coming out to a +// total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so this comes out to an offset +// of 800nS. 
We use 900ns to be safe. +pub const POUNDER_IO_UPDATE_DELAY: f32 = 900_e-9; + +/// The duration to assert IO_Update for the pounder DDS. +// IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile write. With pounder +// SYNC_CLK running at 100MHz (1/4 of the pounder reference clock of 400MHz), this corresponds to +// 40ns. To accomodate rounding errors, we use 50ns instead. +pub const POUNDER_IO_UPDATE_DURATION: f32 = 50_e-9; diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 43ff9c5..85f248f 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -24,7 +24,7 @@ ///! ///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If ///! timestamping is desired in DI1, a separate timer + capture channel will be necessary. -use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS}; +use super::{hal, timers, ADC_SAMPLE_TICKS, SAMPLE_BUFFER_SIZE}; /// Calculate the period of the digital input timestampe timer. /// @@ -39,7 +39,8 @@ use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS}; /// A 32-bit value that can be programmed into a hardware timer period register. pub fn calculate_timestamp_timer_period() -> u32 { // Calculate how long a single batch requires in timer ticks. - let batch_duration_ticks: u64 = SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; + let batch_duration_ticks: u64 = + SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; // Calculate the largest power-of-two that is less than or equal to // `batches_per_overflow`. This is completed by eliminating the least significant @@ -101,10 +102,8 @@ impl InputStamper { /// To prevent timestamp loss, the batch size and sampling rate must be adjusted such that at /// most one timestamp will occur in each data processing cycle. 
pub fn latest_timestamp(&mut self) -> Option { - if self.capture_channel.check_overcapture() { - panic!("DI0 timestamp overrun"); - } - - self.capture_channel.latest_capture() + self.capture_channel + .latest_capture() + .expect("DI0 timestamp overrun") } } diff --git a/src/main.rs b/src/main.rs index 52c5650..ede498f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -318,7 +318,8 @@ const APP: () = { // timer, but use a period that is longer. let mut timer = timers::TimestampTimer::new(timer5); - let period = digital_input_stamper::calculate_timestamp_timer_period(); + let period = + digital_input_stamper::calculate_timestamp_timer_period(); timer.set_period_ticks(period); timer @@ -543,7 +544,7 @@ const APP: () = { let qspi = hal::qspi::Qspi::bank2( dp.QUADSPI, qspi_pins, - 40.mhz(), + design_parameters::POUNDER_QSPI_FREQUENCY, &ccdr.clocks, ccdr.peripheral.QSPI, ); @@ -665,30 +666,26 @@ const APP: () = { ccdr.peripheral.HRTIM, ); - // IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile - // write. With pounder SYNC_CLK running at 100MHz (1/4 of the pounder reference - // clock of 400MHz), this corresponds to 40ns. To accomodate rounding errors, we - // use 50ns instead. - // - // Profile writes are always 16 bytes, with 2 cycles required per byte, coming - // out to a total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so - // this comes out to an offset of 800nS. We use 900ns to be safe - note that the - // timer is triggered after the QSPI write, which can take approximately 120nS, - // so there is additional margin. + // IO_Update occurs after a fixed delay from the QSPI write. Note that the timer + // is triggered after the QSPI write, which can take approximately 120nS, so + // there is additional margin. 
hrtimer.configure_single_shot( hrtimer::Channel::Two, - 50_e-9, - 900_e-9, + design_parameters::POUNDER_IO_UPDATE_DURATION, + design_parameters::POUNDER_IO_UPDATE_DELAY, ); // Ensure that we have enough time for an IO-update every sample. - let sample_frequency = - (design_parameters::TIMER_FREQUENCY.0 as f32 - * 1_000_000.0) - / ADC_SAMPLE_TICKS as f32; + let sample_frequency = (design_parameters::TIMER_FREQUENCY.0 + as f32 + * 1_000_000.0) + / ADC_SAMPLE_TICKS as f32; let sample_period = 1.0 / sample_frequency; - assert!(sample_period > 900_e-9); + assert!( + sample_period + > design_parameters::POUNDER_IO_UPDATE_DELAY + ); hrtimer }; diff --git a/src/timers.rs b/src/timers.rs index 8afa5cd..5ffbeaf 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -147,17 +147,25 @@ macro_rules! timer_channels { impl [< Channel $index InputCapture >] { /// Get the latest capture from the channel. #[allow(dead_code)] - pub fn latest_capture(&mut self) -> Option { + pub fn latest_capture(&mut self) -> Result, ()> { // Note(unsafe): This channel owns all access to the specific timer channel. // Only atomic operations on completed on the timer registers. let regs = unsafe { &*<$TY>::ptr() }; let sr = regs.sr.read(); let ccx = regs.[< ccr $index >].read(); - if sr.[< cc $index if >]().bit_is_set() { + + let result = if sr.[< cc $index if >]().bit_is_set() { regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit()); Some(ccx.ccr().bits()) } else { None + }; + + // If there is an overcapture, return an error. 
+ if sr.[< cc $index of >]().bit_is_clear() { + Ok(result) + } else { + Err(()) } } From 2b6e6f59a4e79195b1337cc51591cb64a4216223 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Mon, 4 Jan 2021 18:09:16 +0100 Subject: [PATCH 40/44] Adding comment about sample rate --- src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.rs b/src/main.rs index ede498f..5979d90 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,6 +56,7 @@ use heapless::{consts::*, String}; // The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is // equal to 10ns per tick. +// Currently, the sample rate is equal to: Fsample = 100/256 MHz = 390.625 KHz const ADC_SAMPLE_TICKS: u32 = 256; // The desired ADC sample processing buffer size. From 13543ce048c9b0c8f63bfc3a15a9f7e48bc9c24a Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Mon, 4 Jan 2021 11:14:27 -0800 Subject: [PATCH 41/44] pll update input is named "x" not "input" --- dsp/src/pll.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/src/pll.rs b/dsp/src/pll.rs index 74377f3..8df750f 100644 --- a/dsp/src/pll.rs +++ b/dsp/src/pll.rs @@ -45,7 +45,7 @@ impl PLL { /// The signal's phase/frequency is reconstructed relative to the sampling period. /// /// Args: - /// * `input`: New input phase sample. + /// * `x`: New input phase sample. /// * `shift_frequency`: Frequency error scaling. The frequency gain per update is /// `1/(1 << shift_frequency)`. /// * `shift_phase`: Phase error scaling. 
The phase gain is `1/(1 << shift_phase)` From a3cd17fd70031549fcf891208c4bd29914b72421 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Mon, 4 Jan 2021 16:37:46 -0800 Subject: [PATCH 42/44] pin clippy to stable --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0ea705..e8e0e4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,7 @@ jobs: - uses: actions-rs/clippy-check@v1 continue-on-error: true with: + toolchain: stable token: ${{ secrets.GITHUB_TOKEN }} compile: From 9e7bfd4371d50323287044142d7fd2a4ec4d96c4 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Wed, 6 Jan 2021 12:24:09 +0100 Subject: [PATCH 43/44] Adding updates after review --- src/digital_input_stamper.rs | 2 +- src/main.rs | 9 +++++---- src/timers.rs | 11 +++++++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs index 85f248f..910ae98 100644 --- a/src/digital_input_stamper.rs +++ b/src/digital_input_stamper.rs @@ -56,7 +56,7 @@ pub fn calculate_timestamp_timer_period() -> u32 { // period of the timestamp timer. The period is always 1 larger than the value configured in the // register. let period: u64 = batch_duration_ticks * j - 1u64; - assert!(period < u32::MAX as u64); + assert!(period <= u32::MAX as u64); period as u32 } diff --git a/src/main.rs b/src/main.rs index 5979d90..a0430d5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -677,10 +677,11 @@ const APP: () = { ); // Ensure that we have enough time for an IO-update every sample. 
- let sample_frequency = (design_parameters::TIMER_FREQUENCY.0 - as f32 - * 1_000_000.0) - / ADC_SAMPLE_TICKS as f32; + let sample_frequency = { + let timer_frequency: hal::time::Hertz = + design_parameters::TIMER_FREQUENCY.into(); + timer_frequency.0 as f32 / ADC_SAMPLE_TICKS as f32 + }; let sample_period = 1.0 / sample_frequency; assert!( diff --git a/src/timers.rs b/src/timers.rs index 5ffbeaf..8d7d010 100644 --- a/src/timers.rs +++ b/src/timers.rs @@ -152,19 +152,22 @@ macro_rules! timer_channels { // Only atomic operations on completed on the timer registers. let regs = unsafe { &*<$TY>::ptr() }; let sr = regs.sr.read(); - let ccx = regs.[< ccr $index >].read(); let result = if sr.[< cc $index if >]().bit_is_set() { - regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit()); + // Read the capture value. Reading the captured value clears the flag in the + // status register automatically. + let ccx = regs.[< ccr $index >].read(); Some(ccx.ccr().bits()) } else { None }; - // If there is an overcapture, return an error. - if sr.[< cc $index of >]().bit_is_clear() { + // Read SR again to check for a potential over-capture. If there is an + // overcapture, return an error. + if regs.sr.read().[< cc $index of >]().bit_is_clear() { Ok(result) } else { + regs.sr.modify(|_, w| w.[< cc $index of >]().clear_bit()); Err(()) } } From 96485c4229009908f3540b6584b4c441c734c1e6 Mon Sep 17 00:00:00 2001 From: Ryan Summers Date: Wed, 6 Jan 2021 13:36:13 +0100 Subject: [PATCH 44/44] Reverting unintended diff --- openocd.gdb | 3 --- 1 file changed, 3 deletions(-) diff --git a/openocd.gdb b/openocd.gdb index a96f8d4..e903a33 100644 --- a/openocd.gdb +++ b/openocd.gdb @@ -18,9 +18,6 @@ load # tbreak cortex_m_rt::reset_handler monitor reset halt -source ../../PyCortexMDebug/cmdebug/svd_gdb.py -svd_load ~/Downloads/STM32H743x.svd - # cycle counter delta tool, place two bkpts around the section set var $cc=0xe0001004 define qq