From 3a59f3e989c125d886f4a7d9b29ed1eb60aa71c8 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Wed, 11 Nov 2020 18:28:48 +0100
Subject: [PATCH 01/44] Adding WIP updates for digital input stamping

---
 Cargo.lock                   |   1 -
 Cargo.toml                   |   5 +-
 src/adc.rs                   |  22 +++++--
 src/digital_input_stamper.rs |  84 ++++++++++++++++++++++++++
 src/hrtimer.rs               |   4 +-
 src/main.rs                  |  77 ++++++++++++++++--------
 src/pounder/mod.rs           |   6 +-
 src/sampling_timer.rs        | 112 +++++++++++++++++++++++++++++++++++
 8 files changed, 273 insertions(+), 38 deletions(-)
 create mode 100644 src/digital_input_stamper.rs
 create mode 100644 src/sampling_timer.rs

diff --git a/Cargo.lock b/Cargo.lock
index f247a6c..c6c3f58 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -501,7 +501,6 @@ dependencies = [
 [[package]]
 name = "stm32h7xx-hal"
 version = "0.8.0"
-source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/dma-rtic-example#d8cb6fa5099282665f5e5068a9dcdc9ebaa63240"
 dependencies = [
  "bare-metal 1.0.0",
  "cast",
diff --git a/Cargo.toml b/Cargo.toml
index 049e61c..5b41667 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,8 +54,9 @@ path = "ad9959"
 
 [dependencies.stm32h7xx-hal]
 features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"]
-git = "https://github.com/quartiq/stm32h7xx-hal"
-branch = "feature/dma-rtic-example"
+# git = "https://github.com/quartiq/stm32h7xx-hal"
+# branch = "feature/dma-rtic-example"
+path = "../stm32h7xx-hal"
 
 [features]
 semihosting = ["panic-semihosting", "cortex-m-log/semihosting"]
diff --git a/src/adc.rs b/src/adc.rs
index e3310f4..9971fbb 100644
--- a/src/adc.rs
+++ b/src/adc.rs
@@ -14,8 +14,8 @@
 ///! both transfers are completed before reading the data. This is usually not significant for
 ///! busy-waiting because the transfers should complete at approximately the same time.
 use super::{
-    hal, DMAReq, DmaConfig, MemoryToPeripheral, PeripheralToMemory, Priority,
-    TargetAddress, Transfer,
+    hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral,
+    PeripheralToMemory, Priority, TargetAddress, Transfer,
 };
 
 // The desired ADC input buffer size. This is use configurable.
@@ -142,11 +142,18 @@ impl Adc0Input {
     /// * `trigger_stream` - The DMA stream used to trigger each ADC transfer by writing a word into
     ///   the SPI TX FIFO.
     /// * `data_stream` - The DMA stream used to read samples received over SPI into a data buffer.
+    /// * `trigger_channel` - The ADC sampling timer output compare channel for read triggers.
     pub fn new(
         spi: hal::spi::Spi<hal::stm32::SPI2, hal::spi::Enabled, u16>,
         trigger_stream: hal::dma::dma::Stream0<hal::stm32::DMA1>,
         data_stream: hal::dma::dma::Stream1<hal::stm32::DMA1>,
+        trigger_channel: sampling_timer::Timer2Channel1,
     ) -> Self {
+        // Generate a DMA request on every output-compare match at zero, i.e. each time the timer
+        // rolls over.
+        trigger_channel.listen_dma();
+        trigger_channel.to_output_compare(0);
+
         // The trigger stream constantly writes to the TX FIFO using a static word (dont-care
         // contents). Thus, neither the memory or peripheral address ever change. This is run in
         // circular mode to be completed at every DMA request.
@@ -224,7 +231,7 @@ impl Adc0Input {
 
         // Start the next transfer.
         self.transfer.clear_interrupts();
-        let (prev_buffer, _) =
+        let (prev_buffer, _, _) =
             self.transfer.next_transfer(next_buffer).unwrap();
 
         self.next_buffer.replace(prev_buffer);
@@ -256,11 +263,18 @@ impl Adc1Input {
     /// * `spi` - The SPI interface connected to ADC1.
     /// * `trigger_stream` - The DMA stream used to trigger ADC conversions on the SPI interface.
     /// * `data_stream` - The DMA stream used to read ADC samples from the SPI RX FIFO.
+    /// * `trigger_channel` - The ADC sampling timer output compare channel for read triggers.
     pub fn new(
         spi: hal::spi::Spi<hal::stm32::SPI3, hal::spi::Enabled, u16>,
         trigger_stream: hal::dma::dma::Stream2<hal::stm32::DMA1>,
         data_stream: hal::dma::dma::Stream3<hal::stm32::DMA1>,
+        trigger_channel: sampling_timer::Timer2Channel2,
     ) -> Self {
+        // Generate a DMA request on every output-compare match at zero, i.e. each time the timer
+        // rolls over.
+        trigger_channel.listen_dma();
+        trigger_channel.to_output_compare(0);
+
         // The trigger stream constantly writes to the TX FIFO using a static word (dont-care
         // contents). Thus, neither the memory or peripheral address ever change. This is run in
         // circular mode to be completed at every DMA request.
@@ -339,7 +353,7 @@ impl Adc1Input {
 
         // Start the next transfer.
         self.transfer.clear_interrupts();
-        let (prev_buffer, _) =
+        let (prev_buffer, _, _) =
             self.transfer.next_transfer(next_buffer).unwrap();
 
         self.next_buffer.replace(prev_buffer);
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
new file mode 100644
index 0000000..88b1c58
--- /dev/null
+++ b/src/digital_input_stamper.rs
@@ -0,0 +1,84 @@
+use super::{hal, sampling_timer, DmaConfig, PeripheralToMemory, Transfer};
+
+const INPUT_BUFFER_SIZE: usize = 1;
+
+#[link_section = ".axisram.buffers"]
+static mut BUF0: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE];
+
+#[link_section = ".axisram.buffers"]
+static mut BUF1: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE];
+
+pub struct InputStamper {
+    _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF1>>,
+    timestamp_buffer: heapless::Vec<u16, heapless::consts::U128>,
+    next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>,
+    transfer: Transfer<
+        hal::dma::dma::Stream4<hal::stm32::DMA1>,
+        sampling_timer::Timer2Channel4,
+        PeripheralToMemory,
+        &'static mut [u16; INPUT_BUFFER_SIZE],
+    >,
+}
+
+impl InputStamper {
+    pub fn new(
+        trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF1>>,
+        stream: hal::dma::dma::Stream4<hal::stm32::DMA1>,
+        timer_channel: sampling_timer::Timer2Channel4,
+    ) -> Self {
+        // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
+        // capture source.
+        timer_channel.listen_dma();
+        timer_channel.to_input_capture(sampling_timer::CC4S_A::TI4);
+
+        // Set up the DMA transfer.
+        let dma_config = DmaConfig::default()
+            .memory_increment(true)
+            .peripheral_increment(false);
+
+        let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
+            Transfer::init(
+                stream,
+                timer_channel,
+                unsafe { &mut BUF0 },
+                None,
+                dma_config,
+            );
+
+        timestamp_transfer.start(|_| {});
+
+        Self {
+            timestamp_buffer: heapless::Vec::new(),
+            next_buffer: unsafe { Some(&mut BUF1) },
+            transfer: timestamp_transfer,
+            _di0_trigger: trigger,
+        }
+    }
+
+    pub fn transfer_complete_handler(&mut self) {
+        let next_buffer = self.next_buffer.take().unwrap();
+        self.transfer.clear_interrupts();
+        let (prev_buffer, _, remaining_transfers) =
+            self.transfer.next_transfer(next_buffer).unwrap();
+
+        let valid_count = prev_buffer.len() - remaining_transfers;
+        self.timestamp_buffer
+            .extend_from_slice(&prev_buffer[..valid_count])
+            .unwrap();
+
+        self.next_buffer.replace(prev_buffer);
+    }
+
+    pub fn with_timestamps<F>(&mut self, f: F)
+    where
+        F: FnOnce(&[u16]),
+    {
+        // First, run the transfer complete handler to retrieve any timestamps that are pending in
+        // the DMA transfer.
+        self.transfer_complete_handler();
+
+        f(self.timestamp_buffer.as_ref());
+
+        self.timestamp_buffer.clear();
+    }
+}
diff --git a/src/hrtimer.rs b/src/hrtimer.rs
index d344396..47ea5c2 100644
--- a/src/hrtimer.rs
+++ b/src/hrtimer.rs
@@ -47,7 +47,8 @@ impl HighResTimerE {
         let minimum_duration = set_duration + set_offset;
 
         let source_frequency: u32 = self.clocks.timy_ker_ck().0;
-        let source_cycles = (minimum_duration * source_frequency as f32) as u32 + 1;
+        let source_cycles =
+            (minimum_duration * source_frequency as f32) as u32 + 1;
 
         // Determine the clock divider, which may be 1, 2, or 4. We will choose a clock divider that
         // allows us the highest resolution per tick, so lower dividers are favored.
@@ -92,7 +93,6 @@ impl HighResTimerE {
             }
         }
 
-
         // Enable the timer now that it is configured.
         self.master.mcr.modify(|_, w| w.tecen().set_bit());
     }
diff --git a/src/main.rs b/src/main.rs
index f1d6541..d8a5d7e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -59,10 +59,12 @@ static mut DES_RING: ethernet::DesRing = ethernet::DesRing::new();
 mod adc;
 mod afe;
 mod dac;
+mod digital_input_stamper;
 mod eeprom;
 mod hrtimer;
 mod iir;
 mod pounder;
+mod sampling_timer;
 mod server;
 
 use adc::{Adc0Input, Adc1Input, AdcInputs};
@@ -185,11 +187,10 @@ const APP: () = {
 
         adcs: AdcInputs,
         dacs: DacOutputs,
+        input_stamper: digital_input_stamper::InputStamper,
 
         eeprom_i2c: hal::i2c::I2c<hal::stm32::I2C2>,
 
-        timer: hal::timer::Timer<hal::stm32::TIM2>,
-
         profiles: heapless::spsc::Queue<[u32; 4], heapless::consts::U32>,
 
         // Note: It appears that rustfmt generates a format that GDB cannot recognize, which
@@ -267,6 +268,16 @@ const APP: () = {
         let dma_streams =
             hal::dma::dma::StreamsTuple::new(dp.DMA1, ccdr.peripheral.DMA1);
 
+        // Configure timer 2 to trigger conversions for the ADC
+        let timer2 = dp.TIM2.timer(
+            SAMPLE_FREQUENCY_KHZ.khz(),
+            ccdr.peripheral.TIM2,
+            &ccdr.clocks,
+        );
+
+        let mut sampling_timer = sampling_timer::SamplingTimer::new(timer2);
+        let sampling_timer_channels = sampling_timer.channels();
+
         // Configure the SPI interfaces to the ADCs and DACs.
         let adcs = {
             let adc0 = {
@@ -299,7 +310,12 @@ const APP: () = {
                     &ccdr.clocks,
                 );
 
-                Adc0Input::new(spi, dma_streams.0, dma_streams.1)
+                Adc0Input::new(
+                    spi,
+                    dma_streams.0,
+                    dma_streams.1,
+                    sampling_timer_channels.ch1,
+                )
             };
 
             let adc1 = {
@@ -332,7 +348,12 @@ const APP: () = {
                     &ccdr.clocks,
                 );
 
-                Adc1Input::new(spi, dma_streams.2, dma_streams.3)
+                Adc1Input::new(
+                    spi,
+                    dma_streams.2,
+                    dma_streams.3,
+                    sampling_timer_channels.ch2,
+                )
             };
 
             AdcInputs::new(adc0, adc1)
@@ -478,9 +499,7 @@ const APP: () = {
                 };
 
                 let mut reset_pin = gpioa.pa0.into_push_pull_output();
-                let mut io_update = gpiog
-                    .pg7
-                    .into_push_pull_output();
+                let mut io_update = gpiog.pg7.into_push_pull_output();
 
                 let ad9959 = ad9959::Ad9959::new(
                     qspi_interface,
@@ -736,22 +755,17 @@ const APP: () = {
         // Utilize the cycle counter for RTIC scheduling.
         cp.DWT.enable_cycle_counter();
 
-        // Configure timer 2 to trigger conversions for the ADC
-        let timer2 = dp.TIM2.timer(
-            SAMPLE_FREQUENCY_KHZ.khz(),
-            ccdr.peripheral.TIM2,
-            &ccdr.clocks,
-        );
-        {
-            // Listen to the CH1 and CH2 comparison events. These channels should have a value of
-            // zero loaded into them, so the event should occur whenever the timer overflows. Note
-            // that we use channels instead of timer updates because each SPI DMA transfer needs a
-            // unique request line.
-            let t2_regs = unsafe { &*hal::stm32::TIM2::ptr() };
-            t2_regs
-                .dier
-                .modify(|_, w| w.cc1de().set_bit().cc2de().set_bit());
-        }
+        let input_stamper = {
+            let trigger = gpioa.pa3.into_alternate_af1();
+            digital_input_stamper::InputStamper::new(
+                trigger,
+                dma_streams.4,
+                sampling_timer_channels.ch4,
+            )
+        };
+
+        // Start sampling ADCs.
+        sampling_timer.start();
 
         init::LateResources {
             afe0: afe0,
@@ -760,7 +774,8 @@ const APP: () = {
             adcs,
             dacs,
 
-            timer: timer2,
+            input_stamper,
+
             pounder: pounder_devices,
 
             eeprom_i2c,
@@ -772,6 +787,11 @@ const APP: () = {
         }
     }
 
+    #[task(binds=DMA1_STR4, resources=[input_stamper], priority = 2)]
+    fn digital_stamper(c: digital_stamper::Context) {
+        let _timestamps = c.resources.input_stamper.transfer_complete_handler();
+    }
+
     #[task(binds = TIM3, resources=[dacs, profiles, pounder], priority = 3)]
     fn dac_update(c: dac_update::Context) {
         c.resources.dacs.update();
@@ -812,10 +832,15 @@ const APP: () = {
             c.resources.pounder.lock(|pounder| {
                 if let Some(pounder) = pounder {
                     profiles.lock(|profiles| {
-                        let profile = pounder.ad9959.serialize_profile(pounder::Channel::Out0.into(),
+                        let profile = pounder
+                            .ad9959
+                            .serialize_profile(
+                                pounder::Channel::Out0.into(),
                                 100_000_000_f32,
                                 0.0_f32,
-                                *adc0 as f32 / 0xFFFF as f32).unwrap();
+                                *adc0 as f32 / 0xFFFF as f32,
+                            )
+                            .unwrap();
 
                         profiles.enqueue(profile).unwrap();
                     });
diff --git a/src/pounder/mod.rs b/src/pounder/mod.rs
index 0e32a22..c19096d 100644
--- a/src/pounder/mod.rs
+++ b/src/pounder/mod.rs
@@ -124,9 +124,9 @@ impl QspiInterface {
 
         unsafe {
             qspi_regs.dlr.write(|w| w.dl().bits(0xFFFF_FFFF));
-            qspi_regs
-                .ccr
-                .modify(|_, w| w.imode().bits(0).fmode().bits(0).admode().bits(0));
+            qspi_regs.ccr.modify(|_, w| {
+                w.imode().bits(0).fmode().bits(0).admode().bits(0)
+            });
         }
 
         self.streaming = true;
diff --git a/src/sampling_timer.rs b/src/sampling_timer.rs
new file mode 100644
index 0000000..f299ad6
--- /dev/null
+++ b/src/sampling_timer.rs
@@ -0,0 +1,112 @@
+use super::hal;
+
+use hal::dma::{dma::DMAReq, traits::TargetAddress, PeripheralToMemory};
+pub use hal::stm32::tim2::ccmr2_input::CC4S_A;
+
+pub struct SamplingTimer {
+    timer: hal::timer::Timer<hal::stm32::TIM2>,
+    channels: Option<TimerChannels>,
+}
+
+impl SamplingTimer {
+    pub fn new(mut timer: hal::timer::Timer<hal::stm32::TIM2>) -> Self {
+        timer.pause();
+
+        Self {
+            timer,
+            channels: Some(TimerChannels::new()),
+        }
+    }
+
+    pub fn channels(&mut self) -> TimerChannels {
+        self.channels.take().unwrap()
+    }
+
+    pub fn start(&mut self) {
+        self.timer.reset_counter();
+        self.timer.resume();
+    }
+}
+
+pub struct TimerChannels {
+    pub ch1: Timer2Channel1,
+    pub ch2: Timer2Channel2,
+    pub ch3: Timer2Channel3,
+    pub ch4: Timer2Channel4,
+}
+
+impl TimerChannels {
+    fn new() -> Self {
+        Self {
+            ch1: Timer2Channel1 {},
+            ch2: Timer2Channel2 {},
+            ch3: Timer2Channel3 {},
+            ch4: Timer2Channel4 {},
+        }
+    }
+}
+
+pub struct Timer2Channel1 {}
+
+impl Timer2Channel1 {
+    pub fn listen_dma(&self) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        regs.dier.modify(|_, w| w.cc1de().set_bit());
+    }
+
+    pub fn to_output_compare(&self, value: u32) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        assert!(value <= regs.arr.read().bits());
+        regs.ccr1.write(|w| w.ccr().bits(value));
+        regs.ccmr1_output()
+            .modify(|_, w| unsafe { w.cc1s().bits(0) });
+    }
+}
+
+pub struct Timer2Channel2 {}
+
+impl Timer2Channel2 {
+    pub fn listen_dma(&self) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        regs.dier.modify(|_, w| w.cc2de().set_bit());
+    }
+
+    pub fn to_output_compare(&self, value: u32) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        assert!(value <= regs.arr.read().bits());
+        regs.ccr2.write(|w| w.ccr().bits(value));
+        regs.ccmr1_output()
+            .modify(|_, w| unsafe { w.cc2s().bits(0) });
+    }
+}
+
+pub struct Timer2Channel3 {}
+
+pub struct Timer2Channel4 {}
+
+unsafe impl TargetAddress<PeripheralToMemory> for Timer2Channel4 {
+    type MemSize = u16;
+
+    const REQUEST_LINE: Option<u8> = Some(DMAReq::TIM2_CH4 as u8);
+
+    fn address(&self) -> u32 {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        &regs.dmar as *const _ as u32
+    }
+}
+
+impl Timer2Channel4 {
+    pub fn listen_dma(&self) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        regs.dier.modify(|_, w| w.cc4de().set_bit());
+    }
+
+    pub fn to_input_capture(&self, trig: CC4S_A) {
+        let regs = unsafe { &*hal::stm32::TIM2::ptr() };
+        regs.ccmr2_input().modify(|_, w| w.cc4s().variant(trig));
+
+        // Update the DMA control burst regs to point to CCR4.
+        regs.dcr
+            .modify(|_, w| unsafe { w.dbl().bits(1).dba().bits(16) });
+    }
+}

From fc81c8b5d8b0d9ee63844cd2b1f8b58c3f6b6a72 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 7 Dec 2020 17:29:36 +0100
Subject: [PATCH 02/44] Updating API

---
 Cargo.lock                   |   2 +-
 Cargo.toml                   |   4 +-
 src/adc.rs                   |   2 +-
 src/dac.rs                   |   2 +-
 src/digital_input_stamper.rs |  12 ++--
 src/main.rs                  |  93 ++----------------------
 src/sampling_timer.rs        | 134 ++++++++++++++++++++---------------
 7 files changed, 91 insertions(+), 158 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9cf3a58..edc2864 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -517,7 +517,7 @@ dependencies = [
 [[package]]
 name = "stm32h7xx-hal"
 version = "0.8.0"
-source = "git+https://github.com/stm32-rs/stm32h7xx-hal?branch=dma#0bfeeca4ce120c1b7c6d140a7da73a4372b874d8"
+source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/number-of-transfers#e70a78788e74be5281321213b53e8cd1d213550e"
 dependencies = [
  "bare-metal 1.0.0",
  "cast",
diff --git a/Cargo.toml b/Cargo.toml
index 7217589..f1acbe0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -53,8 +53,8 @@ default-features = false
 
 [dependencies.stm32h7xx-hal]
 features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"]
-git = "https://github.com/stm32-rs/stm32h7xx-hal"
-branch = "dma"
+git = "https://github.com/quartiq/stm32h7xx-hal"
+branch = "feature/number-of-transfers"
 
 [features]
 semihosting = ["panic-semihosting", "cortex-m-log/semihosting"]
diff --git a/src/adc.rs b/src/adc.rs
index e9120aa..4017f7f 100644
--- a/src/adc.rs
+++ b/src/adc.rs
@@ -195,7 +195,7 @@ macro_rules! adc_input {
 
                 // Start the next transfer.
                 self.transfer.clear_interrupts();
-                let (prev_buffer, _) =
+                let (prev_buffer, _, _) =
                     self.transfer.next_transfer(next_buffer).unwrap();
 
                 self.next_buffer.replace(prev_buffer); // .unwrap_none() https://github.com/rust-lang/rust/issues/62633
diff --git a/src/dac.rs b/src/dac.rs
index d96109c..00c24d4 100644
--- a/src/dac.rs
+++ b/src/dac.rs
@@ -143,7 +143,7 @@ macro_rules! dac_output {
 
                 // Start the next transfer.
                 self.transfer.clear_interrupts();
-                let (prev_buffer, _) =
+                let (prev_buffer, _, _) =
                     self.transfer.next_transfer(next_buffer).unwrap();
 
                 // .unwrap_none() https://github.com/rust-lang/rust/issues/62633
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 88b1c58..71cd5b1 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -13,8 +13,8 @@ pub struct InputStamper {
     timestamp_buffer: heapless::Vec<u16, heapless::consts::U128>,
     next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>,
     transfer: Transfer<
-        hal::dma::dma::Stream4<hal::stm32::DMA1>,
-        sampling_timer::Timer2Channel4,
+        hal::dma::dma::Stream6<hal::stm32::DMA1>,
+        sampling_timer::tim2::Channel4InputCapture,
         PeripheralToMemory,
         &'static mut [u16; INPUT_BUFFER_SIZE],
     >,
@@ -23,13 +23,13 @@ pub struct InputStamper {
 impl InputStamper {
     pub fn new(
         trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF1>>,
-        stream: hal::dma::dma::Stream4<hal::stm32::DMA1>,
-        timer_channel: sampling_timer::Timer2Channel4,
+        stream: hal::dma::dma::Stream6<hal::stm32::DMA1>,
+        timer_channel: sampling_timer::tim2::Channel4,
     ) -> Self {
         // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
         // capture source.
         timer_channel.listen_dma();
-        timer_channel.to_input_capture(sampling_timer::CC4S_A::TI4);
+        let input_capture = timer_channel.to_input_capture(sampling_timer::tim2::CC4S_A::TI4);
 
         // Set up the DMA transfer.
         let dma_config = DmaConfig::default()
@@ -39,7 +39,7 @@ impl InputStamper {
         let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
             Transfer::init(
                 stream,
-                timer_channel,
+                input_capture,
                 unsafe { &mut BUF0 },
                 None,
                 dma_config,
diff --git a/src/main.rs b/src/main.rs
index 453e8ab..f168c95 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -70,9 +70,7 @@ mod adc;
 mod afe;
 mod dac;
 mod digital_input_stamper;
-mod eeprom;
 mod hrtimer;
-mod iir;
 mod design_parameters;
 mod eeprom;
 mod pounder;
@@ -775,7 +773,7 @@ const APP: () = {
             let trigger = gpioa.pa3.into_alternate_af1();
             digital_input_stamper::InputStamper::new(
                 trigger,
-                dma_streams.4,
+                dma_streams.6,
                 sampling_timer_channels.ch4,
             )
         };
@@ -796,14 +794,12 @@ const APP: () = {
             net_interface: network_interface,
             eth_mac,
             mac_addr,
-
-            profiles: heapless::spsc::Queue::new(),
         }
     }
 
-    #[task(binds=DMA1_STR4, resources=[input_stamper], priority = 2)]
+    #[task(binds=DMA1_STR6, resources=[input_stamper], priority = 2)]
     fn digital_stamper(c: digital_stamper::Context) {
-        let _timestamps = c.resources.input_stamper.transfer_complete_handler();
+        panic!("Timestamp overflow")
     }
 
     #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)]
@@ -911,44 +907,7 @@ const APP: () = {
                                     Ok::<server::Status, ()>(state)
                                 }),
                                 "stabilizer/afe0/gain": (|| c.resources.afes.0.get_gain()),
-                                "stabilizer/afe1/gain": (|| c.resources.afes.1.get_gain()),
-                                "pounder/in0": (|| {
-                                    match c.resources.pounder {
-                                        Some(pounder) =>
-                                            pounder.get_input_channel_state(pounder::Channel::In0),
-                                        _ => Err(pounder::Error::Access),
-                                    }
-                                }),
-                                "pounder/in1": (|| {
-                                    match c.resources.pounder {
-                                        Some(pounder) =>
-                                            pounder.get_input_channel_state(pounder::Channel::In1),
-                                        _ => Err(pounder::Error::Access),
-                                    }
-                                }),
-                                "pounder/out0": (|| {
-                                    match c.resources.pounder {
-                                        Some(pounder) =>
-                                            pounder.get_output_channel_state(pounder::Channel::Out0),
-                                        _ => Err(pounder::Error::Access),
-                                    }
-                                }),
-                                "pounder/out1": (|| {
-                                    match c.resources.pounder {
-                                        Some(pounder) =>
-                                            pounder.get_output_channel_state(pounder::Channel::Out1),
-                                        _ => Err(pounder::Error::Access),
-                                    }
-                                }),
->>>>>>> master
-                                "pounder/dds/clock": (|| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) => pounder.get_dds_clock_config(),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                    })
-                                })
+                                "stabilizer/afe1/gain": (|| c.resources.afes.1.get_gain())
                             ],
 
                             modifiable_attributes: [
@@ -996,50 +955,6 @@ const APP: () = {
                                         Ok::<server::IirRequest, ()>(req)
                                     })
                                 }),
-                                "pounder/in0": pounder::ChannelState, (|state| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) =>
-                                                pounder.set_channel_state(pounder::Channel::In0, state),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                    })
-                                }),
-                                "pounder/in1": pounder::ChannelState, (|state| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) =>
-                                                pounder.set_channel_state(pounder::Channel::In1, state),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                    })
-                                }),
-                                "pounder/out0": pounder::ChannelState, (|state| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) =>
-                                                pounder.set_channel_state(pounder::Channel::Out0, state),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                    })
-                                }),
-                                "pounder/out1": pounder::ChannelState, (|state| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) =>
-                                                pounder.set_channel_state(pounder::Channel::Out1, state),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                    })
-                                }),
-                                "pounder/dds/clock": pounder::DdsClockConfig, (|config| {
-                                    c.resources.pounder.lock(|pounder| {
-                                        match pounder {
-                                            Some(pounder) => pounder.configure_dds_clock(config),
-                                            _ => Err(pounder::Error::Access),
-                                        }
-                                     })
-                                }),
                                 "stabilizer/afe0/gain": afe::Gain, (|gain| {
                                     c.resources.afes.0.set_gain(gain);
                                     Ok::<(), ()>(())
diff --git a/src/sampling_timer.rs b/src/sampling_timer.rs
index b7f9480..3218996 100644
--- a/src/sampling_timer.rs
+++ b/src/sampling_timer.rs
@@ -1,6 +1,5 @@
 ///! The sampling timer is used for managing ADC sampling and external reference timestamping.
 use super::hal;
-pub use hal::stm32::tim2::ccmr2_input::CC4S_A;
 
 /// The timer used for managing ADC sampling.
 pub struct SamplingTimer {
@@ -36,12 +35,54 @@ impl SamplingTimer {
     }
 }
 
-macro_rules! timer_channel {
-    ($name:ident, $TY:ty, ($ccxde:expr, $ccrx:expr, $ccmrx_output:expr, $ccxs:expr)) => {
-        pub struct $name {}
-
+macro_rules! timer_channels {
+    ($TY:ty) => {
         paste::paste! {
-            impl $name {
+            pub mod [< $TY:lower >] {
+                pub use hal::stm32::[< $TY:lower >]::ccmr1_input::{CC1S_A, CC2S_A};
+                pub use hal::stm32::[< $TY:lower >]::ccmr2_input::{CC3S_A, CC4S_A};
+
+                use stm32h7xx_hal as hal;
+                use hal::dma::{traits::TargetAddress, PeripheralToMemory, dma::DMAReq};
+                use hal::stm32::TIM2;
+
+                /// The channels representing the timer.
+                pub struct Channels {
+                    pub ch1: Channel1,
+                    pub ch2: Channel2,
+                    pub ch3: Channel3,
+                    pub ch4: Channel4,
+                }
+
+                impl Channels {
+                    /// Construct a new set of channels.
+                    ///
+                    /// Note(unsafe): This is only safe to call once.
+                    pub unsafe fn new() -> Self {
+                        Self {
+                            ch1: Channel1::new(),
+                            ch2: Channel2::new(),
+                            ch3: Channel3::new(),
+                            ch4: Channel4::new(),
+                        }
+                    }
+                }
+
+                timer_channels!(1, $TY, ccmr1);
+                timer_channels!(2, $TY, ccmr1);
+                timer_channels!(3, $TY, ccmr2);
+                timer_channels!(4, $TY, ccmr2);
+            }
+        }
+    };
+
+    ($index:expr, $TY:ty, $ccmrx:expr) => {
+        paste::paste! {
+            pub struct [< Channel $index >] {}
+
+            pub struct [< Channel $index InputCapture>] {}
+
+            impl [< Channel $index >] {
                 /// Construct a new timer channel.
                 ///
                 /// Note(unsafe): This function must only be called once. Once constructed, the
@@ -50,71 +91,48 @@ macro_rules! timer_channel {
                     Self {}
                 }
 
-                /// Allow CH4 to generate DMA requests.
+                /// Allow the channel to generate DMA requests.
                 pub fn listen_dma(&self) {
                     let regs = unsafe { &*<$TY>::ptr() };
-                    regs.dier.modify(|_, w| w.[< $ccxde >]().set_bit());
+                    regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit());
                 }
 
-                /// Operate CH2 as an output-compare.
+                /// Operate the channel as an output-compare.
                 ///
                 /// # Args
                 /// * `value` - The value to compare the sampling timer's counter against.
                 pub fn to_output_compare(&self, value: u32) {
                     let regs = unsafe { &*<$TY>::ptr() };
                     assert!(value <= regs.arr.read().bits());
-                    regs.[< $ccrx >].write(|w| w.ccr().bits(value));
-                    regs.[< $ccmrx_output >]()
-                        .modify(|_, w| unsafe { w.[< $ccxs >]().bits(0) });
+                    regs.[< ccr $index >].write(|w| w.ccr().bits(value));
+                    regs.[< $ccmrx _output >]()
+                        .modify(|_, w| unsafe { w.[< cc $index s >]().bits(0) });
+                }
+
+                /// Operate the channel in input-capture mode.
+                ///
+                /// # Args
+                /// * `input` - The input source for the input capture event.
+                pub fn to_input_capture(self, input: hal::stm32::[<$TY:lower>]::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input));
+
+                    [< Channel $index InputCapture >] {}
+                }
+            }
+
+            unsafe impl TargetAddress<PeripheralToMemory> for [< Channel $index InputCapture >] {
+                type MemSize = u16;
+
+                const REQUEST_LINE: Option<u8> = Some(DMAReq::[< $TY _CH $index >]as u8);
+
+                fn address(&self) -> u32 {
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    &regs.[<ccr $index >] as *const _ as u32
                 }
             }
         }
     };
 }
 
-pub mod tim2 {
-    use stm32h7xx_hal as hal;
-
-    /// The channels representing the timer.
-    pub struct Channels {
-        pub ch1: Channel1,
-        pub ch2: Channel2,
-        pub ch3: Channel3,
-        pub ch4: Channel4,
-    }
-
-    impl Channels {
-        /// Construct a new set of channels.
-        ///
-        /// Note(unsafe): This is only safe to call once.
-        pub unsafe fn new() -> Self {
-            Self {
-                ch1: Channel1::new(),
-                ch2: Channel2::new(),
-                ch3: Channel3::new(),
-                ch4: Channel4::new(),
-            }
-        }
-    }
-
-    timer_channel!(
-        Channel1,
-        hal::stm32::TIM2,
-        (cc1de, ccr1, ccmr1_output, cc1s)
-    );
-    timer_channel!(
-        Channel2,
-        hal::stm32::TIM2,
-        (cc2de, ccr2, ccmr1_output, cc1s)
-    );
-    timer_channel!(
-        Channel3,
-        hal::stm32::TIM2,
-        (cc3de, ccr3, ccmr2_output, cc3s)
-    );
-    timer_channel!(
-        Channel4,
-        hal::stm32::TIM2,
-        (cc4de, ccr4, ccmr2_output, cc4s)
-    );
-}
+timer_channels!(TIM2);

From ec046bc42d3490f7e788d92d76107cb2e3d58058 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 7 Dec 2020 17:58:36 +0100
Subject: [PATCH 03/44] Refactoring timer timestamping

---
 src/adc.rs                           | 12 ++--
 src/dac.rs                           |  8 +--
 src/digital_input_stamper.rs         | 25 ++++----
 src/main.rs                          | 36 ++++++++----
 src/{sampling_timer.rs => timers.rs} | 85 +++++++++++++++-------------
 5 files changed, 91 insertions(+), 75 deletions(-)
 rename src/{sampling_timer.rs => timers.rs} (64%)

diff --git a/src/adc.rs b/src/adc.rs
index 4017f7f..8d2b61a 100644
--- a/src/adc.rs
+++ b/src/adc.rs
@@ -14,8 +14,8 @@
 ///! both transfers are completed before reading the data. This is usually not significant for
 ///! busy-waiting because the transfers should complete at approximately the same time.
 use super::{
-    hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral,
-    PeripheralToMemory, Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE,
+    hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, PeripheralToMemory,
+    Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE,
 };
 
 // The following data is written by the timer ADC sample trigger into each of the SPI TXFIFOs. Note
@@ -38,12 +38,10 @@ macro_rules! adc_input {
         /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO
         /// whenever the tim2 update dma request occurs.
         struct $spi {
-            _channel: sampling_timer::tim2::$trigger_channel,
+            _channel: timers::tim2::$trigger_channel,
         }
         impl $spi {
-            pub fn new(
-                _channel: sampling_timer::tim2::$trigger_channel,
-            ) -> Self {
+            pub fn new(_channel: timers::tim2::$trigger_channel) -> Self {
                 Self { _channel }
             }
         }
@@ -100,7 +98,7 @@ macro_rules! adc_input {
                     hal::stm32::DMA1,
                 >,
                 data_stream: hal::dma::dma::$data_stream<hal::stm32::DMA1>,
-                trigger_channel: sampling_timer::tim2::$trigger_channel,
+                trigger_channel: timers::tim2::$trigger_channel,
             ) -> Self {
                 // Generate DMA events when an output compare of the timer hitting zero (timer roll over)
                 // occurs.
diff --git a/src/dac.rs b/src/dac.rs
index 00c24d4..06a6362 100644
--- a/src/dac.rs
+++ b/src/dac.rs
@@ -4,7 +4,7 @@
 ///! configured to generate a DMA write into the SPI TXFIFO, which initiates a SPI transfer and
 ///! results in DAC update for both channels.
 use super::{
-    hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress,
+    hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress,
     Transfer, SAMPLE_BUFFER_SIZE,
 };
 
@@ -22,12 +22,12 @@ macro_rules! dac_output {
         /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO
         struct $spi {
             spi: hal::spi::Spi<hal::stm32::$spi, hal::spi::Disabled, u16>,
-            _channel: sampling_timer::tim2::$trigger_channel,
+            _channel: timers::tim2::$trigger_channel,
         }
 
         impl $spi {
             pub fn new(
-                _channel: sampling_timer::tim2::$trigger_channel,
+                _channel: timers::tim2::$trigger_channel,
                 spi: hal::spi::Spi<hal::stm32::$spi, hal::spi::Disabled, u16>,
             ) -> Self {
                 Self { _channel, spi }
@@ -73,7 +73,7 @@ macro_rules! dac_output {
             pub fn new(
                 spi: hal::spi::Spi<hal::stm32::$spi, hal::spi::Enabled, u16>,
                 stream: hal::dma::dma::$data_stream<hal::stm32::DMA1>,
-                trigger_channel: sampling_timer::tim2::$trigger_channel,
+                trigger_channel: timers::tim2::$trigger_channel,
             ) -> Self {
                 // Generate DMA events when an output compare of the timer hitting zero (timer roll over)
                 // occurs.
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 71cd5b1..8b8f3af 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -1,20 +1,17 @@
-use super::{hal, sampling_timer, DmaConfig, PeripheralToMemory, Transfer};
+use super::{hal, timers, DmaConfig, PeripheralToMemory, Transfer};
 
 const INPUT_BUFFER_SIZE: usize = 1;
 
 #[link_section = ".axisram.buffers"]
-static mut BUF0: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE];
-
-#[link_section = ".axisram.buffers"]
-static mut BUF1: [u16; INPUT_BUFFER_SIZE] = [0; INPUT_BUFFER_SIZE];
+static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2];
 
 pub struct InputStamper {
-    _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF1>>,
+    _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
     timestamp_buffer: heapless::Vec<u16, heapless::consts::U128>,
     next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>,
     transfer: Transfer<
         hal::dma::dma::Stream6<hal::stm32::DMA1>,
-        sampling_timer::tim2::Channel4InputCapture,
+        timers::tim5::Channel4InputCapture,
         PeripheralToMemory,
         &'static mut [u16; INPUT_BUFFER_SIZE],
     >,
@@ -22,17 +19,19 @@ pub struct InputStamper {
 
 impl InputStamper {
     pub fn new(
-        trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF1>>,
+        trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
         stream: hal::dma::dma::Stream6<hal::stm32::DMA1>,
-        timer_channel: sampling_timer::tim2::Channel4,
+        timer_channel: timers::tim5::Channel4,
     ) -> Self {
-        // Utilize the TIM2 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
+        // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
         // capture source.
         timer_channel.listen_dma();
-        let input_capture = timer_channel.to_input_capture(sampling_timer::tim2::CC4S_A::TI4);
+        let input_capture =
+            timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4);
 
         // Set up the DMA transfer.
         let dma_config = DmaConfig::default()
+            .transfer_complete_interrupt(true)
             .memory_increment(true)
             .peripheral_increment(false);
 
@@ -40,7 +39,7 @@ impl InputStamper {
             Transfer::init(
                 stream,
                 input_capture,
-                unsafe { &mut BUF0 },
+                unsafe { &mut BUF[0] },
                 None,
                 dma_config,
             );
@@ -49,7 +48,7 @@ impl InputStamper {
 
         Self {
             timestamp_buffer: heapless::Vec::new(),
-            next_buffer: unsafe { Some(&mut BUF1) },
+            next_buffer: unsafe { Some(&mut BUF[1]) },
             transfer: timestamp_transfer,
             _di0_trigger: trigger,
         }
diff --git a/src/main.rs b/src/main.rs
index f168c95..425b401 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -69,13 +69,13 @@ static mut DES_RING: ethernet::DesRing = ethernet::DesRing::new();
 mod adc;
 mod afe;
 mod dac;
-mod digital_input_stamper;
-mod hrtimer;
 mod design_parameters;
+mod digital_input_stamper;
 mod eeprom;
+mod hrtimer;
 mod pounder;
-mod sampling_timer;
 mod server;
+mod timers;
 
 use adc::{Adc0Input, Adc1Input};
 use dac::{Dac0Output, Dac1Output};
@@ -285,9 +285,22 @@ const APP: () = {
             &ccdr.clocks,
         );
 
-        let mut sampling_timer = sampling_timer::SamplingTimer::new(timer2);
+        let mut sampling_timer = timers::SamplingTimer::new(timer2);
         let sampling_timer_channels = sampling_timer.channels();
 
+        let mut timestamp_timer = {
+            // TODO: This needs to be precisely controlled via the prescaler of the timer.
+            let timer5 = dp.TIM5.timer(
+                (SAMPLE_FREQUENCY_KHZ / SAMPLE_BUFFER_SIZE as u32).khz(),
+                ccdr.peripheral.TIM5,
+                &ccdr.clocks,
+            );
+
+            timers::TimestampTimer::new(timer5)
+        };
+
+        let timestamp_timer_channels = timestamp_timer.channels();
+
         // Configure the SPI interfaces to the ADCs and DACs.
         let adcs = {
             let adc0 = {
@@ -770,16 +783,17 @@ const APP: () = {
         cp.DWT.enable_cycle_counter();
 
         let input_stamper = {
-            let trigger = gpioa.pa3.into_alternate_af1();
+            let trigger = gpioa.pa3.into_alternate_af2();
             digital_input_stamper::InputStamper::new(
                 trigger,
                 dma_streams.6,
-                sampling_timer_channels.ch4,
+                timestamp_timer_channels.ch4,
             )
         };
 
         // Start sampling ADCs.
         sampling_timer.start();
+        timestamp_timer.start();
 
         init::LateResources {
             afes: (afe0, afe1),
@@ -797,11 +811,6 @@ const APP: () = {
         }
     }
 
-    #[task(binds=DMA1_STR6, resources=[input_stamper], priority = 2)]
-    fn digital_stamper(c: digital_stamper::Context) {
-        panic!("Timestamp overflow")
-    }
-
     #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)]
     fn process(c: process::Context) {
         let adc_samples = [
@@ -833,6 +842,11 @@ const APP: () = {
         c.resources.dacs.1.release_buffer(dac1);
     }
 
+    #[task(binds=DMA1_STR6, priority = 2)]
+    fn digital_stamper(_: digital_stamper::Context) {
+        panic!("Timestamp overflow")
+    }
+
     #[idle(resources=[net_interface, pounder, mac_addr, eth_mac, iir_state, iir_ch, afes])]
     fn idle(mut c: idle::Context) -> ! {
         let mut socket_set_entries: [_; 8] = Default::default();
diff --git a/src/sampling_timer.rs b/src/timers.rs
similarity index 64%
rename from src/sampling_timer.rs
rename to src/timers.rs
index 3218996..36f8c01 100644
--- a/src/sampling_timer.rs
+++ b/src/timers.rs
@@ -1,50 +1,51 @@
 ///! The sampling timer is used for managing ADC sampling and external reference timestamping.
 use super::hal;
 
-/// The timer used for managing ADC sampling.
-pub struct SamplingTimer {
-    timer: hal::timer::Timer<hal::stm32::TIM2>,
-    channels: Option<tim2::Channels>,
-}
-
-impl SamplingTimer {
-    /// Construct the sampling timer.
-    pub fn new(mut timer: hal::timer::Timer<hal::stm32::TIM2>) -> Self {
-        timer.pause();
-
-        Self {
-            timer,
-            // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any
-            // of the underlying timer channel registers, as ownership of the channels is now
-            // provided through the associated channel structures. We additionally guarantee this
-            // can only be called once because there is only one Timer2 and this resource takes
-            // ownership of it once instantiated.
-            channels: unsafe { Some(tim2::Channels::new()) },
-        }
-    }
-
-    /// Get the timer capture/compare channels.
-    pub fn channels(&mut self) -> tim2::Channels {
-        self.channels.take().unwrap()
-    }
-
-    /// Start the sampling timer.
-    pub fn start(&mut self) {
-        self.timer.reset_counter();
-        self.timer.resume();
-    }
-}
-
 macro_rules! timer_channels {
-    ($TY:ty) => {
+    ($name:ident, $TY:ident) => {
         paste::paste! {
+
+            /// The timer used for managing ADC sampling.
+            pub struct $name {
+                timer: hal::timer::Timer<hal::stm32::[< $TY >]>,
+                channels: Option<[< $TY:lower >]::Channels>,
+            }
+
+            impl $name {
+                /// Construct the sampling timer.
+                pub fn new(mut timer: hal::timer::Timer<hal::stm32::[< $TY>]>) -> Self {
+                    timer.pause();
+
+                    Self {
+                        timer,
+                        // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any
+                        // of the underlying timer channel registers, as ownership of the channels is now
+                        // provided through the associated channel structures. We additionally guarantee this
+                        // can only be called once because there is only one Timer2 and this resource takes
+                        // ownership of it once instantiated.
+                        channels: unsafe { Some([< $TY:lower >]::Channels::new()) },
+                    }
+                }
+
+                /// Get the timer capture/compare channels.
+                pub fn channels(&mut self) -> [< $TY:lower >]::Channels {
+                    self.channels.take().unwrap()
+                }
+
+                /// Start the sampling timer.
+                pub fn start(&mut self) {
+                    self.timer.reset_counter();
+                    self.timer.resume();
+                }
+            }
+
             pub mod [< $TY:lower >] {
-                pub use hal::stm32::[< $TY:lower >]::ccmr1_input::{CC1S_A, CC2S_A};
-                pub use hal::stm32::[< $TY:lower >]::ccmr2_input::{CC3S_A, CC4S_A};
+                pub use hal::stm32::tim2::ccmr1_input::{CC1S_A, CC2S_A};
+                pub use hal::stm32::tim2::ccmr2_input::{CC3S_A, CC4S_A};
 
                 use stm32h7xx_hal as hal;
                 use hal::dma::{traits::TargetAddress, PeripheralToMemory, dma::DMAReq};
-                use hal::stm32::TIM2;
+                use hal::stm32::$TY;
 
                 /// The channels representing the timer.
                 pub struct Channels {
@@ -92,6 +93,7 @@ macro_rules! timer_channels {
                 }
 
                 /// Allow the channel to generate DMA requests.
+                #[allow(dead_code)]
                 pub fn listen_dma(&self) {
                     let regs = unsafe { &*<$TY>::ptr() };
                     regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit());
@@ -101,6 +103,7 @@ macro_rules! timer_channels {
                 ///
                 /// # Args
                 /// * `value` - The value to compare the sampling timer's counter against.
+                #[allow(dead_code)]
                 pub fn to_output_compare(&self, value: u32) {
                     let regs = unsafe { &*<$TY>::ptr() };
                     assert!(value <= regs.arr.read().bits());
@@ -113,7 +116,8 @@ macro_rules! timer_channels {
                 ///
                 /// # Args
                 /// * `input` - The input source for the input capture event.
-                pub fn to_input_capture(self, input: hal::stm32::[<$TY:lower>]::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{
+                #[allow(dead_code)]
+                pub fn to_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{
                     let regs = unsafe { &*<$TY>::ptr() };
                     regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input));
 
@@ -135,4 +139,5 @@ macro_rules! timer_channels {
     };
 }
 
-timer_channels!(TIM2);
+timer_channels!(SamplingTimer, TIM2);
+timer_channels!(TimestampTimer, TIM5);

From b191a3f01df139ef17ac3a9cfe951a2b8b7d47c8 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 7 Dec 2020 18:11:46 +0100
Subject: [PATCH 04/44] Updating timestamp timer to be more precise

---
 src/main.rs   | 19 +++++++++++++------
 src/timers.rs | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 425b401..2520ad9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -289,14 +289,21 @@ const APP: () = {
         let sampling_timer_channels = sampling_timer.channels();
 
         let mut timestamp_timer = {
-            // TODO: This needs to be precisely controlled via the prescaler of the timer.
-            let timer5 = dp.TIM5.timer(
-                (SAMPLE_FREQUENCY_KHZ / SAMPLE_BUFFER_SIZE as u32).khz(),
-                ccdr.peripheral.TIM5,
-                &ccdr.clocks,
+            // The timer frequency is manually adjusted below, so the 1KHz setting here is a
+            // don't-care.
+            let timer5 =
+                dp.TIM5.timer(1.khz(), ccdr.peripheral.TIM5, &ccdr.clocks);
+
+            // The timestamp timer must run at exactly a multiple of the sample timer based on the
+            // batch size. To accommodate this, we manually set the period identical to the sample
+            // timer, but use a prescaler that is `SAMPLE_BUFFER_SIZE` times longer.
+            let mut timer = timers::TimestampTimer::new(timer5);
+            timer.set_period(sampling_timer.get_period());
+            timer.set_prescaler(
+                sampling_timer.get_prescaler() * SAMPLE_BUFFER_SIZE as u16,
             );
 
-            timers::TimestampTimer::new(timer5)
+            timer
         };
 
         let timestamp_timer_channels = timestamp_timer.channels();
diff --git a/src/timers.rs b/src/timers.rs
index 36f8c01..a3c2dce 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -32,8 +32,36 @@ macro_rules! timer_channels {
                     self.channels.take().unwrap()
                 }
 
-                /// Start the sampling timer.
-                pub fn start(&mut self) {
+                #[allow(dead_code)]
+                pub fn get_prescaler(&self) -> u16 {
+                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
+                    regs.psc.read().psc().bits() + 1
+                }
+
+                #[allow(dead_code)]
+                pub fn set_prescaler(&mut self, prescaler: u16) {
+                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
+                    assert!(prescaler >= 1);
+                    regs.psc.write(|w| w.psc().bits(prescaler - 1));
+                }
+
+                #[allow(dead_code)]
+                pub fn get_period(&self) -> u32 {
+                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
+                    regs.arr.read().arr().bits()
+                }
+
+                #[allow(dead_code)]
+                pub fn set_period(&mut self, period: u32) {
+                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
+                    regs.arr.write(|w| w.arr().bits(period));
+                }
+
+                /// Start the timer.
+                pub fn start(mut self) {
+                    // Force a refresh of the frequency settings.
+                    self.timer.apply_freq();
+
                     self.timer.reset_counter();
                     self.timer.resume();
                 }

From 6eaf2cc073f58ac6e5a001248170af89d5348810 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 7 Dec 2020 18:19:20 +0100
Subject: [PATCH 05/44] Updating timestamp buffer logic

---
 src/digital_input_stamper.rs | 21 ++-------------------
 src/main.rs                  | 14 ++++++++------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 8b8f3af..a8ce1d9 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -7,7 +7,6 @@ static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2];
 
 pub struct InputStamper {
     _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
-    timestamp_buffer: heapless::Vec<u16, heapless::consts::U128>,
     next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>,
     transfer: Transfer<
         hal::dma::dma::Stream6<hal::stm32::DMA1>,
@@ -47,37 +46,21 @@ impl InputStamper {
         timestamp_transfer.start(|_| {});
 
         Self {
-            timestamp_buffer: heapless::Vec::new(),
             next_buffer: unsafe { Some(&mut BUF[1]) },
             transfer: timestamp_transfer,
             _di0_trigger: trigger,
         }
     }
 
-    pub fn transfer_complete_handler(&mut self) {
+    pub fn acquire_buffer(&mut self) -> &[u16] {
         let next_buffer = self.next_buffer.take().unwrap();
-        self.transfer.clear_interrupts();
         let (prev_buffer, _, remaining_transfers) =
             self.transfer.next_transfer(next_buffer).unwrap();
 
         let valid_count = prev_buffer.len() - remaining_transfers;
-        self.timestamp_buffer
-            .extend_from_slice(&prev_buffer[..valid_count])
-            .unwrap();
 
         self.next_buffer.replace(prev_buffer);
-    }
 
-    pub fn with_timestamps<F>(&mut self, f: F)
-    where
-        F: FnOnce(&[u16]),
-    {
-        // First, run the transfer complete handler to retrieve any timestamps that are pending in
-        // the DMA transfer.
-        self.transfer_complete_handler();
-
-        f(self.timestamp_buffer.as_ref());
-
-        self.timestamp_buffer.clear();
+        &self.next_buffer.as_ref().unwrap()[..valid_count]
     }
 }
diff --git a/src/main.rs b/src/main.rs
index 2520ad9..2c07284 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -818,7 +818,7 @@ const APP: () = {
         }
     }
 
-    #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch], priority=2)]
+    #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch, input_stamper], priority=2)]
     fn process(c: process::Context) {
         let adc_samples = [
             c.resources.adcs.0.acquire_buffer(),
@@ -829,6 +829,8 @@ const APP: () = {
             c.resources.dacs.1.acquire_buffer(),
         ];
 
+        let _timestamps = c.resources.input_stamper.acquire_buffer();
+
         for channel in 0..adc_samples.len() {
             for sample in 0..adc_samples[0].len() {
                 let x = f32::from(adc_samples[channel][sample] as i16);
@@ -849,11 +851,6 @@ const APP: () = {
         c.resources.dacs.1.release_buffer(dac1);
     }
 
-    #[task(binds=DMA1_STR6, priority = 2)]
-    fn digital_stamper(_: digital_stamper::Context) {
-        panic!("Timestamp overflow")
-    }
-
     #[idle(resources=[net_interface, pounder, mac_addr, eth_mac, iir_state, iir_ch, afes])]
     fn idle(mut c: idle::Context) -> ! {
         let mut socket_set_entries: [_; 8] = Default::default();
@@ -1008,6 +1005,11 @@ const APP: () = {
         }
     }
 
+    #[task(binds=DMA1_STR6, priority = 2)]
+    fn di0_timestamp(_: di0_timestamp::Context) {
+        panic!("DI0 Timestamp overflow")
+    }
+
     #[task(binds = ETH, priority = 1)]
     fn eth(_: eth::Context) {
         unsafe { ethernet::interrupt_handler() }

From 551cc5d7421e7865497e96d345791e6c00561771 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 7 Dec 2020 18:44:45 +0100
Subject: [PATCH 06/44] Adding support for DBM

---
 src/digital_input_stamper.rs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index a8ce1d9..ba65b66 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -1,18 +1,16 @@
-use super::{hal, timers, DmaConfig, PeripheralToMemory, Transfer};
-
-const INPUT_BUFFER_SIZE: usize = 1;
+use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer};
 
 #[link_section = ".axisram.buffers"]
-static mut BUF: [[u16; INPUT_BUFFER_SIZE]; 2] = [[0; INPUT_BUFFER_SIZE]; 2];
+static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 3] = [[0; SAMPLE_BUFFER_SIZE]; 3];
 
 pub struct InputStamper {
     _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
-    next_buffer: Option<&'static mut [u16; INPUT_BUFFER_SIZE]>,
+    next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>,
     transfer: Transfer<
         hal::dma::dma::Stream6<hal::stm32::DMA1>,
         timers::tim5::Channel4InputCapture,
         PeripheralToMemory,
-        &'static mut [u16; INPUT_BUFFER_SIZE],
+        &'static mut [u16; SAMPLE_BUFFER_SIZE],
     >,
 }
 
@@ -32,21 +30,25 @@ impl InputStamper {
         let dma_config = DmaConfig::default()
             .transfer_complete_interrupt(true)
             .memory_increment(true)
+            .circular_buffer(true)
+            .double_buffer(true)
             .peripheral_increment(false);
 
+        // This needs to operate in double-buffer+circular mode so that we don't potentially drop
+        // input timestamps.
         let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
             Transfer::init(
                 stream,
                 input_capture,
                 unsafe { &mut BUF[0] },
-                None,
+                unsafe { Some(&mut BUF[1]) },
                 dma_config,
             );
 
         timestamp_transfer.start(|_| {});
 
         Self {
-            next_buffer: unsafe { Some(&mut BUF[1]) },
+            next_buffer: unsafe { Some(&mut BUF[2]) },
             transfer: timestamp_transfer,
             _di0_trigger: trigger,
         }

From f2e4f497fa96d2e38dfb59b946f6385efeab9109 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 8 Dec 2020 11:29:59 +0100
Subject: [PATCH 07/44] Removing DBM from input stamper

---
 src/digital_input_stamper.rs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index ba65b66..3df8eee 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -1,7 +1,7 @@
 use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer};
 
 #[link_section = ".axisram.buffers"]
-static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 3] = [[0; SAMPLE_BUFFER_SIZE]; 3];
+static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2];
 
 pub struct InputStamper {
     _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
@@ -30,25 +30,23 @@ impl InputStamper {
         let dma_config = DmaConfig::default()
             .transfer_complete_interrupt(true)
             .memory_increment(true)
-            .circular_buffer(true)
-            .double_buffer(true)
             .peripheral_increment(false);
 
-        // This needs to operate in double-buffer+circular mode so that we don't potentially drop
-        // input timestamps.
+        // TODO: This needs to operate in double-buffer+circular mode so that we don't potentially
+        // drop input timestamps.
         let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
             Transfer::init(
                 stream,
                 input_capture,
                 unsafe { &mut BUF[0] },
-                unsafe { Some(&mut BUF[1]) },
+                None,
                 dma_config,
             );
 
         timestamp_transfer.start(|_| {});
 
         Self {
-            next_buffer: unsafe { Some(&mut BUF[2]) },
+            next_buffer: unsafe { Some(&mut BUF[1]) },
             transfer: timestamp_transfer,
             _di0_trigger: trigger,
         }

From a134340726f94fd1b6df3f894aae503518e7c3fa Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 8 Dec 2020 13:53:34 +0100
Subject: [PATCH 08/44] Adding direct and DMA collection support for DI0
 timestamps

---
 src/digital_input_stamper.rs | 146 +++++++++++++++++++++++++++--------
 src/main.rs                  |  11 +--
 src/timers.rs                |  58 +++++++++++++-
 3 files changed, 173 insertions(+), 42 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 3df8eee..fe8c86b 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -1,66 +1,146 @@
-use super::{SAMPLE_BUFFER_SIZE, hal, timers, DmaConfig, PeripheralToMemory, Transfer};
+///! Digital Input 0 (DI0) reference clock timestamper
+///!
+///! This module provides a means of timestamping the rising edges of an external reference clock on
+///! the DI0 with a timer value from TIM5.
+///!
+///! This module only supports input clocks on DI0 and may or may not utilize DMA to collect
+///! timestamps.
+///!
+///! # Design
+///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is
+///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0
+///! triggers, the current TIM5 capture value is recorded as a timestamp. This timestamp can be
+///! either directly read from the timer channel or can be collected asynchronously via DMA
+///! collection.
+///!
+///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many
+///! timestamps as there are samples, but it is intended that this DMA transfer should never
+///! complete. Instead, when all samples are collected, the module pauses the DMA transfer and
+///! checks to see how many timestamps were collected. These collected timestamps are then returned
+///! for further processing.
+///!
+///! To prevent silently discarding timestamps, the TIm5 input capture over-capture interrupt is
+///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR
+///! which handles the over-capture.
+///!
+///! # Tradeoffs
+///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they
+///! are being prematurely stopped (such is the case here). As such, for a sample batch size of 1,
+///! this can take up a significant amount of the total available processing time for the samples.
+///! To avoid this, the module does not use DMA when the sample batch size is one. Instead, the
+///! module manually checks for any captured timestamps from the timer capture channel. In
+///! this mode, the maximum input clock frequency supported is equal to the configured sample rate.
+///!
+///! There is a small window while the DMA buffers are swapped where a timestamp could potentially
+///! be lost. To prevent this, the `acquire_buffer()` method should not be pre-empted. Any lost
+///! timestamp will trigger an over-capture interrupt.
+use super::{
+    hal, timers, DmaConfig, PeripheralToMemory, Transfer, SAMPLE_BUFFER_SIZE,
+};
 
+// The DMA buffers must exist in a location where DMA can access. By default, RAM uses DTCM, which
+// is off-limits to the normal DMA peripheral. Instead, we use AXISRAM.
 #[link_section = ".axisram.buffers"]
-static mut BUF: [[u16; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2];
+static mut BUF: [[u32; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2];
 
+/// The timestamper for DI0 reference clock inputs.
 pub struct InputStamper {
     _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
-    next_buffer: Option<&'static mut [u16; SAMPLE_BUFFER_SIZE]>,
-    transfer: Transfer<
-        hal::dma::dma::Stream6<hal::stm32::DMA1>,
-        timers::tim5::Channel4InputCapture,
-        PeripheralToMemory,
-        &'static mut [u16; SAMPLE_BUFFER_SIZE],
+    next_buffer: Option<&'static mut [u32; SAMPLE_BUFFER_SIZE]>,
+    transfer: Option<
+        Transfer<
+            hal::dma::dma::Stream6<hal::stm32::DMA1>,
+            timers::tim5::Channel4InputCapture,
+            PeripheralToMemory,
+            &'static mut [u32; SAMPLE_BUFFER_SIZE],
+        >,
     >,
+    capture_channel: Option<timers::tim5::Channel4InputCapture>,
 }
 
 impl InputStamper {
+    /// Construct the DI0 input timestamper.
+    ///
+    /// # Args
+    /// * `trigger` - The capture trigger input pin.
+    /// * `stream` - The DMA stream to use for collecting timestamps.
+    /// * `timer_channel` - The timer channel used for capturing timestamps.
+    /// * `batch_size` - The number of samples collected per processing batch.
     pub fn new(
         trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
         stream: hal::dma::dma::Stream6<hal::stm32::DMA1>,
         timer_channel: timers::tim5::Channel4,
+        batch_size: usize,
     ) -> Self {
         // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
         // capture source.
-        timer_channel.listen_dma();
         let input_capture =
             timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4);
 
-        // Set up the DMA transfer.
-        let dma_config = DmaConfig::default()
-            .transfer_complete_interrupt(true)
-            .memory_increment(true)
-            .peripheral_increment(false);
+        // Listen for over-capture events, which indicates an over-run of DI0 timestamps.
+        input_capture.listen_overcapture();
 
-        // TODO: This needs to operate in double-buffer+circular mode so that we don't potentially
-        // drop input timestamps.
-        let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
-            Transfer::init(
-                stream,
-                input_capture,
-                unsafe { &mut BUF[0] },
-                None,
-                dma_config,
-            );
+        // For small batch sizes, the overhead of DMA can become burdensome to the point where
+        // timing is not met. The DMA requires 500ns overhead, whereas a direct register read only
+        // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach.
+        let (transfer, input_capture) = if batch_size >= 2 {
+            input_capture.listen_dma();
 
-        timestamp_transfer.start(|_| {});
+            // Set up the DMA transfer.
+            let dma_config = DmaConfig::default().memory_increment(true);
+
+            let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
+                Transfer::init(
+                    stream,
+                    input_capture,
+                    unsafe { &mut BUF[0] },
+                    None,
+                    dma_config,
+                );
+
+            timestamp_transfer.start(|_| {});
+            (Some(timestamp_transfer), None)
+        } else {
+            (None, Some(input_capture))
+        };
 
         Self {
             next_buffer: unsafe { Some(&mut BUF[1]) },
-            transfer: timestamp_transfer,
+            transfer,
+            capture_channel: input_capture,
             _di0_trigger: trigger,
         }
     }
 
-    pub fn acquire_buffer(&mut self) -> &[u16] {
-        let next_buffer = self.next_buffer.take().unwrap();
-        let (prev_buffer, _, remaining_transfers) =
-            self.transfer.next_transfer(next_buffer).unwrap();
+    /// Get all of the timestamps that have occurred during the last processing cycle.
+    pub fn acquire_buffer(&mut self) -> &[u32] {
+        // If we are using DMA, finish the transfer and swap over buffers.
+        if self.transfer.is_some() {
+            let next_buffer = self.next_buffer.take().unwrap();
 
-        let valid_count = prev_buffer.len() - remaining_transfers;
+            let (prev_buffer, _, remaining_transfers) = self
+                .transfer
+                .as_mut()
+                .unwrap()
+                .next_transfer(next_buffer)
+                .unwrap();
+            let valid_count = prev_buffer.len() - remaining_transfers;
 
-        self.next_buffer.replace(prev_buffer);
+            self.next_buffer.replace(prev_buffer);
 
-        &self.next_buffer.as_ref().unwrap()[..valid_count]
+            // Note that we likely didn't finish the transfer, so only return the number of
+            // timestamps actually collected.
+            &self.next_buffer.as_ref().unwrap()[..valid_count]
+        } else {
+            // If we aren't using DMA, just manually check the input capture channel for a
+            // timestamp.
+            match self.capture_channel.as_mut().unwrap().latest_capture() {
+                Some(stamp) => {
+                    self.next_buffer.as_mut().unwrap()[0] = stamp;
+                    &self.next_buffer.as_ref().unwrap()[..1]
+                }
+                None => &[],
+            }
+        }
     }
 }
diff --git a/src/main.rs b/src/main.rs
index e798294..469f114 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -808,6 +808,7 @@ const APP: () = {
                 trigger,
                 dma_streams.6,
                 timestamp_timer_channels.ch4,
+                SAMPLE_BUFFER_SIZE,
             )
         };
 
@@ -1030,11 +1031,6 @@ const APP: () = {
         }
     }
 
-    #[task(binds=DMA1_STR6, priority = 2)]
-    fn di0_timestamp(_: di0_timestamp::Context) {
-        panic!("DI0 Timestamp overflow")
-    }
-
     #[task(binds = ETH, priority = 1)]
     fn eth(_: eth::Context) {
         unsafe { ethernet::interrupt_handler() }
@@ -1060,6 +1056,11 @@ const APP: () = {
         panic!("DAC1 output error");
     }
 
+    #[task(binds = TIM5, priority = 3)]
+    fn di0(_: di0::Context) {
+        panic!("DI0 timestamp overrun");
+    }
+
     extern "C" {
         // hw interrupt handlers for RTIC to use for scheduling tasks
         // one per priority
diff --git a/src/timers.rs b/src/timers.rs
index a3c2dce..74b4731 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -2,7 +2,7 @@
 use super::hal;
 
 macro_rules! timer_channels {
-    ($name:ident, $TY:ident) => {
+    ($name:ident, $TY:ident, u32) => {
         paste::paste! {
 
             /// The timer used for managing ADC sampling.
@@ -32,12 +32,14 @@ macro_rules! timer_channels {
                     self.channels.take().unwrap()
                 }
 
+                /// Get the prescaler of a timer.
                 #[allow(dead_code)]
                 pub fn get_prescaler(&self) -> u16 {
                     let regs = unsafe { &*hal::stm32::$TY::ptr() };
                     regs.psc.read().psc().bits() + 1
                 }
 
+                /// Manually set the prescaler of the timer.
                 #[allow(dead_code)]
                 pub fn set_prescaler(&mut self, prescaler: u16) {
                     let regs = unsafe { &*hal::stm32::$TY::ptr() };
@@ -45,12 +47,14 @@ macro_rules! timer_channels {
                     regs.psc.write(|w| w.psc().bits(prescaler - 1));
                 }
 
+                /// Get the period of the timer.
                 #[allow(dead_code)]
                 pub fn get_period(&self) -> u32 {
                     let regs = unsafe { &*hal::stm32::$TY::ptr() };
                     regs.arr.read().arr().bits()
                 }
 
+                /// Manually set the period of the timer.
                 #[allow(dead_code)]
                 pub fn set_period(&mut self, period: u32) {
                     let regs = unsafe { &*hal::stm32::$TY::ptr() };
@@ -107,8 +111,10 @@ macro_rules! timer_channels {
 
     ($index:expr, $TY:ty, $ccmrx:expr) => {
         paste::paste! {
+            /// A capture/compare channel of the timer.
             pub struct [< Channel $index >] {}
 
+            /// A capture channel of the timer.
             pub struct [< Channel $index InputCapture>] {}
 
             impl [< Channel $index >] {
@@ -153,8 +159,52 @@ macro_rules! timer_channels {
                 }
             }
 
+            impl [< Channel $index InputCapture >] {
+                /// Get the latest capture from the channel.
+                #[allow(dead_code)]
+                pub fn latest_capture(&mut self) -> Option<u32> {
+                    // Note(unsafe): This channel owns all access to the specific timer channel.
+                    // Only atomic operations are completed on the timer registers.
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    let sr = regs.sr.read();
+                    let ccx = regs.[< ccr $index >].read();
+                    if sr.[< cc $index if >]().bit_is_set() {
+                        regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit());
+                        Some(ccx.ccr().bits())
+                    } else {
+                        None
+                    }
+                }
+
+                /// Listen for over-capture events on the timer channel.
+                ///
+                /// # Note
+                /// An over-capture event is when a previous capture was lost due to a new capture.
+                ///
+                /// "Listening" is equivalent to enabling the interrupt for the event.
+                #[allow(dead_code)]
+                pub fn listen_overcapture(&self) {
+                    // Note(unsafe): This channel owns all access to the specific timer channel.
+                    // Only atomic operations on completed on the timer registers.
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    regs.dier.modify(|_, w| w.[<cc $index ie>]().set_bit());
+                }
+
+                /// Allow the channel to generate DMA requests.
+                #[allow(dead_code)]
+                pub fn listen_dma(&self) {
+                    // Note(unsafe): This channel owns all access to the specific timer channel.
+                    // Only atomic operations are completed on the timer registers.
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit());
+                }
+            }
+
+            // Note(unsafe): This manually implements DMA support for input-capture channels. This
+            // is safe as it is only completed once per channel and each DMA request is allocated to
+            // each channel as the owner.
             unsafe impl TargetAddress<PeripheralToMemory> for [< Channel $index InputCapture >] {
-                type MemSize = u16;
+                type MemSize = u32;
 
                 const REQUEST_LINE: Option<u8> = Some(DMAReq::[< $TY _CH $index >]as u8);
 
@@ -167,5 +217,5 @@ macro_rules! timer_channels {
     };
 }
 
-timer_channels!(SamplingTimer, TIM2);
-timer_channels!(TimestampTimer, TIM5);
+timer_channels!(SamplingTimer, TIM2, u32);
+timer_channels!(TimestampTimer, TIM5, u32);

From 1a1d8fd8b93c5527298837903fc160f50ab1cd9b Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 8 Dec 2020 14:25:46 +0100
Subject: [PATCH 09/44] Adding precise control of timer periods and prescalers

---
 src/design_parameters.rs |  3 ++
 src/main.rs              | 63 +++++++++++++++++++++++++++++-----------
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/src/design_parameters.rs b/src/design_parameters.rs
index 9835568..414a9e2 100644
--- a/src/design_parameters.rs
+++ b/src/design_parameters.rs
@@ -4,3 +4,6 @@ pub const ADC_SETUP_TIME: f32 = 220e-9;
 
 /// The maximum DAC/ADC serial clock line frequency. This is a hardware limit.
 pub const ADC_DAC_SCK_MHZ_MAX: u32 = 50;
+
+/// The optimal counting frequency of the hardware timers used for timestamping and sampling.
+pub const TIMER_FREQUENCY_MHZ: u32 = 100;
diff --git a/src/main.rs b/src/main.rs
index 469f114..14505e8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,6 +30,8 @@ extern crate panic_halt;
 #[macro_use]
 extern crate log;
 
+use core::convert::TryInto;
+
 // use core::sync::atomic::{AtomicU32, AtomicBool, Ordering};
 use cortex_m_rt::exception;
 use rtic::cyccnt::{Instant, U32Ext};
@@ -54,11 +56,12 @@ use smoltcp::wire::Ipv4Address;
 
 use heapless::{consts::*, String};
 
-// The desired sampling frequency of the ADCs.
-const SAMPLE_FREQUENCY_KHZ: u32 = 500;
+// The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is
+// equal to 10ns per tick.
+const ADC_SAMPLE_TICKS: u32 = 128;
 
 // The desired ADC sample processing buffer size.
-const SAMPLE_BUFFER_SIZE: usize = 1;
+const SAMPLE_BUFFER_SIZE: usize = 8;
 
 // The number of cascaded IIR biquads per channel. Select 1 or 2!
 const IIR_CASCADE_LENGTH: usize = 1;
@@ -282,29 +285,51 @@ const APP: () = {
             hal::dma::dma::StreamsTuple::new(dp.DMA1, ccdr.peripheral.DMA1);
 
         // Configure timer 2 to trigger conversions for the ADC
-        let timer2 = dp.TIM2.timer(
-            SAMPLE_FREQUENCY_KHZ.khz(),
-            ccdr.peripheral.TIM2,
-            &ccdr.clocks,
-        );
+        let mut sampling_timer = {
+            // The timer frequency is manually adjusted below, so the 1KHz setting here is a
+            // dont-care.
+            let mut timer2 =
+                dp.TIM2.timer(1.khz(), ccdr.peripheral.TIM2, &ccdr.clocks);
+
+            // Configure the timer to count at the designed tick rate. We will manually set the
+            // period below.
+            timer2.pause();
+            timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz());
+
+            let mut sampling_timer = timers::SamplingTimer::new(timer2);
+            sampling_timer.set_period(ADC_SAMPLE_TICKS - 1);
+
+            sampling_timer
+        };
 
-        let mut sampling_timer = timers::SamplingTimer::new(timer2);
         let sampling_timer_channels = sampling_timer.channels();
 
         let mut timestamp_timer = {
             // The timer frequency is manually adjusted below, so the 1KHz setting here is a
             // dont-care.
-            let timer5 =
+            let mut timer5 =
                 dp.TIM5.timer(1.khz(), ccdr.peripheral.TIM5, &ccdr.clocks);
 
+            // Configure the timer to count at the designed tick rate. We will manually set the
+            // period below.
+            timer5.pause();
+            timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz());
+
             // The time stamp timer must run at exactly a multiple of the sample timer based on the
             // batch size. To accomodate this, we manually set the period identical to the sample
             // timer, but use a prescaler that is `BATCH_SIZE` longer.
             let mut timer = timers::TimestampTimer::new(timer5);
-            timer.set_period(sampling_timer.get_period());
-            timer.set_prescaler(
-                sampling_timer.get_prescaler() * SAMPLE_BUFFER_SIZE as u16,
-            );
+
+            let period: u32 = {
+                let batch_duration: u64 =
+                    SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
+                let batches_per_overflow: u64 =
+                    (1u64 + u32::MAX as u64) / batch_duration;
+                let period: u64 = batch_duration * batches_per_overflow - 1u64;
+                period.try_into().unwrap()
+            };
+
+            timer.set_period(period);
 
             timer
         };
@@ -667,9 +692,13 @@ const APP: () = {
                     );
 
                     // Ensure that we have enough time for an IO-update every sample.
-                    assert!(
-                        1.0 / (1000 * SAMPLE_FREQUENCY_KHZ) as f32 > 900_e-9
-                    );
+                    let sample_frequency =
+                        (design_parameters::TIMER_FREQUENCY_MHZ as f32
+                            * 1_000_000.0)
+                            / ADC_SAMPLE_TICKS as f32;
+
+                    let sample_period = 1.0 / sample_frequency;
+                    assert!(sample_period > 900_e-9);
 
                     hrtimer
                 };

From 4da892b8a27c743ac59a2cdf1244145325db5830 Mon Sep 17 00:00:00 2001
From: Ryan Summers <summers.ryan.m@gmail.com>
Date: Tue, 8 Dec 2020 15:15:51 +0100
Subject: [PATCH 10/44] Update src/digital_input_stamper.rs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Robert Jördens <rj@quartiq.de>
---
 src/digital_input_stamper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index fe8c86b..eaf9683 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -9,7 +9,7 @@
 ///! # Design
 ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is
 ///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0
-///! triggers, the current TIM5 capture value is recorded as a timestamp. This timestamp can be
+///! triggers, the current TIM5 counter value is captured and recorded as a timestamp. This timestamp can be
 ///! either directly read from the timer channel or can be collected asynchronously via DMA
 ///! collection.
 ///!

From 3886dab961a7b2bc41edc1a336af81485db490a7 Mon Sep 17 00:00:00 2001
From: Ryan Summers <summers.ryan.m@gmail.com>
Date: Tue, 8 Dec 2020 15:15:58 +0100
Subject: [PATCH 11/44] Update src/digital_input_stamper.rs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Robert Jördens <rj@quartiq.de>
---
 src/digital_input_stamper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index eaf9683..621264d 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -19,7 +19,7 @@
 ///! checks to see how many timestamps were collected. These collected timestamps are then returned
 ///! for further processing.
 ///!
-///! To prevent silently discarding timestamps, the TIm5 input capture over-capture interrupt is
+///! To prevent silently discarding timestamps, the TIM5 input capture over-capture interrupt is
 ///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR
 ///! which handles the over-capture.
 ///!

From 645a1cd83256ec650e69e2827c9a5987362bb457 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 8 Dec 2020 16:14:27 +0100
Subject: [PATCH 12/44] Updating timestamper after testing

---
 src/digital_input_stamper.rs | 42 ++++++++++++++++++++++----------
 src/main.rs                  | 12 +++------
 src/timers.rs                | 47 ++++++++++++++----------------------
 3 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 621264d..d4204f3 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -8,10 +8,10 @@
 ///!
 ///! # Design
 ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is
-///! then run in a free-running mode with a configured frequency and period. Whenever an edge on DI0
-///! triggers, the current TIM5 counter value is captured and recorded as a timestamp. This timestamp can be
-///! either directly read from the timer channel or can be collected asynchronously via DMA
-///! collection.
+///! then run in a free-running mode with a configured tick rate (PSC) and maximum count value
+///! (ARR). Whenever an edge on DI0 triggers, the current TIM5 counter value is captured and
+///! recorded as a timestamp. This timestamp can be either directly read from the timer channel or
+///! can be collected asynchronously via DMA collection.
 ///!
 ///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many
 ///! timestamps as there are samples, but it is intended that this DMA transfer should never
@@ -19,9 +19,9 @@
 ///! checks to see how many timestamps were collected. These collected timestamps are then returned
 ///! for further processing.
 ///!
-///! To prevent silently discarding timestamps, the TIM5 input capture over-capture interrupt is
-///! used. Any over-capture event (which indicates an overwritten timestamp) then generates an ISR
-///! which handles the over-capture.
+///! To prevent silently discarding timestamps, the TIM5 input capture over-capture flag is
+///! continually checked. Any over-capture event (which indicates an overwritten timestamp) then
+///! triggers a panic to indicate the dropped timestamp so that design parameters can be adjusted.
 ///!
 ///! # Tradeoffs
 ///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they
@@ -77,9 +77,6 @@ impl InputStamper {
         let input_capture =
             timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4);
 
-        // Listen for over-capture events, which indicates an over-run of DI0 timestamps.
-        input_capture.listen_overcapture();
-
         // For small batch sizes, the overhead of DMA can become burdensome to the point where
         // timing is not met. The DMA requires 500ns overhead, whereas a direct register read only
         // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach.
@@ -89,7 +86,7 @@ impl InputStamper {
             // Set up the DMA transfer.
             let dma_config = DmaConfig::default().memory_increment(true);
 
-            let mut timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
+            let timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
                 Transfer::init(
                     stream,
                     input_capture,
@@ -97,8 +94,6 @@ impl InputStamper {
                     None,
                     dma_config,
                 );
-
-            timestamp_transfer.start(|_| {});
             (Some(timestamp_transfer), None)
         } else {
             (None, Some(input_capture))
@@ -112,12 +107,29 @@ impl InputStamper {
         }
     }
 
+    /// Start capturing timestamps on DI0.
+    pub fn start(&mut self) {
+        if let Some(transfer) = &mut self.transfer {
+            transfer.start(|capture_channel| {
+                capture_channel.enable();
+            });
+        } else {
+            self.capture_channel.as_mut().unwrap().enable();
+        }
+    }
+
     /// Get all of the timestamps that have occurred during the last processing cycle.
     pub fn acquire_buffer(&mut self) -> &[u32] {
         // If we are using DMA, finish the transfer and swap over buffers.
         if self.transfer.is_some() {
             let next_buffer = self.next_buffer.take().unwrap();
 
+            self.transfer.as_mut().unwrap().pause(|channel| {
+                if channel.check_overcapture() {
+                    panic!("DI0 timestamp overrun");
+                }
+            });
+
             let (prev_buffer, _, remaining_transfers) = self
                 .transfer
                 .as_mut()
@@ -132,6 +144,10 @@ impl InputStamper {
             // timestamps actually collected.
             &self.next_buffer.as_ref().unwrap()[..valid_count]
         } else {
+            if self.capture_channel.as_ref().unwrap().check_overcapture() {
+                panic!("DI0 timestamp overrun");
+            }
+
             // If we aren't using DMA, just manually check the input capture channel for a
             // timestamp.
             match self.capture_channel.as_mut().unwrap().latest_capture() {
diff --git a/src/main.rs b/src/main.rs
index 14505e8..a22fe1c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -58,10 +58,10 @@ use heapless::{consts::*, String};
 
 // The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is
 // equal to 10ns per tick.
-const ADC_SAMPLE_TICKS: u32 = 128;
+const ADC_SAMPLE_TICKS: u32 = 256;
 
 // The desired ADC sample processing buffer size.
-const SAMPLE_BUFFER_SIZE: usize = 8;
+const SAMPLE_BUFFER_SIZE: usize = 1;
 
 // The number of cascaded IIR biquads per channel. Select 1 or 2!
 const IIR_CASCADE_LENGTH: usize = 1;
@@ -831,7 +831,7 @@ const APP: () = {
         // Utilize the cycle counter for RTIC scheduling.
         cp.DWT.enable_cycle_counter();
 
-        let input_stamper = {
+        let mut input_stamper = {
             let trigger = gpioa.pa3.into_alternate_af2();
             digital_input_stamper::InputStamper::new(
                 trigger,
@@ -844,6 +844,7 @@ const APP: () = {
         // Start sampling ADCs.
         sampling_timer.start();
         timestamp_timer.start();
+        input_stamper.start();
 
         init::LateResources {
             afes: (afe0, afe1),
@@ -1085,11 +1086,6 @@ const APP: () = {
         panic!("DAC1 output error");
     }
 
-    #[task(binds = TIM5, priority = 3)]
-    fn di0(_: di0::Context) {
-        panic!("DI0 timestamp overrun");
-    }
-
     extern "C" {
         // hw interrupt handlers for RTIC to use for scheduling tasks
         // one per priority
diff --git a/src/timers.rs b/src/timers.rs
index 74b4731..03bc0aa 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -32,21 +32,6 @@ macro_rules! timer_channels {
                     self.channels.take().unwrap()
                 }
 
-                /// Get the prescaler of a timer.
-                #[allow(dead_code)]
-                pub fn get_prescaler(&self) -> u16 {
-                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
-                    regs.psc.read().psc().bits() + 1
-                }
-
-                /// Manually set the prescaler of the timer.
-                #[allow(dead_code)]
-                pub fn set_prescaler(&mut self, prescaler: u16) {
-                    let regs = unsafe { &*hal::stm32::$TY::ptr() };
-                    assert!(prescaler >= 1);
-                    regs.psc.write(|w| w.psc().bits(prescaler - 1));
-                }
-
                 /// Get the period of the timer.
                 #[allow(dead_code)]
                 pub fn get_period(&self) -> u32 {
@@ -176,20 +161,6 @@ macro_rules! timer_channels {
                     }
                 }
 
-                /// Listen for over-capture events on the timer channel.
-                ///
-                /// # Note
-                /// An over-capture event is when a previous capture was lost due to a new capture.
-                ///
-                /// "Listening" is equivalent to enabling the interrupt for the event.
-                #[allow(dead_code)]
-                pub fn listen_overcapture(&self) {
-                    // Note(unsafe): This channel owns all access to the specific timer channel.
-                    // Only atomic operations on completed on the timer registers.
-                    let regs = unsafe { &*<$TY>::ptr() };
-                    regs.dier.modify(|_, w| w.[<cc $index ie>]().set_bit());
-                }
-
                 /// Allow the channel to generate DMA requests.
                 #[allow(dead_code)]
                 pub fn listen_dma(&self) {
@@ -198,6 +169,24 @@ macro_rules! timer_channels {
                     let regs = unsafe { &*<$TY>::ptr() };
                     regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit());
                 }
+
+                /// Enable the input capture to begin capturing timer values.
+                #[allow(dead_code)]
+                pub fn enable(&mut self) {
+                    // Note(unsafe): This channel owns all access to the specific timer channel.
+                    // Only atomic operations are completed on the timer registers.
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    regs.ccer.modify(|_, w| w.[< cc $index e >]().set_bit());
+                }
+
+                /// Check if an over-capture event has occurred.
+                #[allow(dead_code)]
+                pub fn check_overcapture(&self) -> bool {
+                    // Note(unsafe): This channel owns all access to the specific timer channel.
+                    // Only atomic operations are completed on the timer registers.
+                    let regs = unsafe { &*<$TY>::ptr() };
+                    regs.sr.read().[< cc $index of >]().bit_is_set()
+                }
             }
 
             // Note(unsafe): This manually implements DMA support for input-capture channels. This

From 2e0681ebccc3c0c0cb8e3388b5fc4f0688e19d01 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 8 Dec 2020 16:38:07 +0100
Subject: [PATCH 13/44] Fixing power-of-two calculation

---
 src/main.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/main.rs b/src/main.rs
index a22fe1c..308a4f0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -325,7 +325,16 @@ const APP: () = {
                     SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
                 let batches_per_overflow: u64 =
                     (1u64 + u32::MAX as u64) / batch_duration;
-                let period: u64 = batch_duration * batches_per_overflow - 1u64;
+
+                // Calculate the largest power-of-two that is less than or equal to
+                // `batches_per_overflow`. This is completed by eliminating the least significant
+                // bits of the value until only the msb remains, which is always a power of two.
+                let mut j = batches_per_overflow;
+                while (j & (j - 1)) != 0 {
+                    j = j & (j - 1);
+                }
+
+                let period: u64 = batch_duration * j - 1u64;
                 period.try_into().unwrap()
             };
 

From fc81f3d55db88f954dd64225cb6bc96a84cf563c Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Tue, 15 Dec 2020 14:34:14 +0100
Subject: [PATCH 14/44] Removing DMA support from DI0 timestamping

---
 src/digital_input_stamper.rs | 130 ++++++-----------------------------
 src/main.rs                  |   4 +-
 2 files changed, 23 insertions(+), 111 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index d4204f3..5843528 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -3,9 +3,6 @@
 ///! This module provides a means of timestamping the rising edges of an external reference clock on
 ///! the DI0 with a timer value from TIM5.
 ///!
-///! This module only supports input clocks on DI0 and may or may not utilize DMA to collect
-///! timestamps.
-///!
 ///! # Design
 ///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is
 ///! then run in a free-running mode with a configured tick rate (PSC) and maximum count value
@@ -13,12 +10,6 @@
 ///! recorded as a timestamp. This timestamp can be either directly read from the timer channel or
 ///! can be collected asynchronously via DMA collection.
 ///!
-///! When DMA is used for timestamp collection, a DMA transfer is configured to collect as many
-///! timestamps as there are samples, but it is intended that this DMA transfer should never
-///! complete. Instead, when all samples are collected, the module pauses the DMA transfer and
-///! checks to see how many timestamps were collected. These collected timestamps are then returned
-///! for further processing.
-///!
 ///! To prevent silently discarding timestamps, the TIM5 input capture over-capture flag is
 ///! continually checked. Any over-capture event (which indicates an overwritten timestamp) then
 ///! triggers a panic to indicate the dropped timestamp so that design parameters can be adjusted.
@@ -27,35 +18,18 @@
 ///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they
 ///! are being prematurely stopped (such is the case here). As such, for a sample batch size of 1,
 ///! this can take up a significant amount of the total available processing time for the samples.
-///! To avoid this, the module does not use DMA when the sample batch size is one. Instead, the
-///! module manually checks for any captured timestamps from the timer capture channel manually. In
-///! this mode, the maximum input clock frequency supported is equal to the configured sample rate.
+///! This module checks for any captured timestamps from the timer capture channel manually. In
+///! this mode, the maximum input clock frequency supported is dependent on the sampling rate and
+///! batch size.
 ///!
-///! There is a small window while the DMA buffers are swapped where a timestamp could potentially
-///! be lost. To prevent this, the `acuire_buffer()` method should not be pre-empted. Any lost
-///! timestamp will trigger an over-capture interrupt.
-use super::{
-    hal, timers, DmaConfig, PeripheralToMemory, Transfer, SAMPLE_BUFFER_SIZE,
-};
-
-// The DMA buffers must exist in a location where DMA can access. By default, RAM uses DTCM, which
-// is off-limits to the normal DMA peripheral. Instead, we use AXISRAM.
-#[link_section = ".axisram.buffers"]
-static mut BUF: [[u32; SAMPLE_BUFFER_SIZE]; 2] = [[0; SAMPLE_BUFFER_SIZE]; 2];
+///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If
+///! timestamping is desired in DI1, a separate timer + capture channel will be necessary.
+use super::{hal, timers};
 
 /// The timestamper for DI0 reference clock inputs.
 pub struct InputStamper {
     _di0_trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
-    next_buffer: Option<&'static mut [u32; SAMPLE_BUFFER_SIZE]>,
-    transfer: Option<
-        Transfer<
-            hal::dma::dma::Stream6<hal::stm32::DMA1>,
-            timers::tim5::Channel4InputCapture,
-            PeripheralToMemory,
-            &'static mut [u32; SAMPLE_BUFFER_SIZE],
-        >,
-    >,
-    capture_channel: Option<timers::tim5::Channel4InputCapture>,
+    capture_channel: timers::tim5::Channel4InputCapture,
 }
 
 impl InputStamper {
@@ -63,100 +37,40 @@ impl InputStamper {
     ///
     /// # Args
     /// * `trigger` - The capture trigger input pin.
-    /// * `stream` - The DMA stream to use for collecting timestamps.
     /// * `timer_channel - The timer channel used for capturing timestamps.
-    /// * `batch_size` - The number of samples collected per processing batch.
     pub fn new(
         trigger: hal::gpio::gpioa::PA3<hal::gpio::Alternate<hal::gpio::AF2>>,
-        stream: hal::dma::dma::Stream6<hal::stm32::DMA1>,
         timer_channel: timers::tim5::Channel4,
-        batch_size: usize,
     ) -> Self {
         // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
         // capture source.
         let input_capture =
             timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4);
 
-        // For small batch sizes, the overhead of DMA can become burdensome to the point where
-        // timing is not met. The DMA requires 500ns overhead, whereas a direct register read only
-        // requires ~80ns. When batches of 2-or-greater are used, use a DMA-based approach.
-        let (transfer, input_capture) = if batch_size >= 2 {
-            input_capture.listen_dma();
-
-            // Set up the DMA transfer.
-            let dma_config = DmaConfig::default().memory_increment(true);
-
-            let timestamp_transfer: Transfer<_, _, PeripheralToMemory, _> =
-                Transfer::init(
-                    stream,
-                    input_capture,
-                    unsafe { &mut BUF[0] },
-                    None,
-                    dma_config,
-                );
-            (Some(timestamp_transfer), None)
-        } else {
-            (None, Some(input_capture))
-        };
-
         Self {
-            next_buffer: unsafe { Some(&mut BUF[1]) },
-            transfer,
             capture_channel: input_capture,
             _di0_trigger: trigger,
         }
     }
 
-    /// Start capture timestamps on DI0.
+    /// Start to capture timestamps on DI0.
     pub fn start(&mut self) {
-        if let Some(transfer) = &mut self.transfer {
-            transfer.start(|capture_channel| {
-                capture_channel.enable();
-            });
-        } else {
-            self.capture_channel.as_mut().unwrap().enable();
-        }
+        self.capture_channel.enable();
     }
 
-    /// Get all of the timestamps that have occurred during the last processing cycle.
-    pub fn acquire_buffer(&mut self) -> &[u32] {
-        // If we are using DMA, finish the transfer and swap over buffers.
-        if self.transfer.is_some() {
-            let next_buffer = self.next_buffer.take().unwrap();
-
-            self.transfer.as_mut().unwrap().pause(|channel| {
-                if channel.check_overcapture() {
-                    panic!("DI0 timestamp overrun");
-                }
-            });
-
-            let (prev_buffer, _, remaining_transfers) = self
-                .transfer
-                .as_mut()
-                .unwrap()
-                .next_transfer(next_buffer)
-                .unwrap();
-            let valid_count = prev_buffer.len() - remaining_transfers;
-
-            self.next_buffer.replace(prev_buffer);
-
-            // Note that we likely didn't finish the transfer, so only return the number of
-            // timestamps actually collected.
-            &self.next_buffer.as_ref().unwrap()[..valid_count]
-        } else {
-            if self.capture_channel.as_ref().unwrap().check_overcapture() {
-                panic!("DI0 timestamp overrun");
-            }
-
-            // If we aren't using DMA, just manually check the input capture channel for a
-            // timestamp.
-            match self.capture_channel.as_mut().unwrap().latest_capture() {
-                Some(stamp) => {
-                    self.next_buffer.as_mut().unwrap()[0] = stamp;
-                    &self.next_buffer.as_ref().unwrap()[..1]
-                }
-                None => &[],
-            }
+    /// Get the latest timestamp that has occurred.
+    ///
+    /// # Note
+    /// This function must be called sufficiently often. If an over-capture event occurs, this
+    /// function will panic, as this indicates a timestamp was inadvertently dropped.
+    ///
+    /// To prevent timestamp loss, the batch size and sampling rate must be adjusted such that at
+    /// most one timestamp will occur in each data processing cycle.
+    pub fn latest_timestamp(&mut self) -> Option<u32> {
+        if self.capture_channel.check_overcapture() {
+            panic!("DI0 timestamp overrun");
         }
+
+        self.capture_channel.latest_capture()
     }
 }
diff --git a/src/main.rs b/src/main.rs
index 308a4f0..5ae2b0f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -844,9 +844,7 @@ const APP: () = {
             let trigger = gpioa.pa3.into_alternate_af2();
             digital_input_stamper::InputStamper::new(
                 trigger,
-                dma_streams.6,
                 timestamp_timer_channels.ch4,
-                SAMPLE_BUFFER_SIZE,
             )
         };
 
@@ -882,7 +880,7 @@ const APP: () = {
             c.resources.dacs.1.acquire_buffer(),
         ];
 
-        let _timestamps = c.resources.input_stamper.acquire_buffer();
+        let _timestamp = c.resources.input_stamper.latest_timestamp();
 
         for channel in 0..adc_samples.len() {
             for sample in 0..adc_samples[0].len() {

From e89db65722672bbbbc9178b5ba1214126854f17a Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 15:25:31 -0800
Subject: [PATCH 15/44] rename trig.rs -> cossin.rs

---
 dsp/src/{trig.rs => cossin.rs} | 0
 dsp/src/lib.rs                 | 3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 rename dsp/src/{trig.rs => cossin.rs} (100%)

diff --git a/dsp/src/trig.rs b/dsp/src/cossin.rs
similarity index 100%
rename from dsp/src/trig.rs
rename to dsp/src/cossin.rs
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 6dd20f7..ef0c131 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -18,10 +18,11 @@ pub fn shift_round(x: i32, shift: usize) -> i32 {
     (x + (1 << (shift - 1))) >> shift
 }
 
+pub mod atan2;
+pub mod cossin;
 pub mod iir;
 pub mod lockin;
 pub mod pll;
-pub mod trig;
 pub mod unwrap;
 
 #[cfg(test)]

From 17f9f0750eee1ec0fff98d46720e8a45b8702148 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:01:50 -0800
Subject: [PATCH 16/44] dsp: move abs to lib.rs

---
 dsp/src/iir.rs | 12 +-----------
 dsp/src/lib.rs | 13 +++++++++++++
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs
index c6f2100..48c92e9 100644
--- a/dsp/src/iir.rs
+++ b/dsp/src/iir.rs
@@ -2,23 +2,13 @@ use core::ops::{Add, Mul, Neg};
 use serde::{Deserialize, Serialize};
 
 use core::f32;
+use super::abs;
 
 // These are implemented here because core::f32 doesn't have them (yet).
 // They are naive and don't handle inf/nan.
 // `compiler-intrinsics`/llvm should have better (robust, universal, and
 // faster) implementations.
 
-fn abs<T>(x: T) -> T
-where
-    T: PartialOrd + Default + Neg<Output = T>,
-{
-    if x >= T::default() {
-        x
-    } else {
-        -x
-    }
-}
-
 fn copysign<T>(x: T, y: T) -> T
 where
     T: PartialOrd + Default + Neg<Output = T>,
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index ef0c131..2fbd121 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -1,6 +1,8 @@
 #![cfg_attr(not(test), no_std)]
 #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]
 
+use core::ops::Neg;
+
 pub type Complex<T> = (T, T);
 
 /// Round up half.
@@ -18,6 +20,17 @@ pub fn shift_round(x: i32, shift: usize) -> i32 {
     (x + (1 << (shift - 1))) >> shift
 }
 
+fn abs<T>(x: T) -> T
+where
+    T: PartialOrd + Default + Neg<Output = T>,
+{
+    if x >= T::default() {
+        x
+    } else {
+        -x
+    }
+}
+
 pub mod atan2;
 pub mod cossin;
 pub mod iir;

From 6d651da758f44d0a8b6702cb4e364da3fad276fa Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:02:17 -0800
Subject: [PATCH 17/44] dsp: add f64 isclose testing function

---
 dsp/src/testing.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs
index 1a8e109..098ec87 100644
--- a/dsp/src/testing.rs
+++ b/dsp/src/testing.rs
@@ -1,6 +1,10 @@
 use super::Complex;
 
-pub fn isclose(a: f32, b: f32, rtol: f32, atol: f32) -> bool {
+pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool {
+    (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol
+}
+
+pub fn isclosef(a: f32, b: f32, rtol: f32, atol: f32) -> bool {
     (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol
 }
 
@@ -10,7 +14,7 @@ pub fn complex_isclose(
     rtol: f32,
     atol: f32,
 ) -> bool {
-    isclose(a.0, b.0, rtol, atol) && isclose(a.1, b.1, rtol, atol)
+    isclosef(a.0, b.0, rtol, atol) && isclosef(a.1, b.1, rtol, atol)
 }
 
 pub fn complex_allclose(

From 5d055b01a03f31b0950ce5e36c214a1fc6289a96 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:02:42 -0800
Subject: [PATCH 18/44] dsp: add atan2

---
 dsp/src/atan2.rs | 126 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 dsp/src/atan2.rs

diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs
new file mode 100644
index 0000000..a5f4d3e
--- /dev/null
+++ b/dsp/src/atan2.rs
@@ -0,0 +1,126 @@
+use super::{abs, shift_round};
+
+/// 2-argument arctangent function.
+///
+/// This implementation uses all integer arithmetic for fast
+/// computation. It is designed to have high accuracy near the axes
+/// and lower away from the axes. It is additionally designed so that
+/// the error changes slowly with respect to the angle.
+///
+/// # Arguments
+///
+/// * `y` - Y-axis component.
+/// * `x` - X-axis component.
+///
+/// # Returns
+///
+/// The angle between the x-axis and the ray to the point (x,y). The
+/// result range is from i32::MIN to i32::MAX, where i32::MIN
+/// corresponds to an angle of -pi and i32::MAX corresponds to an
+/// angle of +pi.
+pub fn atan2(y: i32, x: i32) -> i32 {
+    let y = y >> 16;
+    let x = x >> 16;
+
+    let ux = abs::<i32>(x);
+    let uy = abs::<i32>(y);
+
+    // Uses the general procedure described in the following
+    // Mathematics stack exchange answer:
+    //
+    // https://math.stackexchange.com/a/1105038/583981
+    //
+    // The atan approximation method has been modified to be cheaper
+    // to compute and to be more compatible with integer
+    // arithmetic. The approximation technique used here is
+    //
+    // pi / 4 * x + 0.285 * x * (1 - abs(x))
+    //
+    // which is taken from Rajan 2006: Efficient Approximations for
+    // the Arctangent Function.
+    let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) };
+
+    if max == 0 {
+        return 0;
+    }
+
+    let ratio = (min << 15) / max;
+
+    let mut angle = {
+        // pi/4, referenced to i16::MAX
+        const PI_4_FACTOR: i32 = 25735;
+        // 0.285, referenced to i16::MAX
+        const FACTOR_0285: i32 = 9339;
+        // 1/pi, referenced to u16::MAX
+        const PI_INVERTED_FACTOR: i32 = 20861;
+
+        let r1 = shift_round(ratio * PI_4_FACTOR, 15);
+        let r2 = shift_round(
+            (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio),
+            15,
+        );
+        (r1 + r2) * PI_INVERTED_FACTOR
+    };
+
+    if uy > ux {
+        angle = (i32::MAX >> 1) - angle;
+    }
+
+    if x < 0 {
+        angle = i32::MAX - angle;
+    }
+
+    if y < 0 {
+        angle *= -1;
+    }
+
+    angle
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use core::f64::consts::PI;
+    use crate::testing::isclose;
+
+    fn angle_to_axis(angle: f64) -> f64 {
+        let angle = angle % (PI / 2.);
+        (PI / 2. - angle).min(angle)
+    }
+
+    #[test]
+    fn absolute_error() {
+        const NUM_VALS: usize = 1_001;
+        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
+        let val_bounds: (f64, f64) = (-1., 1.);
+        let val_delta: f64 =
+            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
+        for i in 0..NUM_VALS {
+            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
+        }
+
+        for &x in test_vals.iter() {
+            for &y in test_vals.iter() {
+                let atol: f64 = 4e-5;
+                let rtol: f64 = 0.127;
+                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
+                    / i16::MAX as f64;
+                let tol = atol + rtol * angle_to_axis(actual).abs();
+                let computed = (atan2(
+                    ((y * i16::MAX as f64) as i32) << 16,
+                    ((x * i16::MAX as f64) as i32) << 16,
+                ) >> 16) as f64
+                    / i16::MAX as f64
+                    * PI;
+
+                if !isclose(computed, actual, 0., tol) {
+                    println!("(x, y)   : {}, {}", x, y);
+                    println!("actual   : {}", actual);
+                    println!("computed : {}", computed);
+                    println!("tolerance: {}\n", tol);
+                    assert!(false);
+                }
+            }
+        }
+    }
+}

From e257545321cb27fd808c35f5c7d8ddedca632fbb Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:14:11 -0800
Subject: [PATCH 19/44] fix formatting

---
 dsp/src/atan2.rs | 2 +-
 dsp/src/iir.rs   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs
index a5f4d3e..2643d19 100644
--- a/dsp/src/atan2.rs
+++ b/dsp/src/atan2.rs
@@ -80,8 +80,8 @@ pub fn atan2(y: i32, x: i32) -> i32 {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use core::f64::consts::PI;
     use crate::testing::isclose;
+    use core::f64::consts::PI;
 
     fn angle_to_axis(angle: f64) -> f64 {
         let angle = angle % (PI / 2.);
diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs
index 48c92e9..5ff0970 100644
--- a/dsp/src/iir.rs
+++ b/dsp/src/iir.rs
@@ -1,8 +1,8 @@
 use core::ops::{Add, Mul, Neg};
 use serde::{Deserialize, Serialize};
 
-use core::f32;
 use super::abs;
+use core::f32;
 
 // These are implemented here because core::f32 doesn't have them (yet).
 // They are naive and don't handle inf/nan.

From 7c4f6082068d8ae3bda2896f29b00c7b4ab50ee4 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:26:44 -0800
Subject: [PATCH 20/44] move cossin and atan2 into the same trig file

---
 dsp/benches/cossin.rs          |   2 +-
 dsp/src/atan2.rs               | 126 ---------------------------------
 dsp/src/lib.rs                 |   3 +-
 dsp/src/{cossin.rs => trig.rs} | 123 +++++++++++++++++++++++++++++++-
 4 files changed, 124 insertions(+), 130 deletions(-)
 delete mode 100644 dsp/src/atan2.rs
 rename dsp/src/{cossin.rs => trig.rs} (57%)

diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs
index 4e23774..9f88e1b 100644
--- a/dsp/benches/cossin.rs
+++ b/dsp/benches/cossin.rs
@@ -1,6 +1,6 @@
 use core::f32::consts::PI;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use dsp::trig::cossin;
+use dsp::cossin::cossin;
 
 fn cossin_bench(c: &mut Criterion) {
     let zi = -0x7304_2531_i32;
diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs
deleted file mode 100644
index 2643d19..0000000
--- a/dsp/src/atan2.rs
+++ /dev/null
@@ -1,126 +0,0 @@
-use super::{abs, shift_round};
-
-/// 2-argument arctangent function.
-///
-/// This implementation uses all integer arithmetic for fast
-/// computation. It is designed to have high accuracy near the axes
-/// and lower away from the axes. It is additionally designed so that
-/// the error changes slowly with respect to the angle.
-///
-/// # Arguments
-///
-/// * `y` - Y-axis component.
-/// * `x` - X-axis component.
-///
-/// # Returns
-///
-/// The angle between the x-axis and the ray to the point (x,y). The
-/// result range is from i32::MIN to i32::MAX, where i32::MIN
-/// corresponds to an angle of -pi and i32::MAX corresponds to an
-/// angle of +pi.
-pub fn atan2(y: i32, x: i32) -> i32 {
-    let y = y >> 16;
-    let x = x >> 16;
-
-    let ux = abs::<i32>(x);
-    let uy = abs::<i32>(y);
-
-    // Uses the general procedure described in the following
-    // Mathematics stack exchange answer:
-    //
-    // https://math.stackexchange.com/a/1105038/583981
-    //
-    // The atan approximation method has been modified to be cheaper
-    // to compute and to be more compatible with integer
-    // arithmetic. The approximation technique used here is
-    //
-    // pi / 4 * x + 0.285 * x * (1 - abs(x))
-    //
-    // which is taken from Rajan 2006: Efficient Approximations for
-    // the Arctangent Function.
-    let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) };
-
-    if max == 0 {
-        return 0;
-    }
-
-    let ratio = (min << 15) / max;
-
-    let mut angle = {
-        // pi/4, referenced to i16::MAX
-        const PI_4_FACTOR: i32 = 25735;
-        // 0.285, referenced to i16::MAX
-        const FACTOR_0285: i32 = 9339;
-        // 1/pi, referenced to u16::MAX
-        const PI_INVERTED_FACTOR: i32 = 20861;
-
-        let r1 = shift_round(ratio * PI_4_FACTOR, 15);
-        let r2 = shift_round(
-            (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio),
-            15,
-        );
-        (r1 + r2) * PI_INVERTED_FACTOR
-    };
-
-    if uy > ux {
-        angle = (i32::MAX >> 1) - angle;
-    }
-
-    if x < 0 {
-        angle = i32::MAX - angle;
-    }
-
-    if y < 0 {
-        angle *= -1;
-    }
-
-    angle
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::testing::isclose;
-    use core::f64::consts::PI;
-
-    fn angle_to_axis(angle: f64) -> f64 {
-        let angle = angle % (PI / 2.);
-        (PI / 2. - angle).min(angle)
-    }
-
-    #[test]
-    fn absolute_error() {
-        const NUM_VALS: usize = 1_001;
-        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
-        let val_bounds: (f64, f64) = (-1., 1.);
-        let val_delta: f64 =
-            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
-        for i in 0..NUM_VALS {
-            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
-        }
-
-        for &x in test_vals.iter() {
-            for &y in test_vals.iter() {
-                let atol: f64 = 4e-5;
-                let rtol: f64 = 0.127;
-                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
-                    / i16::MAX as f64;
-                let tol = atol + rtol * angle_to_axis(actual).abs();
-                let computed = (atan2(
-                    ((y * i16::MAX as f64) as i32) << 16,
-                    ((x * i16::MAX as f64) as i32) << 16,
-                ) >> 16) as f64
-                    / i16::MAX as f64
-                    * PI;
-
-                if !isclose(computed, actual, 0., tol) {
-                    println!("(x, y)   : {}, {}", x, y);
-                    println!("actual   : {}", actual);
-                    println!("computed : {}", computed);
-                    println!("tolerance: {}\n", tol);
-                    assert!(false);
-                }
-            }
-        }
-    }
-}
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 2fbd121..90f62f6 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -31,11 +31,10 @@ where
     }
 }
 
-pub mod atan2;
-pub mod cossin;
 pub mod iir;
 pub mod lockin;
 pub mod pll;
+pub mod trig;
 pub mod unwrap;
 
 #[cfg(test)]
diff --git a/dsp/src/cossin.rs b/dsp/src/trig.rs
similarity index 57%
rename from dsp/src/cossin.rs
rename to dsp/src/trig.rs
index 5a99232..72435e0 100644
--- a/dsp/src/cossin.rs
+++ b/dsp/src/trig.rs
@@ -1,8 +1,85 @@
-use super::Complex;
+use super::{abs, shift_round, Complex};
 use core::f64::consts::PI;
 
 include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 
+/// 2-argument arctangent function.
+///
+/// This implementation uses all integer arithmetic for fast
+/// computation. It is designed to have high accuracy near the axes
+/// and lower away from the axes. It is additionally designed so that
+/// the error changes slowly with respect to the angle.
+///
+/// # Arguments
+///
+/// * `y` - Y-axis component.
+/// * `x` - X-axis component.
+///
+/// # Returns
+///
+/// The angle between the x-axis and the ray to the point (x,y). The
+/// result range is from i32::MIN to i32::MAX, where i32::MIN
+/// corresponds to an angle of -pi and i32::MAX corresponds to an
+/// angle of +pi.
+pub fn atan2(y: i32, x: i32) -> i32 {
+    let y = y >> 16;
+    let x = x >> 16;
+
+    let ux = abs::<i32>(x);
+    let uy = abs::<i32>(y);
+
+    // Uses the general procedure described in the following
+    // Mathematics stack exchange answer:
+    //
+    // https://math.stackexchange.com/a/1105038/583981
+    //
+    // The atan approximation method has been modified to be cheaper
+    // to compute and to be more compatible with integer
+    // arithmetic. The approximation technique used here is
+    //
+    // pi / 4 * x + 0.285 * x * (1 - abs(x))
+    //
+    // which is taken from Rajan 2006: Efficient Approximations for
+    // the Arctangent Function.
+    let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) };
+
+    if max == 0 {
+        return 0;
+    }
+
+    let ratio = (min << 15) / max;
+
+    let mut angle = {
+        // pi/4, referenced to i16::MAX
+        const PI_4_FACTOR: i32 = 25735;
+        // 0.285, referenced to i16::MAX
+        const FACTOR_0285: i32 = 9339;
+        // 1/pi, referenced to u16::MAX
+        const PI_INVERTED_FACTOR: i32 = 20861;
+
+        let r1 = shift_round(ratio * PI_4_FACTOR, 15);
+        let r2 = shift_round(
+            (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio),
+            15,
+        );
+        (r1 + r2) * PI_INVERTED_FACTOR
+    };
+
+    if uy > ux {
+        angle = (i32::MAX >> 1) - angle;
+    }
+
+    if x < 0 {
+        angle = i32::MAX - angle;
+    }
+
+    if y < 0 {
+        angle *= -1;
+    }
+
+    angle
+}
+
 /// Compute the cosine and sine of an angle.
 /// This is ported from the MiSoC cossin core.
 /// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
@@ -75,6 +152,14 @@ pub fn cossin(phase: i32) -> Complex<i32> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::testing::isclose;
+    use core::f64::consts::PI;
+
+    fn angle_to_axis(angle: f64) -> f64 {
+        let angle = angle % (PI / 2.);
+        (PI / 2. - angle).min(angle)
+    }
+
     #[test]
     fn error_max_rms_all_phase() {
         // Constant amplitude error due to LUT data range.
@@ -143,4 +228,40 @@ mod tests {
         assert!(max_err.0 < 1.1e-5);
         assert!(max_err.1 < 1.1e-5);
     }
+
+    #[test]
+    fn absolute_error() {
+        const NUM_VALS: usize = 1_001;
+        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
+        let val_bounds: (f64, f64) = (-1., 1.);
+        let val_delta: f64 =
+            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
+        for i in 0..NUM_VALS {
+            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
+        }
+
+        for &x in test_vals.iter() {
+            for &y in test_vals.iter() {
+                let atol: f64 = 4e-5;
+                let rtol: f64 = 0.127;
+                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
+                    / i16::MAX as f64;
+                let tol = atol + rtol * angle_to_axis(actual).abs();
+                let computed = (atan2(
+                    ((y * i16::MAX as f64) as i32) << 16,
+                    ((x * i16::MAX as f64) as i32) << 16,
+                ) >> 16) as f64
+                    / i16::MAX as f64
+                    * PI;
+
+                if !isclose(computed, actual, 0., tol) {
+                    println!("(x, y)   : {}, {}", x, y);
+                    println!("actual   : {}", actual);
+                    println!("computed : {}", computed);
+                    println!("tolerance: {}\n", tol);
+                    assert!(false);
+                }
+            }
+        }
+    }
 }

From 85ae70fe6205ce25e6b80840c92aaf1e4221d2a5 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:28:49 -0800
Subject: [PATCH 21/44] rename trig tests to delineate between cossin and atan2

---
 dsp/src/trig.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 72435e0..4dc26be 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -161,7 +161,7 @@ mod tests {
     }
 
     #[test]
-    fn error_max_rms_all_phase() {
+    fn cossin_error_max_rms_all_phase() {
         // Constant amplitude error due to LUT data range.
         const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
         const MAX_PHASE: f64 = (1i64 << 32) as f64;
@@ -230,7 +230,7 @@ mod tests {
     }
 
     #[test]
-    fn absolute_error() {
+    fn atan2_absolute_error() {
         const NUM_VALS: usize = 1_001;
         let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
         let val_bounds: (f64, f64) = (-1., 1.);

From 2ddaab8fae34b4f01d2f2029eb18f3676e41ab56 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Wed, 16 Dec 2020 16:57:18 -0800
Subject: [PATCH 22/44] dsp: fix bench import path

---
 dsp/benches/cossin.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs
index 9f88e1b..4e23774 100644
--- a/dsp/benches/cossin.rs
+++ b/dsp/benches/cossin.rs
@@ -1,6 +1,6 @@
 use core::f32::consts::PI;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use dsp::cossin::cossin;
+use dsp::trig::cossin;
 
 fn cossin_bench(c: &mut Criterion) {
     let zi = -0x7304_2531_i32;

From d9d500743f41aa150c055263aedfdba36c4ffbd0 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 08:02:54 -0800
Subject: [PATCH 23/44] simplify atan initial angle expression

---
 dsp/src/trig.rs | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 4dc26be..e306356 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -50,19 +50,13 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     let ratio = (min << 15) / max;
 
     let mut angle = {
-        // pi/4, referenced to i16::MAX
-        const PI_4_FACTOR: i32 = 25735;
-        // 0.285, referenced to i16::MAX
-        const FACTOR_0285: i32 = 9339;
-        // 1/pi, referenced to u16::MAX
-        const PI_INVERTED_FACTOR: i32 = 20861;
+        const K1: i32 =
+            ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32;
+        const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32;
 
-        let r1 = shift_round(ratio * PI_4_FACTOR, 15);
-        let r2 = shift_round(
-            (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio),
-            15,
-        );
-        (r1 + r2) * PI_INVERTED_FACTOR
+        let ratio_squared = shift_round(ratio * ratio, 15);
+
+        ratio * K1 - K2 * ratio_squared
     };
 
     if uy > ux {

From d7111a3aa811deed34f01c0682ee6fada8978c61 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 08:04:53 -0800
Subject: [PATCH 24/44] dsp/trig: let compiler infer type parameter in atan2
 abs call

---
 dsp/src/trig.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index e306356..51e8b2e 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -25,8 +25,8 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     let y = y >> 16;
     let x = x >> 16;
 
-    let ux = abs::<i32>(x);
-    let uy = abs::<i32>(y);
+    let ux = abs(x);
+    let uy = abs(y);
 
     // Uses the general procedure described in the following
     // Mathematics stack exchange answer:

From 5717991ada1847549b3154a20803f21c5688c3e5 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 09:31:18 -0800
Subject: [PATCH 25/44] atan2: result range is from i32::MIN+1 to i32::MAX

---
 dsp/src/trig.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 51e8b2e..57e4fec 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -18,7 +18,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 /// # Returns
 ///
 /// The angle between the x-axis and the ray to the point (x,y). The
-/// result range is from i32::MIN to i32::MAX, where i32::MIN
+/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1
 /// corresponds to an angle of -pi and i32::MAX corresponds to an
 /// angle of +pi.
 pub fn atan2(y: i32, x: i32) -> i32 {

From cb38c3e3bd3f28f32bfba3ed2abf048772af45a9 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 09:31:38 -0800
Subject: [PATCH 26/44] atan2: clarify sharing bits between atan argument and
 constant factors

---
 dsp/src/trig.rs | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 57e4fec..d25d50b 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -47,16 +47,21 @@ pub fn atan2(y: i32, x: i32) -> i32 {
         return 0;
     }
 
-    let ratio = (min << 15) / max;
+    // We need to share the 31 available non-sign bits between the
+    // atan argument and constant factors used in the atan
+    // approximation. Sharing the bits roughly equally between them
+    // gives good accuracy.
+    const ATAN_ARGUMENT_BITS: usize = 15;
+    let ratio = (min << ATAN_ARGUMENT_BITS) / max;
 
     let mut angle = {
-        const K1: i32 =
-            ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32;
-        const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32;
+        const K1: i32 = ((1. / 4. + 0.285 / PI)
+            * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64)
+            as i32;
+        const K2: i32 =
+            ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32;
 
-        let ratio_squared = shift_round(ratio * ratio, 15);
-
-        ratio * K1 - K2 * ratio_squared
+        ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS)
     };
 
     if uy > ux {

From 1f28949bc5d859144a6fc96cdf28cd6959dd2e29 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 09:47:39 -0800
Subject: [PATCH 27/44] atan2: store sign bits and greater of |x| and |y|

---
 dsp/src/trig.rs | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index d25d50b..53aecfa 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -1,4 +1,4 @@
-use super::{abs, shift_round, Complex};
+use super::{shift_round, Complex};
 use core::f64::consts::PI;
 
 include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
@@ -22,11 +22,18 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 /// corresponds to an angle of -pi and i32::MAX corresponds to an
 /// angle of +pi.
 pub fn atan2(y: i32, x: i32) -> i32 {
-    let y = y >> 16;
-    let x = x >> 16;
+    let mut y = y >> 16;
+    let mut x = x >> 16;
 
-    let ux = abs(x);
-    let uy = abs(y);
+    let sign = ((y >> 14) & 2) | ((x >> 15) & 1);
+    if sign & 1 == 1 {
+        x *= -1;
+    }
+    if sign & 2 == 2 {
+        y *= -1;
+    }
+
+    let y_greater = y > x;
 
     // Uses the general procedure described in the following
     // Mathematics stack exchange answer:
@@ -41,7 +48,7 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     //
     // which is taken from Rajan 2006: Efficient Approximations for
     // the Arctangent Function.
-    let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) };
+    let (min, max) = if y_greater { (x, y) } else { (y, x) };
 
     if max == 0 {
         return 0;
@@ -64,15 +71,15 @@ pub fn atan2(y: i32, x: i32) -> i32 {
         ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS)
     };
 
-    if uy > ux {
+    if y_greater {
         angle = (i32::MAX >> 1) - angle;
     }
 
-    if x < 0 {
+    if sign & 1 == 1 {
         angle = i32::MAX - angle;
     }
 
-    if y < 0 {
+    if sign & 2 == 2 {
         angle *= -1;
     }
 

From 56641d5838cba59fa55d82af2e0d495185b227cc Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 10:02:35 -0800
Subject: [PATCH 28/44] atan2: specify why we cannot use more than 15 bits for
 the atan argument

---
 dsp/src/trig.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 53aecfa..6d0acff 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -57,7 +57,9 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     // We need to share the 31 available non-sign bits between the
     // atan argument and constant factors used in the atan
     // approximation. Sharing the bits roughly equally between them
-    // gives good accuracy.
+    // gives good accuracy. Additionally, we cannot increase the
+    // number of atan argument bits beyond 15 because we must square
+    // it.
     const ATAN_ARGUMENT_BITS: usize = 15;
     let ratio = (min << ATAN_ARGUMENT_BITS) / max;
 

From 09a744f59c5771fa044acd1694809e4d055ef157 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 10:03:16 -0800
Subject: [PATCH 29/44] dsp: move iir generic math functions to top-level
 module scope

---
 dsp/src/iir.rs | 69 +-------------------------------------------------
 dsp/src/lib.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 69 deletions(-)

diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs
index 5ff0970..dbec8d8 100644
--- a/dsp/src/iir.rs
+++ b/dsp/src/iir.rs
@@ -1,75 +1,8 @@
-use core::ops::{Add, Mul, Neg};
 use serde::{Deserialize, Serialize};
 
-use super::abs;
+use super::{abs, copysign, macc, max, min};
 use core::f32;
 
-// These are implemented here because core::f32 doesn't have them (yet).
-// They are naive and don't handle inf/nan.
-// `compiler-intrinsics`/llvm should have better (robust, universal, and
-// faster) implementations.
-
-fn copysign<T>(x: T, y: T) -> T
-where
-    T: PartialOrd + Default + Neg<Output = T>,
-{
-    if (x >= T::default() && y >= T::default())
-        || (x <= T::default() && y <= T::default())
-    {
-        x
-    } else {
-        -x
-    }
-}
-
-#[cfg(not(feature = "nightly"))]
-fn max<T>(x: T, y: T) -> T
-where
-    T: PartialOrd,
-{
-    if x > y {
-        x
-    } else {
-        y
-    }
-}
-
-#[cfg(not(feature = "nightly"))]
-fn min<T>(x: T, y: T) -> T
-where
-    T: PartialOrd,
-{
-    if x < y {
-        x
-    } else {
-        y
-    }
-}
-
-#[cfg(feature = "nightly")]
-fn max(x: f32, y: f32) -> f32 {
-    core::intrinsics::maxnumf32(x, y)
-}
-
-#[cfg(feature = "nightly")]
-fn min(x: f32, y: f32) -> f32 {
-    core::intrinsics::minnumf32(x, y)
-}
-
-// Multiply-accumulate vectors `x` and `a`.
-//
-// A.k.a. dot product.
-// Rust/LLVM optimize this nicely.
-fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
-where
-    T: Add<Output = T> + Mul<Output = T> + Copy,
-{
-    x.iter()
-        .zip(a)
-        .map(|(x, a)| *x * *a)
-        .fold(y0, |y, xa| y + xa)
-}
-
 /// IIR state and coefficients type.
 ///
 /// To represent the IIR state (input and output memory) during the filter update
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 90f62f6..67b1882 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -1,7 +1,7 @@
 #![cfg_attr(not(test), no_std)]
 #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]
 
-use core::ops::Neg;
+use core::ops::{Add, Mul, Neg};
 
 pub type Complex<T> = (T, T);
 
@@ -31,6 +31,72 @@ where
     }
 }
 
+// These are implemented here because core::f32 doesn't have them (yet).
+// They are naive and don't handle inf/nan.
+// `compiler-intrinsics`/llvm should have better (robust, universal, and
+// faster) implementations.
+
+fn copysign<T>(x: T, y: T) -> T
+where
+    T: PartialOrd + Default + Neg<Output = T>,
+{
+    if (x >= T::default() && y >= T::default())
+        || (x <= T::default() && y <= T::default())
+    {
+        x
+    } else {
+        -x
+    }
+}
+
+#[cfg(not(feature = "nightly"))]
+fn max<T>(x: T, y: T) -> T
+where
+    T: PartialOrd,
+{
+    if x > y {
+        x
+    } else {
+        y
+    }
+}
+
+#[cfg(not(feature = "nightly"))]
+fn min<T>(x: T, y: T) -> T
+where
+    T: PartialOrd,
+{
+    if x < y {
+        x
+    } else {
+        y
+    }
+}
+
+#[cfg(feature = "nightly")]
+fn max(x: f32, y: f32) -> f32 {
+    core::intrinsics::maxnumf32(x, y)
+}
+
+#[cfg(feature = "nightly")]
+fn min(x: f32, y: f32) -> f32 {
+    core::intrinsics::minnumf32(x, y)
+}
+
+// Multiply-accumulate vectors `x` and `a`.
+//
+// A.k.a. dot product.
+// Rust/LLVM optimize this nicely.
+fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
+where
+    T: Add<Output = T> + Mul<Output = T> + Copy,
+{
+    x.iter()
+        .zip(a)
+        .map(|(x, a)| *x * *a)
+        .fold(y0, |y, xa| y + xa)
+}
+
 pub mod iir;
 pub mod lockin;
 pub mod pll;

From 6ffc42021edbcf34e4eabd2a3213b8d191e22e5e Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 10:09:12 -0800
Subject: [PATCH 30/44] move atan2 test before cossin test to mimic function
 order

---
 dsp/src/trig.rs | 72 ++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 6d0acff..ccf9292 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -168,6 +168,42 @@ mod tests {
         (PI / 2. - angle).min(angle)
     }
 
+    #[test]
+    fn atan2_absolute_error() {
+        const NUM_VALS: usize = 1_001;
+        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
+        let val_bounds: (f64, f64) = (-1., 1.);
+        let val_delta: f64 =
+            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
+        for i in 0..NUM_VALS {
+            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
+        }
+
+        for &x in test_vals.iter() {
+            for &y in test_vals.iter() {
+                let atol: f64 = 4e-5;
+                let rtol: f64 = 0.127;
+                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
+                    / i16::MAX as f64;
+                let tol = atol + rtol * angle_to_axis(actual).abs();
+                let computed = (atan2(
+                    ((y * i16::MAX as f64) as i32) << 16,
+                    ((x * i16::MAX as f64) as i32) << 16,
+                ) >> 16) as f64
+                    / i16::MAX as f64
+                    * PI;
+
+                if !isclose(computed, actual, 0., tol) {
+                    println!("(x, y)   : {}, {}", x, y);
+                    println!("actual   : {}", actual);
+                    println!("computed : {}", computed);
+                    println!("tolerance: {}\n", tol);
+                    assert!(false);
+                }
+            }
+        }
+    }
+
     #[test]
     fn cossin_error_max_rms_all_phase() {
         // Constant amplitude error due to LUT data range.
@@ -236,40 +272,4 @@ mod tests {
         assert!(max_err.0 < 1.1e-5);
         assert!(max_err.1 < 1.1e-5);
     }
-
-    #[test]
-    fn atan2_absolute_error() {
-        const NUM_VALS: usize = 1_001;
-        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
-        let val_bounds: (f64, f64) = (-1., 1.);
-        let val_delta: f64 =
-            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
-        for i in 0..NUM_VALS {
-            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
-        }
-
-        for &x in test_vals.iter() {
-            for &y in test_vals.iter() {
-                let atol: f64 = 4e-5;
-                let rtol: f64 = 0.127;
-                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
-                    / i16::MAX as f64;
-                let tol = atol + rtol * angle_to_axis(actual).abs();
-                let computed = (atan2(
-                    ((y * i16::MAX as f64) as i32) << 16,
-                    ((x * i16::MAX as f64) as i32) << 16,
-                ) >> 16) as f64
-                    / i16::MAX as f64
-                    * PI;
-
-                if !isclose(computed, actual, 0., tol) {
-                    println!("(x, y)   : {}, {}", x, y);
-                    println!("actual   : {}", actual);
-                    println!("computed : {}", computed);
-                    println!("tolerance: {}\n", tol);
-                    assert!(false);
-                }
-            }
-        }
-    }
 }

From 9c5e68ceea82c8f9096473cfb74d0ef930d34843 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 11:34:39 -0800
Subject: [PATCH 31/44] atan2: test min and max angle inputs

---
 dsp/src/trig.rs | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index ccf9292..5d73846 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -179,10 +179,10 @@ mod tests {
             test_vals[i] = val_bounds.0 + i as f64 * val_delta;
         }
 
+        let atol: f64 = 4e-5;
+        let rtol: f64 = 0.127;
         for &x in test_vals.iter() {
             for &y in test_vals.iter() {
-                let atol: f64 = 4e-5;
-                let rtol: f64 = 0.127;
                 let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
                     / i16::MAX as f64;
                 let tol = atol + rtol * angle_to_axis(actual).abs();
@@ -202,6 +202,29 @@ mod tests {
                 }
             }
         }
+
+        // test min and max explicitly
+        for (x, y) in [
+            ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32),
+            ((i16::MIN as i32 + 1) << 16, (1 << 16) as i32),
+        ]
+        .iter()
+        {
+            let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64;
+            let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64;
+            let actual =
+                (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64;
+            let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI;
+            let tol = atol + rtol * angle_to_axis(actual).abs();
+
+            if !isclose(computed, actual, 0., tol) {
+                println!("(x, y)   : {}, {}", *x, *y);
+                println!("actual   : {}", actual);
+                println!("computed : {}", computed);
+                println!("tolerance: {}\n", tol);
+                assert!(false);
+            }
+        }
     }
 
     #[test]

From 17cf71f22bc3978cb50f21c50367f108aa8f9ee9 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 11:39:32 -0800
Subject: [PATCH 32/44] atan2: replace min, max with x, y

---
 dsp/src/trig.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 5d73846..13ce844 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -48,9 +48,11 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     //
     // which is taken from Rajan 2006: Efficient Approximations for
     // the Arctangent Function.
-    let (min, max) = if y_greater { (x, y) } else { (y, x) };
+    if y_greater {
+        core::mem::swap(&mut x, &mut y);
+    }
 
-    if max == 0 {
+    if x == 0 {
         return 0;
     }
 
@@ -61,7 +63,7 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     // number of atan argument bits beyond 15 because we must square
     // it.
     const ATAN_ARGUMENT_BITS: usize = 15;
-    let ratio = (min << ATAN_ARGUMENT_BITS) / max;
+    let ratio = (y << ATAN_ARGUMENT_BITS) / x;
 
     let mut angle = {
         const K1: i32 = ((1. / 4. + 0.285 / PI)

From 3125365a1580731d104d74f3023f7c8fb5e1ff9e Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 14:01:57 -0800
Subject: [PATCH 33/44] add atan2 host benchmark

---
 dsp/Cargo.toml        |  2 +-
 dsp/benches/cossin.rs | 13 -------------
 dsp/benches/trig.rs   | 28 ++++++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 14 deletions(-)
 delete mode 100644 dsp/benches/cossin.rs
 create mode 100644 dsp/benches/trig.rs

diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml
index 8313a49..548e64f 100644
--- a/dsp/Cargo.toml
+++ b/dsp/Cargo.toml
@@ -12,7 +12,7 @@ serde = { version = "1.0", features = ["derive"], default-features = false }
 criterion = "0.3"
 
 [[bench]]
-name = "cossin"
+name = "trig"
 harness = false
 
 [features]
diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs
deleted file mode 100644
index 4e23774..0000000
--- a/dsp/benches/cossin.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-use core::f32::consts::PI;
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use dsp::trig::cossin;
-
-fn cossin_bench(c: &mut Criterion) {
-    let zi = -0x7304_2531_i32;
-    let zf = zi as f32 / i32::MAX as f32 * PI;
-    c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi))));
-    c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos()));
-}
-
-criterion_group!(benches, cossin_bench);
-criterion_main!(benches);
diff --git a/dsp/benches/trig.rs b/dsp/benches/trig.rs
new file mode 100644
index 0000000..19b6cce
--- /dev/null
+++ b/dsp/benches/trig.rs
@@ -0,0 +1,28 @@
+use core::f32::consts::PI;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use dsp::trig::{atan2, cossin};
+
+fn atan2_bench(c: &mut Criterion) {
+    let xi = (10 << 16) as i32;
+    let xf = xi as f32 / i32::MAX as f32;
+
+    let yi = (-26_328 << 16) as i32;
+    let yf = yi as f32 / i32::MAX as f32;
+
+    c.bench_function("atan2(y, x)", |b| {
+        b.iter(|| atan2(black_box(yi), black_box(xi)))
+    });
+    c.bench_function("y.atan2(x)", |b| {
+        b.iter(|| black_box(yf).atan2(black_box(xf)))
+    });
+}
+
+fn cossin_bench(c: &mut Criterion) {
+    let zi = -0x7304_2531_i32;
+    let zf = zi as f32 / i32::MAX as f32 * PI;
+    c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi))));
+    c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos()));
+}
+
+criterion_group!(benches, atan2_bench, cossin_bench);
+criterion_main!(benches);

From 7e794373f45c942cd3f108b32afe743cf52cf777 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Thu, 17 Dec 2020 14:21:39 -0800
Subject: [PATCH 34/44] atan2: fix output range description

---
 dsp/src/trig.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 13ce844..9873d7e 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -18,9 +18,9 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 /// # Returns
 ///
 /// The angle between the x-axis and the ray to the point (x,y). The
-/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1
-/// corresponds to an angle of -pi and i32::MAX corresponds to an
-/// angle of +pi.
+/// result range is from i32::MIN to i32::MAX, where i32::MIN
+/// represents -pi and, equivalently, +pi. i32::MAX represents one
+/// count less than +pi.
 pub fn atan2(y: i32, x: i32) -> i32 {
     let mut y = y >> 16;
     let mut x = x >> 16;

From 12d5945d811062cfa06b83cc4d51b8962d0646a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Fri, 18 Dec 2020 15:46:21 +0100
Subject: [PATCH 35/44] dsp/testing: simplify

---
 dsp/src/testing.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs
index 098ec87..4a14f22 100644
--- a/dsp/src/testing.rs
+++ b/dsp/src/testing.rs
@@ -1,3 +1,4 @@
+#![allow(dead_code)]
 use super::Complex;
 
 pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool {
@@ -23,9 +24,7 @@ pub fn complex_allclose(
     rtol: f32,
     atol: f32,
 ) -> bool {
-    let mut result: bool = true;
-    a.iter().zip(b.iter()).for_each(|(i, j)| {
-        result &= complex_isclose(*i, *j, rtol, atol);
-    });
-    result
+    a.iter()
+        .zip(b)
+        .all(|(&i, &j)| complex_isclose(i, j, rtol, atol))
 }

From 8d9af70c19d2606a99b68a70cb03b2834f152f3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Sun, 20 Dec 2020 20:35:26 +0100
Subject: [PATCH 36/44] trig/atan2: refine

* use dynamic scaling of the inputs to get accurate ratios (effectively
  floating point) to maintain accuracy for small arguments
* this also allows shifting later and keep more bits
* use u32 ratio to keep one more bit
* merge the corner case unittests into the big test value list
* print rms, absolute and axis-relative angle
* simplify the correction expression to get rid of one multiplication
* use 5 bits for the correction constant and 15 bits for r
* least squares optimal correction constant, this lowers the max error
  below 5e-5
---
 dsp/src/trig.rs | 154 +++++++++++++++++++++---------------------------
 1 file changed, 66 insertions(+), 88 deletions(-)

diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 9873d7e..3f96609 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -1,4 +1,4 @@
-use super::{shift_round, Complex};
+use super::Complex;
 use core::f64::consts::PI;
 
 include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
@@ -22,18 +22,25 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 /// represents -pi and, equivalently, +pi. i32::MAX represents one
 /// count less than +pi.
 pub fn atan2(y: i32, x: i32) -> i32 {
-    let mut y = y >> 16;
-    let mut x = x >> 16;
+    let sign = (x < 0, y < 0);
 
-    let sign = ((y >> 14) & 2) | ((x >> 15) & 1);
-    if sign & 1 == 1 {
-        x *= -1;
-    }
-    if sign & 2 == 2 {
-        y *= -1;
-    }
+    let mut y = y.wrapping_abs() as u32;
+    let mut x = x.wrapping_abs() as u32;
 
     let y_greater = y > x;
+    if y_greater {
+        core::mem::swap(&mut y, &mut x);
+    }
+
+    let z = (16 - y.leading_zeros() as i32).max(0);
+
+    x >>= z;
+    if x == 0 {
+        return 0;
+    }
+    y >>= z;
+    let r = (y << 16) / x;
+    debug_assert!(r <= 1 << 16);
 
     // Uses the general procedure described in the following
     // Mathematics stack exchange answer:
@@ -44,47 +51,37 @@ pub fn atan2(y: i32, x: i32) -> i32 {
     // to compute and to be more compatible with integer
     // arithmetic. The approximation technique used here is
     //
-    // pi / 4 * x + 0.285 * x * (1 - abs(x))
+    // pi / 4 * r + C * r * (1 - abs(r))
     //
     // which is taken from Rajan 2006: Efficient Approximations for
     // the Arctangent Function.
-    if y_greater {
-        core::mem::swap(&mut x, &mut y);
-    }
+    //
+    // The least mean squared error solution is C = 0.279 (not the 0.285 that
+    // Rajan uses). K = C*4/pi.
+    // Q5 for K provides sufficient correction accuracy while preserving
+    // as much smoothness of the quadratic correction as possible.
+    const FP_K: usize = 5;
+    const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32;
+    // debug_assert!(K == 11);
 
-    if x == 0 {
-        return 0;
-    }
-
-    // We need to share the 31 available non-sign bits between the
-    // atan argument and constant factors used in the atan
-    // approximation. Sharing the bits roughly equally between them
-    // gives good accuracy. Additionally, we cannot increase the
-    // number of atan argument bits beyond 15 because we must square
-    // it.
-    const ATAN_ARGUMENT_BITS: usize = 15;
-    let ratio = (y << ATAN_ARGUMENT_BITS) / x;
-
-    let mut angle = {
-        const K1: i32 = ((1. / 4. + 0.285 / PI)
-            * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64)
-            as i32;
-        const K2: i32 =
-            ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32;
-
-        ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS)
-    };
+    // `r` is unsigned Q16.16 and <= 1
+    // `angle` is signed Q1.31 with 1 << 31 == +- pi
+    // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use
+    // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range.
+    let mut angle = ((r << 13)
+        + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1)))
+        as i32;
 
     if y_greater {
-        angle = (i32::MAX >> 1) - angle;
+        angle = (1 << 30) - angle;
     }
 
-    if sign & 1 == 1 {
+    if sign.0 {
         angle = i32::MAX - angle;
     }
 
-    if sign & 2 == 2 {
-        angle *= -1;
+    if sign.1 {
+        angle = angle.wrapping_neg();
     }
 
     angle
@@ -162,7 +159,6 @@ pub fn cossin(phase: i32) -> Complex<i32> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::testing::isclose;
     use core::f64::consts::PI;
 
     fn angle_to_axis(angle: f64) -> f64 {
@@ -172,61 +168,43 @@ mod tests {
 
     #[test]
     fn atan2_absolute_error() {
-        const NUM_VALS: usize = 1_001;
-        let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
-        let val_bounds: (f64, f64) = (-1., 1.);
-        let val_delta: f64 =
-            (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
-        for i in 0..NUM_VALS {
-            test_vals[i] = val_bounds.0 + i as f64 * val_delta;
+        const N: usize = 321;
+        let mut test_vals = [0i32; N + 4];
+        let scale = (1i64 << 31) as f64;
+        for i in 0..N {
+            test_vals[i] = (scale * (-1. + 2. * i as f64 / N as f64)) as i32;
         }
 
-        let atol: f64 = 4e-5;
-        let rtol: f64 = 0.127;
+        assert!(test_vals.contains(&i32::MIN));
+        test_vals[N] = i32::MAX;
+        test_vals[N + 1] = 0;
+        test_vals[N + 2] = -1;
+        test_vals[N + 3] = 1;
+
+        let mut rms_err = 0f64;
+        let mut abs_err = 0f64;
+        let mut rel_err = 0f64;
+
         for &x in test_vals.iter() {
             for &y in test_vals.iter() {
-                let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
-                    / i16::MAX as f64;
-                let tol = atol + rtol * angle_to_axis(actual).abs();
-                let computed = (atan2(
-                    ((y * i16::MAX as f64) as i32) << 16,
-                    ((x * i16::MAX as f64) as i32) << 16,
-                ) >> 16) as f64
-                    / i16::MAX as f64
-                    * PI;
+                let want = (y as f64 / scale).atan2(x as f64 / scale);
+                let have = atan2(y, x) as f64 * PI / scale;
 
-                if !isclose(computed, actual, 0., tol) {
-                    println!("(x, y)   : {}, {}", x, y);
-                    println!("actual   : {}", actual);
-                    println!("computed : {}", computed);
-                    println!("tolerance: {}\n", tol);
-                    assert!(false);
+                let err = (have - want).abs();
+                abs_err = abs_err.max(err);
+                rms_err += err * err;
+                if err > 3e-5 {
+                    rel_err = rel_err.max(err / angle_to_axis(want));
                 }
             }
         }
-
-        // test min and max explicitly
-        for (x, y) in [
-            ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32),
-            ((i16::MIN as i32 + 1) << 16, (1 << 16) as i32),
-        ]
-        .iter()
-        {
-            let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64;
-            let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64;
-            let actual =
-                (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64;
-            let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI;
-            let tol = atol + rtol * angle_to_axis(actual).abs();
-
-            if !isclose(computed, actual, 0., tol) {
-                println!("(x, y)   : {}, {}", *x, *y);
-                println!("actual   : {}", actual);
-                println!("computed : {}", computed);
-                println!("tolerance: {}\n", tol);
-                assert!(false);
-            }
-        }
+        rms_err = rms_err.sqrt() / test_vals.len() as f64;
+        println!("max abs err: {:.2e}", abs_err);
+        println!("rms abs err: {:.2e}", rms_err);
+        println!("max rel err: {:.2e}", rel_err);
+        assert!(abs_err < 5e-3);
+        assert!(rms_err < 3e-3);
+        assert!(rel_err < 0.6);
     }
 
     #[test]

From cc42c0c477c03293994692ff8f072c643b1f2f11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Tue, 22 Dec 2020 16:49:12 +0100
Subject: [PATCH 37/44] iir_int: add optimized integer iir implementation

---
 dsp/src/iir_int.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 dsp/src/lib.rs     |  1 +
 2 files changed, 59 insertions(+)
 create mode 100644 dsp/src/iir_int.rs

diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs
new file mode 100644
index 0000000..1a4a6a9
--- /dev/null
+++ b/dsp/src/iir_int.rs
@@ -0,0 +1,58 @@
+use serde::{Deserialize, Serialize};
+
+pub type IIRState = [i32; 5];
+
+fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
+    // Rounding bias, half up
+    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
+    let y = x
+        .iter()
+        .zip(a)
+        .map(|(x, a)| *x as i64 * *a as i64)
+        .fold(y0, |y, xa| y + xa);
+    (y >> shift) as i32
+}
+
+/// Integer biquad IIR
+///
+/// See `dsp::iir::IIR` for general implementation details.
+/// Offset and limiting disabled to suit lowpass applications.
+/// Coefficient scaling fixed and optimized.
+#[derive(Copy, Clone, Deserialize, Serialize)]
+pub struct IIR {
+    pub ba: IIRState,
+    // pub y_offset: i32,
+    // pub y_min: i32,
+    // pub y_max: i32,
+}
+
+impl IIR {
+    /// Coefficient fixed point: signed Q2.30.
+    /// Tailored to low-passes, PI, II etc.
+    const SHIFT: u32 = 30;
+
+    /// Feed a new input value into the filter, update the filter state, and
+    /// return the new output. Only the state `xy` is modified.
+    ///
+    /// # Arguments
+    /// * `xy` - Current filter state.
+    /// * `x0` - New input.
+    pub fn update(&self, xy: &mut IIRState, x0: i32) -> i32 {
+        let n = self.ba.len();
+        debug_assert!(xy.len() == n);
+        // `xy` contains       x0 x1 y0 y1 y2
+        // Increment time      x1 x2 y1 y2 y3
+        // Shift               x1 x1 x2 y1 y2
+        // This unrolls better than xy.rotate_right(1)
+        xy.copy_within(0..n - 1, 1);
+        // Store x0            x0 x1 x2 y1 y2
+        xy[0] = x0;
+        // Compute y0 by multiply-accumulate
+        let y0 = macc(0, xy, &self.ba, IIR::SHIFT);
+        // Limit y0
+        // let y0 = y0.max(self.y_min).min(self.y_max);
+        // Store y0            x0 x1 y0 y1 y2
+        xy[n / 2] = y0;
+        y0
+    }
+}
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 67b1882..fb189fa 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -98,6 +98,7 @@ where
 }
 
 pub mod iir;
+pub mod iir_int;
 pub mod lockin;
 pub mod pll;
 pub mod trig;

From 67b6990fc027d5e575d1cb1476405f9cd7dc6e2d Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 4 Jan 2021 17:12:24 +0100
Subject: [PATCH 38/44] Addressing PR review

---
 Cargo.lock                   |  2 +-
 Cargo.toml                   |  4 ++--
 openocd.gdb                  |  3 +++
 src/design_parameters.rs     |  6 +++--
 src/digital_input_stamper.rs | 38 +++++++++++++++++++++++++++++--
 src/main.rs                  | 43 ++++++++++--------------------------
 src/timers.rs                |  4 ++--
 7 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index edc2864..f7082af 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -517,7 +517,7 @@ dependencies = [
 [[package]]
 name = "stm32h7xx-hal"
 version = "0.8.0"
-source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/number-of-transfers#e70a78788e74be5281321213b53e8cd1d213550e"
+source = "git+https://github.com/stm32-rs/stm32h7xx-hal?branch=dma#25ee0f3a9ae27d1fd6bb390d6045aa312f29f096"
 dependencies = [
  "bare-metal 1.0.0",
  "cast",
diff --git a/Cargo.toml b/Cargo.toml
index f1acbe0..7217589 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -53,8 +53,8 @@ default-features = false
 
 [dependencies.stm32h7xx-hal]
 features = ["stm32h743v", "rt", "unproven", "ethernet", "quadspi"]
-git = "https://github.com/quartiq/stm32h7xx-hal"
-branch = "feature/number-of-transfers"
+git = "https://github.com/stm32-rs/stm32h7xx-hal"
+branch = "dma"
 
 [features]
 semihosting = ["panic-semihosting", "cortex-m-log/semihosting"]
diff --git a/openocd.gdb b/openocd.gdb
index e903a33..a96f8d4 100644
--- a/openocd.gdb
+++ b/openocd.gdb
@@ -18,6 +18,9 @@ load
 # tbreak cortex_m_rt::reset_handler
 monitor reset halt
 
+source ../../PyCortexMDebug/cmdebug/svd_gdb.py
+svd_load ~/Downloads/STM32H743x.svd
+
 # cycle counter delta tool, place two bkpts around the section
 set var $cc=0xe0001004
 define qq
diff --git a/src/design_parameters.rs b/src/design_parameters.rs
index 414a9e2..40be7b6 100644
--- a/src/design_parameters.rs
+++ b/src/design_parameters.rs
@@ -1,9 +1,11 @@
+use super::hal::time::MegaHertz;
+
 /// The ADC setup time is the number of seconds after the CSn line goes low before the serial clock
 /// may begin. This is used for performing the internal ADC conversion.
 pub const ADC_SETUP_TIME: f32 = 220e-9;
 
 /// The maximum DAC/ADC serial clock line frequency. This is a hardware limit.
-pub const ADC_DAC_SCK_MHZ_MAX: u32 = 50;
+pub const ADC_DAC_SCK_MAX: MegaHertz = MegaHertz(50);
 
 /// The optimal counting frequency of the hardware timers used for timestamping and sampling.
-pub const TIMER_FREQUENCY_MHZ: u32 = 100;
+pub const TIMER_FREQUENCY: MegaHertz = MegaHertz(100);
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 5843528..43ff9c5 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -24,7 +24,41 @@
 ///!
 ///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If
 ///! timestamping is desired in DI1, a separate timer + capture channel will be necessary.
-use super::{hal, timers};
+use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS};
+
+/// Calculate the period of the digital input timestampe timer.
+///
+/// # Note
+/// The period returned will be 1 less than the required period in timer ticks. The value returned
+/// can be immediately programmed into a hardware timer period register.
+///
+/// The period is calculated to be some power-of-two multiple of the batch size, such that N batches
+/// will occur between each timestamp timer overflow.
+///
+/// # Returns
+/// A 32-bit value that can be programmed into a hardware timer period register.
+pub fn calculate_timestamp_timer_period() -> u32 {
+    // Calculate how long a single batch requires in timer ticks.
+    let batch_duration_ticks: u64 = SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
+
+    // Calculate the largest power-of-two that is less than or equal to
+    // `batches_per_overflow`.  This is completed by eliminating the least significant
+    // bits of the value until only the msb remains, which is always a power of two.
+    let batches_per_overflow: u64 =
+        (1u64 + u32::MAX as u64) / batch_duration_ticks;
+    let mut j = batches_per_overflow;
+    while (j & (j - 1)) != 0 {
+        j = j & (j - 1);
+    }
+
+    // Once the number of batches per timestamp overflow is calculated, we can figure out the final
+    // period of the timestamp timer. The period is always 1 larger than the value configured in the
+    // register.
+    let period: u64 = batch_duration_ticks * j - 1u64;
+    assert!(period < u32::MAX as u64);
+
+    period as u32
+}
 
 /// The timestamper for DI0 reference clock inputs.
 pub struct InputStamper {
@@ -45,7 +79,7 @@ impl InputStamper {
         // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the
         // capture source.
         let input_capture =
-            timer_channel.to_input_capture(timers::tim5::CC4S_A::TI4);
+            timer_channel.into_input_capture(timers::tim5::CC4S_A::TI4);
 
         Self {
             capture_channel: input_capture,
diff --git a/src/main.rs b/src/main.rs
index 5ae2b0f..52c5650 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,8 +30,6 @@ extern crate panic_halt;
 #[macro_use]
 extern crate log;
 
-use core::convert::TryInto;
-
 // use core::sync::atomic::{AtomicU32, AtomicBool, Ordering};
 use cortex_m_rt::exception;
 use rtic::cyccnt::{Instant, U32Ext};
@@ -294,10 +292,10 @@ const APP: () = {
             // Configure the timer to count at the designed tick rate. We will manually set the
             // period below.
             timer2.pause();
-            timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz());
+            timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY);
 
             let mut sampling_timer = timers::SamplingTimer::new(timer2);
-            sampling_timer.set_period(ADC_SAMPLE_TICKS - 1);
+            sampling_timer.set_period_ticks(ADC_SAMPLE_TICKS - 1);
 
             sampling_timer
         };
@@ -313,32 +311,15 @@ const APP: () = {
             // Configure the timer to count at the designed tick rate. We will manually set the
             // period below.
             timer5.pause();
-            timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY_MHZ.mhz());
+            timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY);
 
             // The time stamp timer must run at exactly a multiple of the sample timer based on the
-            // batch size. To accomodate this, we manually set the period identical to the sample
-            // timer, but use a prescaler that is `BATCH_SIZE` longer.
+            // batch size. To accommodate this, we manually set the prescaler identical to the sample
+            // timer, but use a period that is longer.
             let mut timer = timers::TimestampTimer::new(timer5);
 
-            let period: u32 = {
-                let batch_duration: u64 =
-                    SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
-                let batches_per_overflow: u64 =
-                    (1u64 + u32::MAX as u64) / batch_duration;
-
-                // Calculate the largest power-of-two that is less than `batches_per_overflow`.
-                // This is completed by eliminating the least significant bits of the value until
-                // only the msb remains, which is always a power of two.
-                let mut j = batches_per_overflow;
-                while (j & (j - 1)) != 0 {
-                    j = j & (j - 1);
-                }
-
-                let period: u64 = batch_duration * j - 1u64;
-                period.try_into().unwrap()
-            };
-
-            timer.set_period(period);
+            let period = digital_input_stamper::calculate_timestamp_timer_period();
+            timer.set_period_ticks(period);
 
             timer
         };
@@ -372,7 +353,7 @@ const APP: () = {
                 let spi: hal::spi::Spi<_, _, u16> = dp.SPI2.spi(
                     (spi_sck, spi_miso, hal::spi::NoMosi),
                     config,
-                    design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(),
+                    design_parameters::ADC_DAC_SCK_MAX,
                     ccdr.peripheral.SPI2,
                     &ccdr.clocks,
                 );
@@ -410,7 +391,7 @@ const APP: () = {
                 let spi: hal::spi::Spi<_, _, u16> = dp.SPI3.spi(
                     (spi_sck, spi_miso, hal::spi::NoMosi),
                     config,
-                    design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(),
+                    design_parameters::ADC_DAC_SCK_MAX,
                     ccdr.peripheral.SPI3,
                     &ccdr.clocks,
                 );
@@ -460,7 +441,7 @@ const APP: () = {
                 dp.SPI4.spi(
                     (spi_sck, spi_miso, hal::spi::NoMosi),
                     config,
-                    design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(),
+                    design_parameters::ADC_DAC_SCK_MAX,
                     ccdr.peripheral.SPI4,
                     &ccdr.clocks,
                 )
@@ -492,7 +473,7 @@ const APP: () = {
                 dp.SPI5.spi(
                     (spi_sck, spi_miso, hal::spi::NoMosi),
                     config,
-                    design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(),
+                    design_parameters::ADC_DAC_SCK_MAX,
                     ccdr.peripheral.SPI5,
                     &ccdr.clocks,
                 )
@@ -702,7 +683,7 @@ const APP: () = {
 
                     // Ensure that we have enough time for an IO-update every sample.
                     let sample_frequency =
-                        (design_parameters::TIMER_FREQUENCY_MHZ as f32
+                        (design_parameters::TIMER_FREQUENCY.0 as f32
                             * 1_000_000.0)
                             / ADC_SAMPLE_TICKS as f32;
 
diff --git a/src/timers.rs b/src/timers.rs
index 03bc0aa..8afa5cd 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -41,7 +41,7 @@ macro_rules! timer_channels {
 
                 /// Manually set the period of the timer.
                 #[allow(dead_code)]
-                pub fn set_period(&mut self, period: u32) {
+                pub fn set_period_ticks(&mut self, period: u32) {
                     let regs = unsafe { &*hal::stm32::$TY::ptr() };
                     regs.arr.write(|w| w.arr().bits(period));
                 }
@@ -136,7 +136,7 @@ macro_rules! timer_channels {
                 /// # Args
                 /// * `input` - The input source for the input capture event.
                 #[allow(dead_code)]
-                pub fn to_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{
+                pub fn into_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{
                     let regs = unsafe { &*<$TY>::ptr() };
                     regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input));
 

From 7ecd08d86bfca28725f8a22ff02f960032b7103e Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 4 Jan 2021 18:04:01 +0100
Subject: [PATCH 39/44] More updates after PR review

---
 src/design_parameters.rs     | 15 +++++++++++++++
 src/digital_input_stamper.rs | 13 ++++++-------
 src/main.rs                  | 35 ++++++++++++++++-------------------
 src/timers.rs                | 12 ++++++++++--
 4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/src/design_parameters.rs b/src/design_parameters.rs
index 40be7b6..125e133 100644
--- a/src/design_parameters.rs
+++ b/src/design_parameters.rs
@@ -9,3 +9,18 @@ pub const ADC_DAC_SCK_MAX: MegaHertz = MegaHertz(50);
 
 /// The optimal counting frequency of the hardware timers used for timestamping and sampling.
 pub const TIMER_FREQUENCY: MegaHertz = MegaHertz(100);
+
+/// The QSPI frequency for communicating with the pounder DDS.
+pub const POUNDER_QSPI_FREQUENCY: MegaHertz = MegaHertz(40);
+
+/// The delay after initiating a QSPI transfer before asserting the IO_Update for the pounder DDS.
+// Pounder Profile writes are always 16 bytes, with 2 cycles required per byte, coming out to a
+// total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so this comes out to an offset
+// of 800ns. We use 900ns to be safe.
+pub const POUNDER_IO_UPDATE_DELAY: f32 = 900_e-9;
+
+/// The duration to assert IO_Update for the pounder DDS.
+// IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile write. With pounder
+// SYNC_CLK running at 100MHz (1/4 of the pounder reference clock of 400MHz), this corresponds to
+// 40ns. To accommodate rounding errors, we use 50ns instead.
+pub const POUNDER_IO_UPDATE_DURATION: f32 = 50_e-9;
diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 43ff9c5..85f248f 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -24,7 +24,7 @@
 ///!
 ///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If
 ///! timestamping is desired in DI1, a separate timer + capture channel will be necessary.
-use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS};
+use super::{hal, timers, ADC_SAMPLE_TICKS, SAMPLE_BUFFER_SIZE};
 
 /// Calculate the period of the digital input timestampe timer.
 ///
@@ -39,7 +39,8 @@ use super::{hal, timers, SAMPLE_BUFFER_SIZE, ADC_SAMPLE_TICKS};
 /// A 32-bit value that can be programmed into a hardware timer period register.
 pub fn calculate_timestamp_timer_period() -> u32 {
     // Calculate how long a single batch requires in timer ticks.
-    let batch_duration_ticks: u64 = SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
+    let batch_duration_ticks: u64 =
+        SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64;
 
     // Calculate the largest power-of-two that is less than or equal to
     // `batches_per_overflow`.  This is completed by eliminating the least significant
@@ -101,10 +102,8 @@ impl InputStamper {
     /// To prevent timestamp loss, the batch size and sampling rate must be adjusted such that at
     /// most one timestamp will occur in each data processing cycle.
     pub fn latest_timestamp(&mut self) -> Option<u32> {
-        if self.capture_channel.check_overcapture() {
-            panic!("DI0 timestamp overrun");
-        }
-
-        self.capture_channel.latest_capture()
+        self.capture_channel
+            .latest_capture()
+            .expect("DI0 timestamp overrun")
     }
 }
diff --git a/src/main.rs b/src/main.rs
index 52c5650..ede498f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -318,7 +318,8 @@ const APP: () = {
             // timer, but use a period that is longer.
             let mut timer = timers::TimestampTimer::new(timer5);
 
-            let period = digital_input_stamper::calculate_timestamp_timer_period();
+            let period =
+                digital_input_stamper::calculate_timestamp_timer_period();
             timer.set_period_ticks(period);
 
             timer
@@ -543,7 +544,7 @@ const APP: () = {
                     let qspi = hal::qspi::Qspi::bank2(
                         dp.QUADSPI,
                         qspi_pins,
-                        40.mhz(),
+                        design_parameters::POUNDER_QSPI_FREQUENCY,
                         &ccdr.clocks,
                         ccdr.peripheral.QSPI,
                     );
@@ -665,30 +666,26 @@ const APP: () = {
                         ccdr.peripheral.HRTIM,
                     );
 
-                    // IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile
-                    // write. With pounder SYNC_CLK running at 100MHz (1/4 of the pounder reference
-                    // clock of 400MHz), this corresponds to 40ns. To accomodate rounding errors, we
-                    // use 50ns instead.
-                    //
-                    // Profile writes are always 16 bytes, with 2 cycles required per byte, coming
-                    // out to a total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so
-                    // this comes out to an offset of 800nS. We use 900ns to be safe - note that the
-                    // timer is triggered after the QSPI write, which can take approximately 120nS,
-                    // so there is additional margin.
+                    // IO_Update occurs after a fixed delay from the QSPI write. Note that the timer
+                    // is triggered after the QSPI write, which can take approximately 120ns, so
+                    // there is additional margin.
                     hrtimer.configure_single_shot(
                         hrtimer::Channel::Two,
-                        50_e-9,
-                        900_e-9,
+                        design_parameters::POUNDER_IO_UPDATE_DURATION,
+                        design_parameters::POUNDER_IO_UPDATE_DELAY,
                     );
 
                     // Ensure that we have enough time for an IO-update every sample.
-                    let sample_frequency =
-                        (design_parameters::TIMER_FREQUENCY.0 as f32
-                            * 1_000_000.0)
-                            / ADC_SAMPLE_TICKS as f32;
+                    let sample_frequency = (design_parameters::TIMER_FREQUENCY.0
+                        as f32
+                        * 1_000_000.0)
+                        / ADC_SAMPLE_TICKS as f32;
 
                     let sample_period = 1.0 / sample_frequency;
-                    assert!(sample_period > 900_e-9);
+                    assert!(
+                        sample_period
+                            > design_parameters::POUNDER_IO_UPDATE_DELAY
+                    );
 
                     hrtimer
                 };
diff --git a/src/timers.rs b/src/timers.rs
index 8afa5cd..5ffbeaf 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -147,17 +147,25 @@ macro_rules! timer_channels {
             impl [< Channel $index InputCapture >] {
                 /// Get the latest capture from the channel.
                 #[allow(dead_code)]
-                pub fn latest_capture(&mut self) -> Option<u32> {
+                pub fn latest_capture(&mut self) -> Result<Option<u32>, ()> {
                     // Note(unsafe): This channel owns all access to the specific timer channel.
                     // Only atomic operations on completed on the timer registers.
                     let regs = unsafe { &*<$TY>::ptr() };
                     let sr = regs.sr.read();
                     let ccx = regs.[< ccr $index >].read();
-                    if sr.[< cc $index if >]().bit_is_set() {
+
+                    let result = if sr.[< cc $index if >]().bit_is_set() {
                         regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit());
                         Some(ccx.ccr().bits())
                     } else {
                         None
+                    };
+
+                    // If there is an overcapture, return an error.
+                    if sr.[< cc $index of >]().bit_is_clear() {
+                        Ok(result)
+                    } else {
+                        Err(())
                     }
                 }
 

From 2b6e6f59a4e79195b1337cc51591cb64a4216223 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Mon, 4 Jan 2021 18:09:16 +0100
Subject: [PATCH 40/44] Adding comment about sample rate

---
 src/main.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main.rs b/src/main.rs
index ede498f..5979d90 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -56,6 +56,7 @@ use heapless::{consts::*, String};
 
 // The number of ticks in the ADC sampling timer. The timer runs at 100MHz, so the step size is
 // equal to 10ns per tick.
+// Currently, the sample rate is equal to: Fsample = 100/256 MHz = 390.625 kHz
 const ADC_SAMPLE_TICKS: u32 = 256;
 
 // The desired ADC sample processing buffer size.

From 13543ce048c9b0c8f63bfc3a15a9f7e48bc9c24a Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Mon, 4 Jan 2021 11:14:27 -0800
Subject: [PATCH 41/44] pll update input is named "x" not "input"

---
 dsp/src/pll.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsp/src/pll.rs b/dsp/src/pll.rs
index 74377f3..8df750f 100644
--- a/dsp/src/pll.rs
+++ b/dsp/src/pll.rs
@@ -45,7 +45,7 @@ impl PLL {
     /// The signal's phase/frequency is reconstructed relative to the sampling period.
     ///
     /// Args:
-    /// * `input`: New input phase sample.
+    /// * `x`: New input phase sample.
     /// * `shift_frequency`: Frequency error scaling. The frequency gain per update is
     ///   `1/(1 << shift_frequency)`.
     /// * `shift_phase`: Phase error scaling. The phase gain is `1/(1 << shift_phase)`

From a3cd17fd70031549fcf891208c4bd29914b72421 Mon Sep 17 00:00:00 2001
From: Matt Huszagh <huszaghmatt@gmail.com>
Date: Mon, 4 Jan 2021 16:37:46 -0800
Subject: [PATCH 42/44] pin clippy to stable

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d0ea705..e8e0e4e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,6 +29,7 @@ jobs:
       - uses: actions-rs/clippy-check@v1
         continue-on-error: true
         with:
+          toolchain: stable
           token: ${{ secrets.GITHUB_TOKEN }}
 
   compile:

From 9e7bfd4371d50323287044142d7fd2a4ec4d96c4 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Wed, 6 Jan 2021 12:24:09 +0100
Subject: [PATCH 43/44] Adding updates after review

---
 src/digital_input_stamper.rs |  2 +-
 src/main.rs                  |  9 +++++----
 src/timers.rs                | 11 +++++++----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs
index 85f248f..910ae98 100644
--- a/src/digital_input_stamper.rs
+++ b/src/digital_input_stamper.rs
@@ -56,7 +56,7 @@ pub fn calculate_timestamp_timer_period() -> u32 {
     // period of the timestamp timer. The period is always 1 larger than the value configured in the
     // register.
     let period: u64 = batch_duration_ticks * j - 1u64;
-    assert!(period < u32::MAX as u64);
+    assert!(period <= u32::MAX as u64);
 
     period as u32
 }
diff --git a/src/main.rs b/src/main.rs
index 5979d90..a0430d5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -677,10 +677,11 @@ const APP: () = {
                     );
 
                     // Ensure that we have enough time for an IO-update every sample.
-                    let sample_frequency = (design_parameters::TIMER_FREQUENCY.0
-                        as f32
-                        * 1_000_000.0)
-                        / ADC_SAMPLE_TICKS as f32;
+                    let sample_frequency = {
+                        let timer_frequency: hal::time::Hertz =
+                            design_parameters::TIMER_FREQUENCY.into();
+                        timer_frequency.0 as f32 / ADC_SAMPLE_TICKS as f32
+                    };
 
                     let sample_period = 1.0 / sample_frequency;
                     assert!(
diff --git a/src/timers.rs b/src/timers.rs
index 5ffbeaf..8d7d010 100644
--- a/src/timers.rs
+++ b/src/timers.rs
@@ -152,19 +152,22 @@ macro_rules! timer_channels {
                     // Only atomic operations on completed on the timer registers.
                     let regs = unsafe { &*<$TY>::ptr() };
                     let sr = regs.sr.read();
-                    let ccx = regs.[< ccr $index >].read();
 
                     let result = if sr.[< cc $index if >]().bit_is_set() {
-                        regs.sr.modify(|_, w| w.[< cc $index if >]().clear_bit());
+                        // Read the capture value. Reading the captured value clears the flag in the
+                        // status register automatically.
+                        let ccx = regs.[< ccr $index >].read();
                         Some(ccx.ccr().bits())
                     } else {
                         None
                     };
 
-                    // If there is an overcapture, return an error.
-                    if sr.[< cc $index of >]().bit_is_clear() {
+                    // Read SR again to check for a potential overcapture. If there is an
+                    // overcapture, clear the flag and return an error.
+                    if regs.sr.read().[< cc $index of >]().bit_is_clear() {
                         Ok(result)
                     } else {
+                        regs.sr.modify(|_, w| w.[< cc $index of >]().clear_bit());
                         Err(())
                     }
                 }

From 96485c4229009908f3540b6584b4c441c734c1e6 Mon Sep 17 00:00:00 2001
From: Ryan Summers <ryan.summers@vertigo-designs.com>
Date: Wed, 6 Jan 2021 13:36:13 +0100
Subject: [PATCH 44/44] Reverting unintended diff

---
 openocd.gdb | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/openocd.gdb b/openocd.gdb
index a96f8d4..e903a33 100644
--- a/openocd.gdb
+++ b/openocd.gdb
@@ -18,9 +18,6 @@ load
 # tbreak cortex_m_rt::reset_handler
 monitor reset halt
 
-source ../../PyCortexMDebug/cmdebug/svd_gdb.py
-svd_load ~/Downloads/STM32H743x.svd
-
 # cycle counter delta tool, place two bkpts around the section
 set var $cc=0xe0001004
 define qq