diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0ea705..e8e0e4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,7 @@ jobs: - uses: actions-rs/clippy-check@v1 continue-on-error: true with: + toolchain: stable token: ${{ secrets.GITHUB_TOKEN }} compile: diff --git a/Cargo.lock b/Cargo.lock index 866c958..030ff99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -874,7 +874,7 @@ dependencies = [ [[package]] name = "stm32h7xx-hal" version = "0.8.0" -source = "git+https://github.com/quartiq/stm32h7xx-hal?branch=feature/dma-buffer-swap-logic#5f97920b639f8cb29c9f30c89a33960d5b2082f8" +source = "git+https://github.com/stm32-rs/stm32h7xx-hal?branch=dma#25ee0f3a9ae27d1fd6bb390d6045aa312f29f096" dependencies = [ "bare-metal 1.0.0", "cast", diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml index 8313a49..548e64f 100644 --- a/dsp/Cargo.toml +++ b/dsp/Cargo.toml @@ -12,7 +12,7 @@ serde = { version = "1.0", features = ["derive"], default-features = false } criterion = "0.3" [[bench]] -name = "cossin" +name = "trig" harness = false [features] diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs deleted file mode 100644 index 4e23774..0000000 --- a/dsp/benches/cossin.rs +++ /dev/null @@ -1,13 +0,0 @@ -use core::f32::consts::PI; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::trig::cossin; - -fn cossin_bench(c: &mut Criterion) { - let zi = -0x7304_2531_i32; - let zf = zi as f32 / i32::MAX as f32 * PI; - c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); - c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); -} - -criterion_group!(benches, cossin_bench); -criterion_main!(benches); diff --git a/dsp/benches/trig.rs b/dsp/benches/trig.rs new file mode 100644 index 0000000..19b6cce --- /dev/null +++ b/dsp/benches/trig.rs @@ -0,0 +1,28 @@ +use core::f32::consts::PI; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use dsp::trig::{atan2, 
cossin}; + +fn atan2_bench(c: &mut Criterion) { + let xi = (10 << 16) as i32; + let xf = xi as f32 / i32::MAX as f32; + + let yi = (-26_328 << 16) as i32; + let yf = yi as f32 / i32::MAX as f32; + + c.bench_function("atan2(y, x)", |b| { + b.iter(|| atan2(black_box(yi), black_box(xi))) + }); + c.bench_function("y.atan2(x)", |b| { + b.iter(|| black_box(yf).atan2(black_box(xf))) + }); +} + +fn cossin_bench(c: &mut Criterion) { + let zi = -0x7304_2531_i32; + let zf = zi as f32 / i32::MAX as f32 * PI; + c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); + c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); +} + +criterion_group!(benches, atan2_bench, cossin_bench); +criterion_main!(benches); diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index c6f2100..dbec8d8 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,85 +1,8 @@ -use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; +use super::{abs, copysign, macc, max, min}; use core::f32; -// These are implemented here because core::f32 doesn't have them (yet). -// They are naive and don't handle inf/nan. -// `compiler-intrinsics`/llvm should have better (robust, universal, and -// faster) implementations. 
- -fn abs(x: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if x >= T::default() { - x - } else { - -x - } -} - -fn copysign(x: T, y: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if (x >= T::default() && y >= T::default()) - || (x <= T::default() && y <= T::default()) - { - x - } else { - -x - } -} - -#[cfg(not(feature = "nightly"))] -fn max(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x > y { - x - } else { - y - } -} - -#[cfg(not(feature = "nightly"))] -fn min(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x < y { - x - } else { - y - } -} - -#[cfg(feature = "nightly")] -fn max(x: f32, y: f32) -> f32 { - core::intrinsics::maxnumf32(x, y) -} - -#[cfg(feature = "nightly")] -fn min(x: f32, y: f32) -> f32 { - core::intrinsics::minnumf32(x, y) -} - -// Multiply-accumulate vectors `x` and `a`. -// -// A.k.a. dot product. -// Rust/LLVM optimize this nicely. -fn macc(y0: T, x: &[T], a: &[T]) -> T -where - T: Add + Mul + Copy, -{ - x.iter() - .zip(a) - .map(|(x, a)| *x * *a) - .fold(y0, |y, xa| y + xa) -} - /// IIR state and coefficients type. /// /// To represent the IIR state (input and output memory) during the filter update diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs new file mode 100644 index 0000000..1a4a6a9 --- /dev/null +++ b/dsp/src/iir_int.rs @@ -0,0 +1,58 @@ +use serde::{Deserialize, Serialize}; + +pub type IIRState = [i32; 5]; + +fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { + // Rounding bias, half up + let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); + let y = x + .iter() + .zip(a) + .map(|(x, a)| *x as i64 * *a as i64) + .fold(y0, |y, xa| y + xa); + (y >> shift) as i32 +} + +/// Integer biquad IIR +/// +/// See `dsp::iir::IIR` for general implementation details. +/// Offset and limiting disabled to suit lowpass applications. +/// Coefficient scaling fixed and optimized. 
+#[derive(Copy, Clone, Deserialize, Serialize)] +pub struct IIR { + pub ba: IIRState, + // pub y_offset: i32, + // pub y_min: i32, + // pub y_max: i32, +} + +impl IIR { + /// Coefficient fixed point: signed Q2.30. + /// Tailored to low-passes PI, II etc. + const SHIFT: u32 = 30; + + /// Feed a new input value into the filter, update the filter state, and + /// return the new output. Only the state `xy` is modified. + /// + /// # Arguments + /// * `xy` - Current filter state. + /// * `x0` - New input. + pub fn update(&self, xy: &mut IIRState, x0: i32) -> i32 { + let n = self.ba.len(); + debug_assert!(xy.len() == n); + // `xy` contains x0 x1 y0 y1 y2 + // Increment time x1 x2 y1 y2 y3 + // Shift x1 x1 x2 y1 y2 + // This unrolls better than xy.rotate_right(1) + xy.copy_within(0..n - 1, 1); + // Store x0 x0 x1 x2 y1 y2 + xy[0] = x0; + // Compute y0 by multiply-accumulate + let y0 = macc(0, xy, &self.ba, IIR::SHIFT); + // Limit y0 + // let y0 = y0.max(self.y_min).min(self.y_max); + // Store y0 x0 x1 y0 y1 y2 + xy[n / 2] = y0; + y0 + } +} diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 6dd20f7..fb189fa 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,6 +1,8 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] +use core::ops::{Add, Mul, Neg}; + pub type Complex = (T, T); /// Round up half. @@ -18,7 +20,85 @@ pub fn shift_round(x: i32, shift: usize) -> i32 { (x + (1 << (shift - 1))) >> shift } +fn abs(x: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if x >= T::default() { + x + } else { + -x + } +} + +// These are implemented here because core::f32 doesn't have them (yet). +// They are naive and don't handle inf/nan. +// `compiler-intrinsics`/llvm should have better (robust, universal, and +// faster) implementations. 
+ +fn copysign(x: T, y: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if (x >= T::default() && y >= T::default()) + || (x <= T::default() && y <= T::default()) + { + x + } else { + -x + } +} + +#[cfg(not(feature = "nightly"))] +fn max(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x > y { + x + } else { + y + } +} + +#[cfg(not(feature = "nightly"))] +fn min(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x < y { + x + } else { + y + } +} + +#[cfg(feature = "nightly")] +fn max(x: f32, y: f32) -> f32 { + core::intrinsics::maxnumf32(x, y) +} + +#[cfg(feature = "nightly")] +fn min(x: f32, y: f32) -> f32 { + core::intrinsics::minnumf32(x, y) +} + +// Multiply-accumulate vectors `x` and `a`. +// +// A.k.a. dot product. +// Rust/LLVM optimize this nicely. +fn macc(y0: T, x: &[T], a: &[T]) -> T +where + T: Add + Mul + Copy, +{ + x.iter() + .zip(a) + .map(|(x, a)| *x * *a) + .fold(y0, |y, xa| y + xa) +} + pub mod iir; +pub mod iir_int; pub mod lockin; pub mod pll; pub mod trig; diff --git a/dsp/src/pll.rs b/dsp/src/pll.rs index 74377f3..8df750f 100644 --- a/dsp/src/pll.rs +++ b/dsp/src/pll.rs @@ -45,7 +45,7 @@ impl PLL { /// The signal's phase/frequency is reconstructed relative to the sampling period. /// /// Args: - /// * `input`: New input phase sample. + /// * `x`: New input phase sample. /// * `shift_frequency`: Frequency error scaling. The frequency gain per update is /// `1/(1 << shift_frequency)`. /// * `shift_phase`: Phase error scaling. 
The phase gain is `1/(1 << shift_phase)` diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs index 1a8e109..4a14f22 100644 --- a/dsp/src/testing.rs +++ b/dsp/src/testing.rs @@ -1,6 +1,11 @@ +#![allow(dead_code)] use super::Complex; -pub fn isclose(a: f32, b: f32, rtol: f32, atol: f32) -> bool { +pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool { + (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol +} + +pub fn isclosef(a: f32, b: f32, rtol: f32, atol: f32) -> bool { (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol } @@ -10,7 +15,7 @@ pub fn complex_isclose( rtol: f32, atol: f32, ) -> bool { - isclose(a.0, b.0, rtol, atol) && isclose(a.1, b.1, rtol, atol) + isclosef(a.0, b.0, rtol, atol) && isclosef(a.1, b.1, rtol, atol) } pub fn complex_allclose( @@ -19,9 +24,7 @@ pub fn complex_allclose( rtol: f32, atol: f32, ) -> bool { - let mut result: bool = true; - a.iter().zip(b.iter()).for_each(|(i, j)| { - result &= complex_isclose(*i, *j, rtol, atol); - }); - result + a.iter() + .zip(b) + .all(|(&i, &j)| complex_isclose(i, j, rtol, atol)) } diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 5a99232..3f96609 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -3,6 +3,90 @@ use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); +/// 2-argument arctangent function. +/// +/// This implementation uses all integer arithmetic for fast +/// computation. It is designed to have high accuracy near the axes +/// and lower away from the axes. It is additionally designed so that +/// the error changes slowly with respect to the angle. +/// +/// # Arguments +/// +/// * `y` - Y-axis component. +/// * `x` - X-axis component. +/// +/// # Returns +/// +/// The angle between the x-axis and the ray to the point (x,y). The +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// represents -pi and, equivalently, +pi. i32::MAX represents one +/// count less than +pi. 
+pub fn atan2(y: i32, x: i32) -> i32 { + let sign = (x < 0, y < 0); + + let mut y = y.wrapping_abs() as u32; + let mut x = x.wrapping_abs() as u32; + + let y_greater = y > x; + if y_greater { + core::mem::swap(&mut y, &mut x); + } + + let z = (16 - y.leading_zeros() as i32).max(0); + + x >>= z; + if x == 0 { + return 0; + } + y >>= z; + let r = (y << 16) / x; + debug_assert!(r <= 1 << 16); + + // Uses the general procedure described in the following + // Mathematics stack exchange answer: + // + // https://math.stackexchange.com/a/1105038/583981 + // + // The atan approximation method has been modified to be cheaper + // to compute and to be more compatible with integer + // arithmetic. The approximation technique used here is + // + // pi / 4 * r + C * r * (1 - abs(r)) + // + // which is taken from Rajan 2006: Efficient Approximations for + // the Arctangent Function. + // + // The least mean squared error solution is C = 0.279 (no the 0.285 that + // Rajan uses). K = C*4/pi. + // Q5 for K provides sufficient correction accuracy while preserving + // as much smoothness of the quadratic correction as possible. + const FP_K: usize = 5; + const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32; + // debug_assert!(K == 11); + + // `r` is unsigned Q16.16 and <= 1 + // `angle` is signed Q1.31 with 1 << 31 == +- pi + // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use + // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range. + let mut angle = ((r << 13) + + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1))) + as i32; + + if y_greater { + angle = (1 << 30) - angle; + } + + if sign.0 { + angle = i32::MAX - angle; + } + + if sign.1 { + angle = angle.wrapping_neg(); + } + + angle +} + /// Compute the cosine and sine of an angle. /// This is ported from the MiSoC cossin core. 
/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py) @@ -75,8 +159,56 @@ pub fn cossin(phase: i32) -> Complex { #[cfg(test)] mod tests { use super::*; + use core::f64::consts::PI; + + fn angle_to_axis(angle: f64) -> f64 { + let angle = angle % (PI / 2.); + (PI / 2. - angle).min(angle) + } + #[test] - fn error_max_rms_all_phase() { + fn atan2_absolute_error() { + const N: usize = 321; + let mut test_vals = [0i32; N + 4]; + let scale = (1i64 << 31) as f64; + for i in 0..N { + test_vals[i] = (scale * (-1. + 2. * i as f64 / N as f64)) as i32; + } + + assert!(test_vals.contains(&i32::MIN)); + test_vals[N] = i32::MAX; + test_vals[N + 1] = 0; + test_vals[N + 2] = -1; + test_vals[N + 3] = 1; + + let mut rms_err = 0f64; + let mut abs_err = 0f64; + let mut rel_err = 0f64; + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let want = (y as f64 / scale).atan2(x as f64 / scale); + let have = atan2(y, x) as f64 * PI / scale; + + let err = (have - want).abs(); + abs_err = abs_err.max(err); + rms_err += err * err; + if err > 3e-5 { + rel_err = rel_err.max(err / angle_to_axis(want)); + } + } + } + rms_err = rms_err.sqrt() / test_vals.len() as f64; + println!("max abs err: {:.2e}", abs_err); + println!("rms abs err: {:.2e}", rms_err); + println!("max rel err: {:.2e}", rel_err); + assert!(abs_err < 5e-3); + assert!(rms_err < 3e-3); + assert!(rel_err < 0.6); + } + + #[test] + fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64; const MAX_PHASE: f64 = (1i64 << 32) as f64; diff --git a/src/adc.rs b/src/adc.rs index a652d3d..cb50a01 100644 --- a/src/adc.rs +++ b/src/adc.rs @@ -71,8 +71,8 @@ ///! of this, double-buffered mode does not offer any advantages over single-buffered mode (unless ///! double-buffered mode offers less overhead when accessing data). 
use super::{ - hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, - PeripheralToMemory, Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, + hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, PeripheralToMemory, + Priority, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, }; // The following data is written by the timer ADC sample trigger into each of the SPI TXFIFOs. Note @@ -95,12 +95,10 @@ macro_rules! adc_input { /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO /// whenever the tim2 update dma request occurs. struct $spi { - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, } impl $spi { - pub fn new( - _channel: sampling_timer::tim2::$trigger_channel, - ) -> Self { + pub fn new(_channel: timers::tim2::$trigger_channel) -> Self { Self { _channel } } } @@ -157,7 +155,7 @@ macro_rules! adc_input { hal::stm32::DMA1, >, data_stream: hal::dma::dma::$data_stream, - trigger_channel: sampling_timer::tim2::$trigger_channel, + trigger_channel: timers::tim2::$trigger_channel, ) -> Self { // Generate DMA events when an output compare of the timer hitting zero (timer roll over) // occurs. @@ -252,7 +250,7 @@ macro_rules! adc_input { // Start the next transfer. self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); self.next_buffer.replace(prev_buffer); // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 diff --git a/src/dac.rs b/src/dac.rs index 53076b0..cc6ed3b 100644 --- a/src/dac.rs +++ b/src/dac.rs @@ -50,7 +50,7 @@ ///! for re-use of a previously provided DAC output buffer. It is assumed that the DMA request is ///! served promptly after the transfer completes. use super::{ - hal, sampling_timer, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress, + hal, timers, DMAReq, DmaConfig, MemoryToPeripheral, TargetAddress, Transfer, SAMPLE_BUFFER_SIZE, }; @@ -68,12 +68,12 @@ macro_rules! 
dac_output { /// $spi is used as a type for indicating a DMA transfer into the SPI TX FIFO struct $spi { spi: hal::spi::Spi, - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, } impl $spi { pub fn new( - _channel: sampling_timer::tim2::$trigger_channel, + _channel: timers::tim2::$trigger_channel, spi: hal::spi::Spi, ) -> Self { Self { _channel, spi } @@ -128,7 +128,7 @@ macro_rules! dac_output { pub fn new( spi: hal::spi::Spi, stream: hal::dma::dma::$data_stream, - trigger_channel: sampling_timer::tim2::$trigger_channel, + trigger_channel: timers::tim2::$trigger_channel, ) -> Self { // Generate DMA events when an output compare of the timer hitting zero (timer roll over) // occurs. @@ -187,7 +187,7 @@ macro_rules! dac_output { // Start the next transfer. self.transfer.clear_interrupts(); - let (prev_buffer, _) = + let (prev_buffer, _, _) = self.transfer.next_transfer(next_buffer).unwrap(); // .unwrap_none() https://github.com/rust-lang/rust/issues/62633 diff --git a/src/design_parameters.rs b/src/design_parameters.rs index 9835568..125e133 100644 --- a/src/design_parameters.rs +++ b/src/design_parameters.rs @@ -1,6 +1,26 @@ +use super::hal::time::MegaHertz; + /// The ADC setup time is the number of seconds after the CSn line goes low before the serial clock /// may begin. This is used for performing the internal ADC conversion. pub const ADC_SETUP_TIME: f32 = 220e-9; /// The maximum DAC/ADC serial clock line frequency. This is a hardware limit. -pub const ADC_DAC_SCK_MHZ_MAX: u32 = 50; +pub const ADC_DAC_SCK_MAX: MegaHertz = MegaHertz(50); + +/// The optimal counting frequency of the hardware timers used for timestamping and sampling. +pub const TIMER_FREQUENCY: MegaHertz = MegaHertz(100); + +/// The QSPI frequency for communicating with the pounder DDS. +pub const POUNDER_QSPI_FREQUENCY: MegaHertz = MegaHertz(40); + +/// The delay after initiating a QSPI transfer before asserting the IO_Update for the pounder DDS. 
+// Pounder Profile writes are always 16 bytes, with 2 cycles required per byte, coming out to a +// total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so this comes out to an offset +// of 800nS. We use 900ns to be safe. +pub const POUNDER_IO_UPDATE_DELAY: f32 = 900_e-9; + +/// The duration to assert IO_Update for the pounder DDS. +// IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile write. With pounder +// SYNC_CLK running at 100MHz (1/4 of the pounder reference clock of 400MHz), this corresponds to +// 40ns. To accomodate rounding errors, we use 50ns instead. +pub const POUNDER_IO_UPDATE_DURATION: f32 = 50_e-9; diff --git a/src/digital_input_stamper.rs b/src/digital_input_stamper.rs new file mode 100644 index 0000000..910ae98 --- /dev/null +++ b/src/digital_input_stamper.rs @@ -0,0 +1,109 @@ +///! Digital Input 0 (DI0) reference clock timestamper +///! +///! This module provides a means of timestamping the rising edges of an external reference clock on +///! the DI0 with a timer value from TIM5. +///! +///! # Design +///! An input capture channel is configured on DI0 and fed into TIM5's capture channel 4. TIM5 is +///! then run in a free-running mode with a configured tick rate (PSC) and maximum count value +///! (ARR). Whenever an edge on DI0 triggers, the current TIM5 counter value is captured and +///! recorded as a timestamp. This timestamp can be either directly read from the timer channel or +///! can be collected asynchronously via DMA collection. +///! +///! To prevent silently discarding timestamps, the TIM5 input capture over-capture flag is +///! continually checked. Any over-capture event (which indicates an overwritten timestamp) then +///! triggers a panic to indicate the dropped timestamp so that design parameters can be adjusted. +///! +///! # Tradeoffs +///! It appears that DMA transfers can take a significant amount of time to disable (400ns) if they +///! 
are being prematurely stopped (such is the case here). As such, for a sample batch size of 1, +///! this can take up a significant amount of the total available processing time for the samples. +///! This module checks for any captured timestamps from the timer capture channel manually. In +///! this mode, the maximum input clock frequency supported is dependant on the sampling rate and +///! batch size. +///! +///! This module only supports DI0 for timestamping due to trigger constraints on the DIx pins. If +///! timestamping is desired in DI1, a separate timer + capture channel will be necessary. +use super::{hal, timers, ADC_SAMPLE_TICKS, SAMPLE_BUFFER_SIZE}; + +/// Calculate the period of the digital input timestampe timer. +/// +/// # Note +/// The period returned will be 1 less than the required period in timer ticks. The value returned +/// can be immediately programmed into a hardware timer period register. +/// +/// The period is calcualted to be some power-of-two multiple of the batch size, such that N batches +/// will occur between each timestamp timer overflow. +/// +/// # Returns +/// A 32-bit value that can be programmed into a hardware timer period register. +pub fn calculate_timestamp_timer_period() -> u32 { + // Calculate how long a single batch requires in timer ticks. + let batch_duration_ticks: u64 = + SAMPLE_BUFFER_SIZE as u64 * ADC_SAMPLE_TICKS as u64; + + // Calculate the largest power-of-two that is less than or equal to + // `batches_per_overflow`. This is completed by eliminating the least significant + // bits of the value until only the msb remains, which is always a power of two. + let batches_per_overflow: u64 = + (1u64 + u32::MAX as u64) / batch_duration_ticks; + let mut j = batches_per_overflow; + while (j & (j - 1)) != 0 { + j = j & (j - 1); + } + + // Once the number of batches per timestamp overflow is calculated, we can figure out the final + // period of the timestamp timer. 
The period is always 1 larger than the value configured in the + // register. + let period: u64 = batch_duration_ticks * j - 1u64; + assert!(period <= u32::MAX as u64); + + period as u32 +} + +/// The timestamper for DI0 reference clock inputs. +pub struct InputStamper { + _di0_trigger: hal::gpio::gpioa::PA3>, + capture_channel: timers::tim5::Channel4InputCapture, +} + +impl InputStamper { + /// Construct the DI0 input timestamper. + /// + /// # Args + /// * `trigger` - The capture trigger input pin. + /// * `timer_channel - The timer channel used for capturing timestamps. + pub fn new( + trigger: hal::gpio::gpioa::PA3>, + timer_channel: timers::tim5::Channel4, + ) -> Self { + // Utilize the TIM5 CH4 as an input capture channel - use TI4 (the DI0 input trigger) as the + // capture source. + let input_capture = + timer_channel.into_input_capture(timers::tim5::CC4S_A::TI4); + + Self { + capture_channel: input_capture, + _di0_trigger: trigger, + } + } + + /// Start to capture timestamps on DI0. + pub fn start(&mut self) { + self.capture_channel.enable(); + } + + /// Get the latest timestamp that has occurred. + /// + /// # Note + /// This function must be called sufficiently often. If an over-capture event occurs, this + /// function will panic, as this indicates a timestamp was inadvertently dropped. + /// + /// To prevent timestamp loss, the batch size and sampling rate must be adjusted such that at + /// most one timestamp will occur in each data processing cycle. + pub fn latest_timestamp(&mut self) -> Option { + self.capture_channel + .latest_capture() + .expect("DI0 timestamp overrun") + } +} diff --git a/src/main.rs b/src/main.rs index 0ac8857..c38baeb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -54,8 +54,10 @@ use smoltcp::wire::Ipv4Address; use heapless::{consts::*, String}; -// The desired sampling frequency of the ADCs. -const SAMPLE_FREQUENCY_KHZ: u32 = 500; +// The number of ticks in the ADC sampling timer. 
The timer runs at 100MHz, so the step size is +// equal to 10ns per tick. +// Currently, the sample rate is equal to: Fsample = 100/256 MHz = 390.625 KHz +const ADC_SAMPLE_TICKS: u32 = 256; // The desired ADC sample processing buffer size. const SAMPLE_BUFFER_SIZE: usize = 1; @@ -70,11 +72,12 @@ mod adc; mod afe; mod dac; mod design_parameters; +mod digital_input_stamper; mod eeprom; mod hrtimer; mod pounder; -mod sampling_timer; mod server; +mod timers; use adc::{Adc0Input, Adc1Input}; use dac::{Dac0Output, Dac1Output}; @@ -196,9 +199,9 @@ macro_rules! route_request { const APP: () = { struct Resources { afes: (AFE0, AFE1), - adcs: (Adc0Input, Adc1Input), dacs: (Dac0Output, Dac1Output), + input_stamper: digital_input_stamper::InputStamper, eeprom_i2c: hal::i2c::I2c, @@ -281,15 +284,50 @@ const APP: () = { hal::dma::dma::StreamsTuple::new(dp.DMA1, ccdr.peripheral.DMA1); // Configure timer 2 to trigger conversions for the ADC - let timer2 = dp.TIM2.timer( - SAMPLE_FREQUENCY_KHZ.khz(), - ccdr.peripheral.TIM2, - &ccdr.clocks, - ); + let mut sampling_timer = { + // The timer frequency is manually adjusted below, so the 1KHz setting here is a + // dont-care. + let mut timer2 = + dp.TIM2.timer(1.khz(), ccdr.peripheral.TIM2, &ccdr.clocks); + + // Configure the timer to count at the designed tick rate. We will manually set the + // period below. + timer2.pause(); + timer2.set_tick_freq(design_parameters::TIMER_FREQUENCY); + + let mut sampling_timer = timers::SamplingTimer::new(timer2); + sampling_timer.set_period_ticks(ADC_SAMPLE_TICKS - 1); + + sampling_timer + }; - let mut sampling_timer = sampling_timer::SamplingTimer::new(timer2); let sampling_timer_channels = sampling_timer.channels(); + let mut timestamp_timer = { + // The timer frequency is manually adjusted below, so the 1KHz setting here is a + // dont-care. + let mut timer5 = + dp.TIM5.timer(1.khz(), ccdr.peripheral.TIM5, &ccdr.clocks); + + // Configure the timer to count at the designed tick rate. 
We will manually set the + // period below. + timer5.pause(); + timer5.set_tick_freq(design_parameters::TIMER_FREQUENCY); + + // The time stamp timer must run at exactly a multiple of the sample timer based on the + // batch size. To accomodate this, we manually set the prescaler identical to the sample + // timer, but use a period that is longer. + let mut timer = timers::TimestampTimer::new(timer5); + + let period = + digital_input_stamper::calculate_timestamp_timer_period(); + timer.set_period_ticks(period); + + timer + }; + + let timestamp_timer_channels = timestamp_timer.channels(); + // Configure the SPI interfaces to the ADCs and DACs. let adcs = { let adc0 = { @@ -317,7 +355,7 @@ const APP: () = { let spi: hal::spi::Spi<_, _, u16> = dp.SPI2.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI2, &ccdr.clocks, ); @@ -355,7 +393,7 @@ const APP: () = { let spi: hal::spi::Spi<_, _, u16> = dp.SPI3.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI3, &ccdr.clocks, ); @@ -405,7 +443,7 @@ const APP: () = { dp.SPI4.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI4, &ccdr.clocks, ) @@ -437,7 +475,7 @@ const APP: () = { dp.SPI5.spi( (spi_sck, spi_miso, hal::spi::NoMosi), config, - design_parameters::ADC_DAC_SCK_MHZ_MAX.mhz(), + design_parameters::ADC_DAC_SCK_MAX, ccdr.peripheral.SPI5, &ccdr.clocks, ) @@ -507,7 +545,7 @@ const APP: () = { let qspi = hal::qspi::Qspi::bank2( dp.QUADSPI, qspi_pins, - 40.mhz(), + design_parameters::POUNDER_QSPI_FREQUENCY, &ccdr.clocks, ccdr.peripheral.QSPI, ); @@ -629,25 +667,26 @@ const APP: () = { ccdr.peripheral.HRTIM, ); - // IO_Update should be latched for 4 SYNC_CLK cycles after the QSPI profile - // write. 
With pounder SYNC_CLK running at 100MHz (1/4 of the pounder reference - // clock of 400MHz), this corresponds to 40ns. To accomodate rounding errors, we - // use 50ns instead. - // - // Profile writes are always 16 bytes, with 2 cycles required per byte, coming - // out to a total of 32 QSPI clock cycles. The QSPI is configured for 40MHz, so - // this comes out to an offset of 800nS. We use 900ns to be safe - note that the - // timer is triggered after the QSPI write, which can take approximately 120nS, - // so there is additional margin. + // IO_Update occurs after a fixed delay from the QSPI write. Note that the timer + // is triggered after the QSPI write, which can take approximately 120nS, so + // there is additional margin. hrtimer.configure_single_shot( hrtimer::Channel::Two, - 50_e-9, - 900_e-9, + design_parameters::POUNDER_IO_UPDATE_DURATION, + design_parameters::POUNDER_IO_UPDATE_DELAY, ); // Ensure that we have enough time for an IO-update every sample. + let sample_frequency = { + let timer_frequency: hal::time::Hertz = + design_parameters::TIMER_FREQUENCY.into(); + timer_frequency.0 as f32 / ADC_SAMPLE_TICKS as f32 + }; + + let sample_period = 1.0 / sample_frequency; assert!( - 1.0 / (1000 * SAMPLE_FREQUENCY_KHZ) as f32 > 900_e-9 + sample_period + > design_parameters::POUNDER_IO_UPDATE_DELAY ); hrtimer @@ -781,14 +820,25 @@ const APP: () = { // Utilize the cycle counter for RTIC scheduling. cp.DWT.enable_cycle_counter(); + let mut input_stamper = { + let trigger = gpioa.pa3.into_alternate_af2(); + digital_input_stamper::InputStamper::new( + trigger, + timestamp_timer_channels.ch4, + ) + }; + // Start sampling ADCs. 
sampling_timer.start(); + timestamp_timer.start(); + input_stamper.start(); init::LateResources { afes: (afe0, afe1), adcs, dacs, + input_stamper, dds_output, pounder: pounder_devices, @@ -799,7 +849,7 @@ const APP: () = { } } - #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch, dds_output], priority=2)] + #[task(binds=DMA1_STR3, resources=[adcs, dacs, iir_state, iir_ch, dds_output, input_stamper], priority=2)] fn process(c: process::Context) { let adc_samples = [ c.resources.adcs.0.acquire_buffer(), @@ -811,6 +861,8 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; + let _timestamp = c.resources.input_stamper.latest_timestamp(); + for channel in 0..adc_samples.len() { for sample in 0..adc_samples[0].len() { let x = f32::from(adc_samples[channel][sample] as i16); diff --git a/src/sampling_timer.rs b/src/sampling_timer.rs deleted file mode 100644 index 4755886..0000000 --- a/src/sampling_timer.rs +++ /dev/null @@ -1,119 +0,0 @@ -///! The sampling timer is used for managing ADC sampling and external reference timestamping. -use super::hal; - -/// The timer used for managing ADC sampling. -pub struct SamplingTimer { - timer: hal::timer::Timer, - channels: Option, -} - -impl SamplingTimer { - /// Construct the sampling timer. - pub fn new(mut timer: hal::timer::Timer) -> Self { - timer.pause(); - - Self { - timer, - // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any - // of the underlying timer channel registers, as ownership of the channels is now - // provided through the associated channel structures. We additionally guarantee this - // can only be called once because there is only one Timer2 and this resource takes - // ownership of it once instantiated. - channels: unsafe { Some(tim2::Channels::new()) }, - } - } - - /// Get the timer capture/compare channels. - pub fn channels(&mut self) -> tim2::Channels { - self.channels.take().unwrap() - } - - /// Start the sampling timer. 
- pub fn start(&mut self) { - self.timer.reset_counter(); - self.timer.resume(); - } -} - -macro_rules! timer_channel { - ($name:ident, $TY:ty, ($ccxde:expr, $ccrx:expr, $ccmrx_output:expr, $ccxs:expr)) => { - pub struct $name {} - - paste::paste! { - impl $name { - /// Construct a new timer channel. - /// - /// Note(unsafe): This function must only be called once. Once constructed, the - /// constructee guarantees to never modify the timer channel. - unsafe fn new() -> Self { - Self {} - } - - /// Allow CH4 to generate DMA requests. - pub fn listen_dma(&self) { - let regs = unsafe { &*<$TY>::ptr() }; - regs.dier.modify(|_, w| w.[< $ccxde >]().set_bit()); - } - - /// Operate CH2 as an output-compare. - /// - /// # Args - /// * `value` - The value to compare the sampling timer's counter against. - pub fn to_output_compare(&self, value: u32) { - let regs = unsafe { &*<$TY>::ptr() }; - assert!(value <= regs.arr.read().bits()); - regs.[< $ccrx >].write(|w| w.ccr().bits(value)); - regs.[< $ccmrx_output >]() - .modify(|_, w| unsafe { w.[< $ccxs >]().bits(0) }); - } - } - } - }; -} - -pub mod tim2 { - use stm32h7xx_hal as hal; - - /// The channels representing the timer. - pub struct Channels { - pub ch1: Channel1, - pub ch2: Channel2, - pub ch3: Channel3, - pub ch4: Channel4, - } - - impl Channels { - /// Construct a new set of channels. - /// - /// Note(unsafe): This is only safe to call once. 
- pub unsafe fn new() -> Self { - Self { - ch1: Channel1::new(), - ch2: Channel2::new(), - ch3: Channel3::new(), - ch4: Channel4::new(), - } - } - } - - timer_channel!( - Channel1, - hal::stm32::TIM2, - (cc1de, ccr1, ccmr1_output, cc1s) - ); - timer_channel!( - Channel2, - hal::stm32::TIM2, - (cc2de, ccr2, ccmr1_output, cc1s) - ); - timer_channel!( - Channel3, - hal::stm32::TIM2, - (cc3de, ccr3, ccmr2_output, cc3s) - ); - timer_channel!( - Channel4, - hal::stm32::TIM2, - (cc4de, ccr4, ccmr2_output, cc4s) - ); -} diff --git a/src/timers.rs b/src/timers.rs new file mode 100644 index 0000000..8d7d010 --- /dev/null +++ b/src/timers.rs @@ -0,0 +1,221 @@ +///! The sampling timer is used for managing ADC sampling and external reference timestamping. +use super::hal; + +macro_rules! timer_channels { + ($name:ident, $TY:ident, u32) => { + paste::paste! { + + /// The timer used for managing ADC sampling. + pub struct $name { + timer: hal::timer::Timer]>, + channels: Option<[< $TY:lower >]::Channels>, + } + + impl $name { + /// Construct the sampling timer. + pub fn new(mut timer: hal::timer::Timer]>) -> Self { + timer.pause(); + + Self { + timer, + // Note(unsafe): Once these channels are taken, we guarantee that we do not modify any + // of the underlying timer channel registers, as ownership of the channels is now + // provided through the associated channel structures. We additionally guarantee this + // can only be called once because there is only one Timer2 and this resource takes + // ownership of it once instantiated. + channels: unsafe { Some([< $TY:lower >]::Channels::new()) }, + } + } + + /// Get the timer capture/compare channels. + pub fn channels(&mut self) -> [< $TY:lower >]::Channels { + self.channels.take().unwrap() + } + + /// Get the period of the timer. + #[allow(dead_code)] + pub fn get_period(&self) -> u32 { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + regs.arr.read().arr().bits() + } + + /// Manually set the period of the timer. 
+ #[allow(dead_code)] + pub fn set_period_ticks(&mut self, period: u32) { + let regs = unsafe { &*hal::stm32::$TY::ptr() }; + regs.arr.write(|w| w.arr().bits(period)); + } + + /// Start the timer. + pub fn start(mut self) { + // Force a refresh of the frequency settings. + self.timer.apply_freq(); + + self.timer.reset_counter(); + self.timer.resume(); + } + } + + pub mod [< $TY:lower >] { + pub use hal::stm32::tim2::ccmr1_input::{CC1S_A, CC2S_A}; + pub use hal::stm32::tim2::ccmr2_input::{CC3S_A, CC4S_A}; + + use stm32h7xx_hal as hal; + use hal::dma::{traits::TargetAddress, PeripheralToMemory, dma::DMAReq}; + use hal::stm32::$TY; + + /// The channels representing the timer. + pub struct Channels { + pub ch1: Channel1, + pub ch2: Channel2, + pub ch3: Channel3, + pub ch4: Channel4, + } + + impl Channels { + /// Construct a new set of channels. + /// + /// Note(unsafe): This is only safe to call once. + pub unsafe fn new() -> Self { + Self { + ch1: Channel1::new(), + ch2: Channel2::new(), + ch3: Channel3::new(), + ch4: Channel4::new(), + } + } + } + + timer_channels!(1, $TY, ccmr1); + timer_channels!(2, $TY, ccmr1); + timer_channels!(3, $TY, ccmr2); + timer_channels!(4, $TY, ccmr2); + } + } + }; + + ($index:expr, $TY:ty, $ccmrx:expr) => { + paste::paste! { + /// A capture/compare channel of the timer. + pub struct [< Channel $index >] {} + + /// A capture channel of the timer. + pub struct [< Channel $index InputCapture>] {} + + impl [< Channel $index >] { + /// Construct a new timer channel. + /// + /// Note(unsafe): This function must only be called once. Once constructed, the + /// constructee guarantees to never modify the timer channel. + unsafe fn new() -> Self { + Self {} + } + + /// Allow the channel to generate DMA requests. + #[allow(dead_code)] + pub fn listen_dma(&self) { + let regs = unsafe { &*<$TY>::ptr() }; + regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); + } + + /// Operate the channel as an output-compare. 
+ /// + /// # Args + /// * `value` - The value to compare the sampling timer's counter against. + #[allow(dead_code)] + pub fn to_output_compare(&self, value: u32) { + let regs = unsafe { &*<$TY>::ptr() }; + assert!(value <= regs.arr.read().bits()); + regs.[< ccr $index >].write(|w| w.ccr().bits(value)); + regs.[< $ccmrx _output >]() + .modify(|_, w| unsafe { w.[< cc $index s >]().bits(0) }); + } + + /// Operate the channel in input-capture mode. + /// + /// # Args + /// * `input` - The input source for the input capture event. + #[allow(dead_code)] + pub fn into_input_capture(self, input: hal::stm32::tim2::[< $ccmrx _input >]::[< CC $index S_A >]) -> [< Channel $index InputCapture >]{ + let regs = unsafe { &*<$TY>::ptr() }; + regs.[< $ccmrx _input >]().modify(|_, w| w.[< cc $index s>]().variant(input)); + + [< Channel $index InputCapture >] {} + } + } + + impl [< Channel $index InputCapture >] { + /// Get the latest capture from the channel. + #[allow(dead_code)] + pub fn latest_capture(&mut self) -> Result, ()> { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations are completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + let sr = regs.sr.read(); + + let result = if sr.[< cc $index if >]().bit_is_set() { + // Read the capture value. Reading the captured value clears the flag in the + // status register automatically. + let ccx = regs.[< ccr $index >].read(); + Some(ccx.ccr().bits()) + } else { + None + }; + + // Read SR again to check for a potential over-capture. If there is an + // overcapture, return an error. + if regs.sr.read().[< cc $index of >]().bit_is_clear() { + Ok(result) + } else { + regs.sr.modify(|_, w| w.[< cc $index of >]().clear_bit()); + Err(()) + } + } + + /// Allow the channel to generate DMA requests. + #[allow(dead_code)] + pub fn listen_dma(&self) { + // Note(unsafe): This channel owns all access to the specific timer channel.
+ // Only atomic operations are completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.dier.modify(|_, w| w.[< cc $index de >]().set_bit()); + } + + /// Enable the input capture to begin capturing timer values. + #[allow(dead_code)] + pub fn enable(&mut self) { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations are completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.ccer.modify(|_, w| w.[< cc $index e >]().set_bit()); + } + + /// Check if an over-capture event has occurred. + #[allow(dead_code)] + pub fn check_overcapture(&self) -> bool { + // Note(unsafe): This channel owns all access to the specific timer channel. + // Only atomic operations are completed on the timer registers. + let regs = unsafe { &*<$TY>::ptr() }; + regs.sr.read().[< cc $index of >]().bit_is_set() + } + } + + // Note(unsafe): This manually implements DMA support for input-capture channels. This + // is safe as it is only completed once per channel and each DMA request is allocated to + // each channel as the owner. + unsafe impl TargetAddress for [< Channel $index InputCapture >] { + type MemSize = u32; + + const REQUEST_LINE: Option = Some(DMAReq::[< $TY _CH $index >]as u8); + + fn address(&self) -> u32 { + let regs = unsafe { &*<$TY>::ptr() }; + ®s.[] as *const _ as u32 + } + } + } + }; +} + +timer_channels!(SamplingTimer, TIM2, u32); +timer_channels!(TimestampTimer, TIM5, u32);