diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4b66b0..cf68b3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,9 @@ on: branches: [master, staging, trying] pull_request: branches: [master] - + schedule: + # UTC + - cron: '48 4 * * *' env: CARGO_TERM_COLOR: always diff --git a/Cargo.lock b/Cargo.lock index 7fa03f0..0999288 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,6 +724,7 @@ dependencies = [ "dsp", "embedded-hal", "enum-iterator", + "generic-array 0.14.4", "heapless", "log", "mcp23017", diff --git a/Cargo.toml b/Cargo.toml index 599e595..f384fe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ dsp = { path = "dsp" } ad9959 = { path = "ad9959" } smoltcp-nal = "0.1.0" miniconf = "0.1" +generic-array = "0.14" [patch.crates-io.miniconf] git = "https://github.com/quartiq/miniconf.git" diff --git a/dsp/benches/micro.rs b/dsp/benches/micro.rs index 1f2cc1f..0902a4c 100644 --- a/dsp/benches/micro.rs +++ b/dsp/benches/micro.rs @@ -1,8 +1,9 @@ use core::f32::consts::PI; -use dsp::{atan2, cossin}; -use dsp::{iir, iir_int}; -use dsp::{PLL, RPLL}; + use easybench::bench_env; +use generic_array::typenum::U4; + +use dsp::{atan2, cossin, iir, iir_int, Lowpass, PLL, RPLL}; fn atan2_bench() { let xi = (10 << 16) as i32; @@ -70,6 +71,18 @@ fn iir_bench() { ); } +fn lowpass_bench() { + let mut dut = Lowpass::::default(); + println!( + "Lowpass::::update(x, k): {}", + bench_env((0x32421, 14), |(x, k)| dut.update(*x, *k)) + ); + println!( + "Lowpass::::update(x, 14): {}", + bench_env(0x32421, |x| dut.update(*x, 14)) + ); +} + fn main() { atan2_bench(); cossin_bench(); @@ -77,4 +90,5 @@ fn main() { pll_bench(); iir_int_bench(); iir_bench(); + lowpass_bench(); } diff --git a/dsp/build.rs b/dsp/build.rs index ca43736..edd56b0 100644 --- a/dsp/build.rs +++ b/dsp/build.rs @@ -15,7 +15,7 @@ fn write_cossin_table() { .unwrap(); write!( file, - "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = [" + "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = [" ) .unwrap(); @@ -26,15 +26,17 @@ fn write_cossin_table() { const AMPLITUDE: f64 = u16::MAX as f64; for i in 0..(1 << DEPTH) { - // use midpoint samples to save one entry in the LUT - let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5); - // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 - let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16; - let sin = (phase.sin() * AMPLITUDE).round() as u16; if i % 4 == 0 { write!(file, "\n ").unwrap(); } - write!(file, " ({}, {}),", cos, sin).unwrap(); + // Use midpoint samples to save one entry in the LUT + let (sin, cos) = + (PI / 4. * ((i as f64 + 0.5) / (1 << DEPTH) as f64)).sin_cos(); + // Add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 + // The -1 LSB is cancelled when unscaling with the biased half amplitude + let cos = ((cos * 2. - 1.) * AMPLITUDE - 1.).round() as u32; + let sin = (sin * AMPLITUDE).round() as u32; + write!(file, " {},", cos + (sin << 16)).unwrap(); } writeln!(file, "\n];").unwrap(); diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs index 3349460..1eb003f 100644 --- a/dsp/src/complex.rs +++ b/dsp/src/complex.rs @@ -8,6 +8,8 @@ pub trait ComplexExt { fn abs_sqr(&self) -> U; fn log2(&self) -> T; fn arg(&self) -> T; + fn saturating_add(&self, other: Self) -> Self; + fn saturating_sub(&self, other: Self) -> Self; } impl ComplexExt for Complex { @@ -23,7 +25,7 @@ impl ComplexExt for Complex { /// ``` fn from_angle(angle: i32) -> Self { let (c, s) = cossin(angle); - Self { re: c, im: s } + Self::new(c, s) } /// Return the absolute square (the squared magnitude). @@ -85,6 +87,20 @@ impl ComplexExt for Complex { fn arg(&self) -> i32 { atan2(self.im, self.re) } + + fn saturating_add(&self, other: Self) -> Self { + Self::new( + self.re.saturating_add(other.re), + self.im.saturating_add(other.im), + ) + } + + fn saturating_sub(&self, other: Self) -> Self { + Self::new( + self.re.saturating_sub(other.re), + self.im.saturating_sub(other.im), + ) + } } /// Full scale fixed point multiplication. diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 4a5cccd..5dae7c3 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -11,10 +11,10 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// /// # Returns /// The cos and sin values of the provided phase as a `(i32, i32)` -/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error +/// tuple. With a 7-bit deep LUT there is 9e-6 max and 4e-6 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { - // Phase bits excluding the three highes MSB + // Phase bits excluding the three highest MSB const OCTANT_BITS: usize = 32 - 3; // This is a slightly more compact way to compute the four flags for @@ -22,50 +22,55 @@ pub fn cossin(phase: i32) -> (i32, i32) { let mut octant = (phase as u32) >> OCTANT_BITS; octant ^= octant << 1; - // Mask off octant bits. This leaves the angle in the range [0, pi/4). - let mut phase = phase & ((1 << OCTANT_BITS) - 1); - + let mut phase = phase; if octant & 1 != 0 { // phase = pi/4 - phase - phase = (1 << OCTANT_BITS) - 1 - phase; + phase = !phase; } - let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize]; - // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos - // values and scale the sine values as encoded in the LUT. - let mut cos = lookup.0 as i32 + u16::MAX as i32; - let mut sin = (lookup.1 as i32) << 1; + // Mask off octant bits. This leaves the angle in the range [0, pi/4). + phase &= (1 << OCTANT_BITS) - 1; // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range. const ALIGN_MSB: usize = 32 - 16 - 1; phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB; + + let lookup = COSSIN[(phase >> ALIGN_MSB) as usize]; phase &= (1 << ALIGN_MSB) - 1; + // The phase values used for the LUT are at midpoint for the truncated phase. // Interpolate relative to the LUT entry midpoint. - phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32; - // Fixed point pi/4. - const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32; - // No rounding bias necessary here since we keep enough low bits. - let dphi = (phase * PI4) >> (32 - ALIGN_MSB); + phase -= 1 << (ALIGN_MSB - 1); - // Make room for the sign bit. - let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1); + // Cancel the -1 bias that was conditionally introduced above. + // This lowers the DC spur from 2e-8 to 2e-10 magnitude. + // phase += (octant & 1) as i32; + + // Fixed point pi/4. + const PI4: i32 = (PI / 4. * (1 << 16) as f64) as _; + // No rounding bias necessary here since we keep enough low bits. + let dphi = (phase * PI4) >> 16; + + // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos + // values and scale the sine values as encoded in the LUT. + let mut cos = (lookup & 0xffff) as i32 + (1 << 16); + let mut sin = (lookup >> 16) as i32; + + let dcos = (sin * dphi) >> COSSIN_DEPTH; let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1); cos = (cos << (ALIGN_MSB - 1)) - dcos; - sin = (sin << (ALIGN_MSB - 1)) + dsin; + sin = (sin << ALIGN_MSB) + dsin; // Unmap using octant bits. if octant & 2 != 0 { core::mem::swap(&mut sin, &mut cos); } - if octant & 4 != 0 { - cos *= -1; + cos = -cos; } - if octant & 8 != 0 { - sin *= -1; + sin = -sin; } (cos, sin) @@ -79,7 +84,7 @@ mod tests { #[test] fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. - const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _; + const AMPLITUDE: f64 = (1i64 << 31) as f64 - 0.85 * (1i64 << 15) as f64; const MAX_PHASE: f64 = (1i64 << 32) as _; let mut rms_err = (0f64, 0f64); let mut sum_err = (0f64, 0f64); @@ -121,8 +126,8 @@ mod tests { max_err.0 = max_err.0.max(err.0.abs()); max_err.1 = max_err.1.max(err.1.abs()); } - rms_err.0 /= MAX_PHASE; - rms_err.1 /= MAX_PHASE; + rms_err.0 /= (1 << PHASE_DEPTH) as f64; + rms_err.1 /= (1 << PHASE_DEPTH) as f64; println!("sum: {:.2e} {:.2e}", sum.0, sum.1); println!("demod: {:.2e} {:.2e}", demod.0, demod.1); @@ -131,18 +136,18 @@ mod tests { println!("max: {:.2e} {:.2e}", max_err.0, max_err.1); assert!(sum.0.abs() < 4e-10); - assert!(sum.1.abs() < 4e-10); + assert!(sum.1.abs() < 3e-8); assert!(demod.0.abs() < 4e-10); - assert!(demod.1.abs() < 4e-10); + assert!(demod.1.abs() < 1e-8); assert!(sum_err.0.abs() < 4e-10); assert!(sum_err.1.abs() < 4e-10); - assert!(rms_err.0.sqrt() < 6e-8); - assert!(rms_err.1.sqrt() < 6e-8); + assert!(rms_err.0.sqrt() < 4e-6); + assert!(rms_err.1.sqrt() < 4e-6); - assert!(max_err.0 < 1.1e-5); - assert!(max_err.1 < 1.1e-5); + assert!(max_err.0 < 1e-5); + assert!(max_err.1 < 1e-5); } } diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs index 4af50f1..ef0c1dc 100644 --- a/dsp/src/iir_int.rs +++ b/dsp/src/iir_int.rs @@ -1,5 +1,7 @@ +use super::tools::macc_i32; use core::f64::consts::PI; -use serde::{Deserialize, Serialize}; +use miniconf::StringSet; +use serde::Deserialize; /// Generic vector for integer IIR filter. /// This struct is used to hold the x/y input/output data vector or the b/a coefficient @@ -39,23 +41,12 @@ impl Coeff for Vec5 { } } -fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { - // Rounding bias, half up - let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); - let y = x - .iter() - .zip(a) - .map(|(x, a)| *x as i64 * *a as i64) - .fold(y0, |y, xa| y + xa); - (y >> shift) as i32 -} - /// Integer biquad IIR /// /// See `dsp::iir::IIR` for general implementation details. /// Offset and limiting disabled to suit lowpass applications. /// Coefficient scaling fixed and optimized. -#[derive(Copy, Clone, Default, Deserialize, Serialize)] +#[derive(Copy, Clone, Default, Debug, StringSet, Deserialize)] pub struct IIR { pub ba: Vec5, // pub y_offset: i32, @@ -85,7 +76,7 @@ impl IIR { // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; // Compute y0 by multiply-accumulate - let y0 = macc(0, xy, &self.ba, IIR::SHIFT); + let y0 = macc_i32(0, xy, &self.ba, IIR::SHIFT); // Limit y0 // let y0 = y0.max(self.y_min).min(self.y_max); // Store y0 x0 x1 y0 y1 y2 diff --git a/dsp/src/lockin.rs b/dsp/src/lockin.rs index 6a977b4..613a133 100644 --- a/dsp/src/lockin.rs +++ b/dsp/src/lockin.rs @@ -1,12 +1,12 @@ use super::{Complex, ComplexExt, Lowpass, MulScaled}; -use generic_array::typenum::U2; +use generic_array::ArrayLength; #[derive(Clone, Default)] -pub struct Lockin { - state: [Lowpass; 2], +pub struct Lockin> { + state: [Lowpass; 2], } -impl Lockin { +impl> Lockin { /// Update the lockin with a sample taken at a given phase. pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex { // Get the LO signal for demodulation and mix the sample; diff --git a/dsp/src/lowpass.rs b/dsp/src/lowpass.rs index 5ab803d..10d5838 100644 --- a/dsp/src/lowpass.rs +++ b/dsp/src/lowpass.rs @@ -27,10 +27,10 @@ impl> Lowpass { // Note T-DF-I and the zeros at Nyquist. let mut x = x; for y in self.y.iter_mut() { - let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k; + let dy = x.saturating_sub(*y) >> k; *y += dy; x = *y - (dy >> 1); } - x + x.saturating_add((self.y.len() as i32) << (k - 1)) } } diff --git a/dsp/src/tools.rs b/dsp/src/tools.rs index 378734f..6fc2b12 100644 --- a/dsp/src/tools.rs +++ b/dsp/src/tools.rs @@ -77,6 +77,17 @@ where .fold(y0, |y, xa| y + xa) } +pub fn macc_i32(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { + // Rounding bias, half up + let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); + let y = x + .iter() + .zip(a) + .map(|(x, a)| *x as i64 * *a as i64) + .fold(y0, |y, xa| y + xa); + (y >> shift) as i32 +} + /// Combine high and low i32 into a single downscaled i32, saturating the type. pub fn saturating_scale(lo: i32, hi: i32, shift: u32) -> i32 { debug_assert!(shift & 31 == shift); diff --git a/src/bin/lockin-external.rs b/src/bin/lockin-external.rs index 3b7c6a8..1b11c76 100644 --- a/src/bin/lockin-external.rs +++ b/src/bin/lockin-external.rs @@ -2,14 +2,12 @@ #![no_std] #![no_main] -use stm32h7xx_hal as hal; - -use stabilizer::{hardware, hardware::design_parameters}; - use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL}; +use generic_array::typenum::U4; use hardware::{ Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1, }; +use stabilizer::{hardware, hardware::design_parameters}; #[rtic::app(device = stm32h7xx_hal::stm32, peripherals = true, monotonic = rtic::cyccnt::CYCCNT)] const APP: () = { @@ -20,7 +18,7 @@ const APP: () = { timestamper: InputStamper, pll: RPLL, - lockin: Lockin, + lockin: Lockin, } #[init] @@ -88,22 +86,20 @@ const APP: () = { .map(|t| t as i32); let (pll_phase, pll_frequency) = c.resources.pll.update( timestamp, - 21, // frequency settling time (log2 counter cycles), TODO: expose - 21, // phase settling time, TODO: expose + 21, // frequency settling time (log2 counter cycles), + 21, // phase settling time ); // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) - let harmonic: i32 = -1; // TODO: expose + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = 0; // TODO: expose + let phase_offset: i32 = 0; // Log2 lowpass time constant - let time_constant: u8 = 6; // TODO: expose + let time_constant: u8 = 6; let sample_frequency = ((pll_frequency - // half-up rounding bias - // .wrapping_add(1 << design_parameters::SAMPLE_BUFFER_SIZE_LOG2 - 1) >> design_parameters::SAMPLE_BUFFER_SIZE_LOG2) as i32) .wrapping_mul(harmonic); @@ -131,7 +127,7 @@ const APP: () = { Quadrature, } - let conf = Conf::FrequencyDiscriminator; // TODO: expose + let conf = Conf::FrequencyDiscriminator; let output = match conf { // Convert from IQ to power and phase. Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()], @@ -149,14 +145,13 @@ const APP: () = { #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } } #[task(binds = ETH, priority = 1)] fn eth(_: eth::Context) { - unsafe { hal::ethernet::interrupt_handler() } + unsafe { stm32h7xx_hal::ethernet::interrupt_handler() } } #[task(binds = SPI2, priority = 3)] @@ -166,7 +161,7 @@ const APP: () = { #[task(binds = SPI3, priority = 3)] fn spi3(_: spi3::Context) { - panic!("ADC0 input overrun"); + panic!("ADC1 input overrun"); } #[task(binds = SPI4, priority = 3)] diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index 1f9df6e..ee9da2f 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -3,6 +3,7 @@ #![no_main] use dsp::{Accu, Complex, ComplexExt, Lockin}; +use generic_array::typenum::U2; use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1}; use stabilizer::{hardware, hardware::design_parameters}; @@ -20,7 +21,7 @@ const APP: () = { adc: Adc1Input, dacs: (Dac0Output, Dac1Output), - lockin: Lockin, + lockin: Lockin, } #[init] @@ -44,24 +45,13 @@ const APP: () = { } } - /// Main DSP processing routine for Stabilizer. + /// Main DSP processing routine. /// - /// # Note - /// Processing time for the DSP application code is bounded by the following constraints: + /// See `dual-iir` for general notes on processing time and timing. /// - /// DSP application code starts after the ADC has generated a batch of samples and must be - /// completed by the time the next batch of ADC samples has been acquired (plus the FIFO buffer - /// time). If this constraint is not met, firmware will panic due to an ADC input overrun. - /// - /// The DSP application code must also fill out the next DAC output buffer in time such that the - /// DAC can switch to it when it has completed the current buffer. If this constraint is not met - /// it's possible that old DAC codes will be generated on the output and the output samples will - /// be delayed by 1 batch. - /// - /// Because the ADC and DAC operate at the same rate, these two constraints actually implement - /// the same time bounds, meeting one also means the other is also met. - /// - /// TODO: Document + /// This is an implementation of an internal-reference lockin on the ADC1 signal. + /// The reference at f_sample/8 is output on DAC0 and the phase of the demodulated + /// signal on DAC1. #[task(binds=DMA1_STR4, resources=[adc, dacs, lockin], priority=2)] fn process(c: process::Context) { let lockin = c.resources.lockin; @@ -71,29 +61,23 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; - // DAC0 always generates a fixed sinusoidal output. - dac_samples[0] - .iter_mut() - .zip(DAC_SEQUENCE.iter()) - .for_each(|(d, s)| *d = *s as u16 ^ 0x8000); - // Reference phase and frequency are known. - let pll_phase = 0; + let pll_phase = 0i32; let pll_frequency = 1i32 << (32 - design_parameters::SAMPLE_BUFFER_SIZE_LOG2); - // Harmonic index of the LO: -1 to _de_modulate the fundamental - let harmonic: i32 = -1; // TODO: expose + // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = (0.25 * i32::MAX as f32) as i32; // TODO: expose + let phase_offset: i32 = 1 << 30; // Log2 lowpass time constant. let time_constant: u8 = 8; let sample_frequency = (pll_frequency as i32).wrapping_mul(harmonic); - let sample_phase = phase_offset - .wrapping_add((pll_phase as i32).wrapping_mul(harmonic)); + let sample_phase = + phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic)); let output: Complex = adc_samples .iter() @@ -109,15 +93,17 @@ const APP: () = { .unwrap() * 2; // Full scale assuming the 2f component is gone. - for value in dac_samples[1].iter_mut() { - *value = (output.arg() >> 16) as u16 ^ 0x8000; + // Convert to DAC data. + for (i, data) in DAC_SEQUENCE.iter().enumerate() { + // DAC0 always generates a fixed sinusoidal output. + dac_samples[0][i] = *data as u16 ^ 0x8000; + dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000; } } #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } }