diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4b66b0..cf68b3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,9 @@ on: branches: [master, staging, trying] pull_request: branches: [master] - + schedule: + # UTC + - cron: '48 4 * * *' env: CARGO_TERM_COLOR: always diff --git a/Cargo.lock b/Cargo.lock index 7fa03f0..0999288 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,6 +724,7 @@ dependencies = [ "dsp", "embedded-hal", "enum-iterator", + "generic-array 0.14.4", "heapless", "log", "mcp23017", diff --git a/Cargo.toml b/Cargo.toml index 599e595..f384fe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ dsp = { path = "dsp" } ad9959 = { path = "ad9959" } smoltcp-nal = "0.1.0" miniconf = "0.1" +generic-array = "0.14" [patch.crates-io.miniconf] git = "https://github.com/quartiq/miniconf.git" diff --git a/dsp/benches/micro.rs b/dsp/benches/micro.rs index 1f2cc1f..0902a4c 100644 --- a/dsp/benches/micro.rs +++ b/dsp/benches/micro.rs @@ -1,8 +1,9 @@ use core::f32::consts::PI; -use dsp::{atan2, cossin}; -use dsp::{iir, iir_int}; -use dsp::{PLL, RPLL}; + use easybench::bench_env; +use generic_array::typenum::U4; + +use dsp::{atan2, cossin, iir, iir_int, Lowpass, PLL, RPLL}; fn atan2_bench() { let xi = (10 << 16) as i32; @@ -70,6 +71,18 @@ fn iir_bench() { ); } +fn lowpass_bench() { + let mut dut = Lowpass::::default(); + println!( + "Lowpass::::update(x, k): {}", + bench_env((0x32421, 14), |(x, k)| dut.update(*x, *k)) + ); + println!( + "Lowpass::::update(x, 14): {}", + bench_env(0x32421, |x| dut.update(*x, 14)) + ); +} + fn main() { atan2_bench(); cossin_bench(); @@ -77,4 +90,5 @@ fn main() { pll_bench(); iir_int_bench(); iir_bench(); + lowpass_bench(); } diff --git a/dsp/build.rs b/dsp/build.rs index ca43736..5e5fe1d 100644 --- a/dsp/build.rs +++ b/dsp/build.rs @@ -15,7 +15,7 @@ fn write_cossin_table() { .unwrap(); write!( file, - "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = [" + "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = [" ) .unwrap(); @@ -29,12 +29,12 @@ fn write_cossin_table() { // use midpoint samples to save one entry in the LUT let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5); // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 - let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16; - let sin = (phase.sin() * AMPLITUDE).round() as u16; + let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1; + let sin = (phase.sin() * AMPLITUDE).round() as u32; if i % 4 == 0 { write!(file, "\n ").unwrap(); } - write!(file, " ({}, {}),", cos, sin).unwrap(); + write!(file, " {},", cos + (sin << 16)).unwrap(); } writeln!(file, "\n];").unwrap(); diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs index 3349460..1eb003f 100644 --- a/dsp/src/complex.rs +++ b/dsp/src/complex.rs @@ -8,6 +8,8 @@ pub trait ComplexExt { fn abs_sqr(&self) -> U; fn log2(&self) -> T; fn arg(&self) -> T; + fn saturating_add(&self, other: Self) -> Self; + fn saturating_sub(&self, other: Self) -> Self; } impl ComplexExt for Complex { @@ -23,7 +25,7 @@ impl ComplexExt for Complex { /// ``` fn from_angle(angle: i32) -> Self { let (c, s) = cossin(angle); - Self { re: c, im: s } + Self::new(c, s) } /// Return the absolute square (the squared magnitude). @@ -85,6 +87,20 @@ impl ComplexExt for Complex { fn arg(&self) -> i32 { atan2(self.im, self.re) } + + fn saturating_add(&self, other: Self) -> Self { + Self::new( + self.re.saturating_add(other.re), + self.im.saturating_add(other.im), + ) + } + + fn saturating_sub(&self, other: Self) -> Self { + Self::new( + self.re.saturating_sub(other.re), + self.im.saturating_sub(other.im), + ) + } } /// Full scale fixed point multiplication. diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 4a5cccd..2f21cec 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { - // Phase bits excluding the three highes MSB + // Phase bits excluding the three highest MSB const OCTANT_BITS: usize = 32 - 3; // This is a slightly more compact way to compute the four flags for @@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) { let mut octant = (phase as u32) >> OCTANT_BITS; octant ^= octant << 1; - // Mask off octant bits. This leaves the angle in the range [0, pi/4). - let mut phase = phase & ((1 << OCTANT_BITS) - 1); - + let mut phase = phase; if octant & 1 != 0 { // phase = pi/4 - phase - phase = (1 << OCTANT_BITS) - 1 - phase; + phase = !phase; } - let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize]; - // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos - // values and scale the sine values as encoded in the LUT. - let mut cos = lookup.0 as i32 + u16::MAX as i32; - let mut sin = (lookup.1 as i32) << 1; + // Mask off octant bits. This leaves the angle in the range [0, pi/4). + phase &= (1 << OCTANT_BITS) - 1; // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range. const ALIGN_MSB: usize = 32 - 16 - 1; phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB; + + let lookup = COSSIN[(phase >> ALIGN_MSB) as usize]; phase &= (1 << ALIGN_MSB) - 1; + // The phase values used for the LUT are at midpoint for the truncated phase. // Interpolate relative to the LUT entry midpoint. + // Also cancel the -1 bias that was conditionally introduced above. phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32; - // Fixed point pi/4. - const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32; - // No rounding bias necessary here since we keep enough low bits. - let dphi = (phase * PI4) >> (32 - ALIGN_MSB); - // Make room for the sign bit. - let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1); + // Fixed point pi/4. + const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32; + // No rounding bias necessary here since we keep enough low bits. + let dphi = (phase * PI4) >> 16; + + // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos + // values and scale the sine values as encoded in the LUT. + let mut cos = (lookup & 0xffff) as i32 + (1 << 16); + let mut sin = (lookup >> 16) as i32; + + let dcos = (sin * dphi) >> COSSIN_DEPTH; let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1); cos = (cos << (ALIGN_MSB - 1)) - dcos; - sin = (sin << (ALIGN_MSB - 1)) + dsin; + sin = (sin << ALIGN_MSB) + dsin; // Unmap using octant bits. if octant & 2 != 0 { core::mem::swap(&mut sin, &mut cos); } - if octant & 4 != 0 { - cos *= -1; + cos = -cos; } - if octant & 8 != 0 { - sin *= -1; + sin = -sin; } (cos, sin) diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs index 4af50f1..02fdeb1 100644 --- a/dsp/src/iir_int.rs +++ b/dsp/src/iir_int.rs @@ -1,5 +1,6 @@ use core::f64::consts::PI; -use serde::{Deserialize, Serialize}; +use miniconf::StringSet; +use serde::Deserialize; /// Generic vector for integer IIR filter. /// This struct is used to hold the x/y input/output data vector or the b/a coefficient @@ -55,7 +56,7 @@ fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { /// See `dsp::iir::IIR` for general implementation details. /// Offset and limiting disabled to suit lowpass applications. /// Coefficient scaling fixed and optimized. -#[derive(Copy, Clone, Default, Deserialize, Serialize)] +#[derive(Copy, Clone, Default, Debug, StringSet, Deserialize)] pub struct IIR { pub ba: Vec5, // pub y_offset: i32, diff --git a/dsp/src/lockin.rs b/dsp/src/lockin.rs index 6a977b4..613a133 100644 --- a/dsp/src/lockin.rs +++ b/dsp/src/lockin.rs @@ -1,12 +1,12 @@ use super::{Complex, ComplexExt, Lowpass, MulScaled}; -use generic_array::typenum::U2; +use generic_array::ArrayLength; #[derive(Clone, Default)] -pub struct Lockin { - state: [Lowpass; 2], +pub struct Lockin> { + state: [Lowpass; 2], } -impl Lockin { +impl> Lockin { /// Update the lockin with a sample taken at a given phase. pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex { // Get the LO signal for demodulation and mix the sample; diff --git a/dsp/src/lowpass.rs b/dsp/src/lowpass.rs index 5ab803d..10d5838 100644 --- a/dsp/src/lowpass.rs +++ b/dsp/src/lowpass.rs @@ -27,10 +27,10 @@ impl> Lowpass { // Note T-DF-I and the zeros at Nyquist. let mut x = x; for y in self.y.iter_mut() { - let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k; + let dy = x.saturating_sub(*y) >> k; *y += dy; x = *y - (dy >> 1); } - x + x.saturating_add((self.y.len() as i32) << (k - 1)) } } diff --git a/src/bin/lockin-external.rs b/src/bin/lockin-external.rs index 3b7c6a8..1b11c76 100644 --- a/src/bin/lockin-external.rs +++ b/src/bin/lockin-external.rs @@ -2,14 +2,12 @@ #![no_std] #![no_main] -use stm32h7xx_hal as hal; - -use stabilizer::{hardware, hardware::design_parameters}; - use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL}; +use generic_array::typenum::U4; use hardware::{ Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1, }; +use stabilizer::{hardware, hardware::design_parameters}; #[rtic::app(device = stm32h7xx_hal::stm32, peripherals = true, monotonic = rtic::cyccnt::CYCCNT)] const APP: () = { @@ -20,7 +18,7 @@ const APP: () = { timestamper: InputStamper, pll: RPLL, - lockin: Lockin, + lockin: Lockin, } #[init] @@ -88,22 +86,20 @@ const APP: () = { .map(|t| t as i32); let (pll_phase, pll_frequency) = c.resources.pll.update( timestamp, - 21, // frequency settling time (log2 counter cycles), TODO: expose - 21, // phase settling time, TODO: expose + 21, // frequency settling time (log2 counter cycles), + 21, // phase settling time ); // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) - let harmonic: i32 = -1; // TODO: expose + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = 0; // TODO: expose + let phase_offset: i32 = 0; // Log2 lowpass time constant - let time_constant: u8 = 6; // TODO: expose + let time_constant: u8 = 6; let sample_frequency = ((pll_frequency - // half-up rounding bias - // .wrapping_add(1 << design_parameters::SAMPLE_BUFFER_SIZE_LOG2 - 1) >> design_parameters::SAMPLE_BUFFER_SIZE_LOG2) as i32) .wrapping_mul(harmonic); @@ -131,7 +127,7 @@ const APP: () = { Quadrature, } - let conf = Conf::FrequencyDiscriminator; // TODO: expose + let conf = Conf::FrequencyDiscriminator; let output = match conf { // Convert from IQ to power and phase. Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()], @@ -149,14 +145,13 @@ const APP: () = { #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } } #[task(binds = ETH, priority = 1)] fn eth(_: eth::Context) { - unsafe { hal::ethernet::interrupt_handler() } + unsafe { stm32h7xx_hal::ethernet::interrupt_handler() } } #[task(binds = SPI2, priority = 3)] @@ -166,7 +161,7 @@ const APP: () = { #[task(binds = SPI3, priority = 3)] fn spi3(_: spi3::Context) { - panic!("ADC0 input overrun"); + panic!("ADC1 input overrun"); } #[task(binds = SPI4, priority = 3)] diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index 1f9df6e..ee9da2f 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -3,6 +3,7 @@ #![no_main] use dsp::{Accu, Complex, ComplexExt, Lockin}; +use generic_array::typenum::U2; use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1}; use stabilizer::{hardware, hardware::design_parameters}; @@ -20,7 +21,7 @@ const APP: () = { adc: Adc1Input, dacs: (Dac0Output, Dac1Output), - lockin: Lockin, + lockin: Lockin, } #[init] @@ -44,24 +45,13 @@ const APP: () = { } } - /// Main DSP processing routine for Stabilizer. + /// Main DSP processing routine. /// - /// # Note - /// Processing time for the DSP application code is bounded by the following constraints: + /// See `dual-iir` for general notes on processing time and timing. /// - /// DSP application code starts after the ADC has generated a batch of samples and must be - /// completed by the time the next batch of ADC samples has been acquired (plus the FIFO buffer - /// time). If this constraint is not met, firmware will panic due to an ADC input overrun. - /// - /// The DSP application code must also fill out the next DAC output buffer in time such that the - /// DAC can switch to it when it has completed the current buffer. If this constraint is not met - /// it's possible that old DAC codes will be generated on the output and the output samples will - /// be delayed by 1 batch. - /// - /// Because the ADC and DAC operate at the same rate, these two constraints actually implement - /// the same time bounds, meeting one also means the other is also met. - /// - /// TODO: Document + /// This is an implementation of an internal-reference lockin on the ADC1 signal. + /// The reference at f_sample/8 is output on DAC0 and the phase of the demodulated + /// signal on DAC1. #[task(binds=DMA1_STR4, resources=[adc, dacs, lockin], priority=2)] fn process(c: process::Context) { let lockin = c.resources.lockin; @@ -71,29 +61,23 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; - // DAC0 always generates a fixed sinusoidal output. - dac_samples[0] - .iter_mut() - .zip(DAC_SEQUENCE.iter()) - .for_each(|(d, s)| *d = *s as u16 ^ 0x8000); - // Reference phase and frequency are known. - let pll_phase = 0; + let pll_phase = 0i32; let pll_frequency = 1i32 << (32 - design_parameters::SAMPLE_BUFFER_SIZE_LOG2); - // Harmonic index of the LO: -1 to _de_modulate the fundamental - let harmonic: i32 = -1; // TODO: expose + // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = (0.25 * i32::MAX as f32) as i32; // TODO: expose + let phase_offset: i32 = 1 << 30; // Log2 lowpass time constant. let time_constant: u8 = 8; let sample_frequency = (pll_frequency as i32).wrapping_mul(harmonic); - let sample_phase = phase_offset - .wrapping_add((pll_phase as i32).wrapping_mul(harmonic)); + let sample_phase = + phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic)); let output: Complex = adc_samples .iter() @@ -109,15 +93,17 @@ const APP: () = { .unwrap() * 2; // Full scale assuming the 2f component is gone. - for value in dac_samples[1].iter_mut() { - *value = (output.arg() >> 16) as u16 ^ 0x8000; + // Convert to DAC data. + for (i, data) in DAC_SEQUENCE.iter().enumerate() { + // DAC0 always generates a fixed sinusoidal output. + dac_samples[0][i] = *data as u16 ^ 0x8000; + dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000; } } #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } }