Merge branch 'master' of github.com:quartiq/stabilizer

2021-03-01 14:22:23 +01:00 · 2021-03-01 14:22:23 +01:00 · 46fb62e802
parent ae373370aa 6eb59d4017
commit 46fb62e802
13 changed files with 136 additions and 112 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -5,7 +5,9 @@ on:
    branches: [master, staging, trying]
  pull_request:
    branches: [master]
-
+  schedule:
+    # UTC
+    - cron: '48 4 * * *'
 env:
  CARGO_TERM_COLOR: always

--- a/Cargo.lock
+++ b/Cargo.lock
@ -724,6 +724,7 @@ dependencies = [
 "dsp",
 "embedded-hal",
 "enum-iterator",
+ "generic-array 0.14.4",
 "heapless",
 "log",
 "mcp23017",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -46,6 +46,7 @@ dsp = { path = "dsp" }
 ad9959 = { path = "ad9959" }
 smoltcp-nal = "0.1.0"
 miniconf = "0.1"
+generic-array = "0.14"

 [patch.crates-io.miniconf]
 git = "https://github.com/quartiq/miniconf.git"
--- a/dsp/benches/micro.rs
+++ b/dsp/benches/micro.rs
@ -1,8 +1,9 @@
 use core::f32::consts::PI;
-use dsp::{atan2, cossin};
-use dsp::{iir, iir_int};
-use dsp::{PLL, RPLL};
+
 use easybench::bench_env;
+use generic_array::typenum::U4;
+
+use dsp::{atan2, cossin, iir, iir_int, Lowpass, PLL, RPLL};

 fn atan2_bench() {
    let xi = (10 << 16) as i32;
@ -70,6 +71,18 @@ fn iir_bench() {
    );
 }

+fn lowpass_bench() {
+    let mut dut = Lowpass::<U4>::default();
+    println!(
+        "Lowpass::<U4>::update(x, k): {}",
+        bench_env((0x32421, 14), |(x, k)| dut.update(*x, *k))
+    );
+    println!(
+        "Lowpass::<U4>::update(x, 14): {}",
+        bench_env(0x32421, |x| dut.update(*x, 14))
+    );
+}
+
 fn main() {
    atan2_bench();
    cossin_bench();
@ -77,4 +90,5 @@ fn main() {
    pll_bench();
    iir_int_bench();
    iir_bench();
+    lowpass_bench();
 }
--- a/dsp/build.rs
+++ b/dsp/build.rs
@ -15,7 +15,7 @@ fn write_cossin_table() {
        .unwrap();
    write!(
        file,
-        "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
+        "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = ["
    )
    .unwrap();

@ -26,15 +26,17 @@ fn write_cossin_table() {
    const AMPLITUDE: f64 = u16::MAX as f64;

    for i in 0..(1 << DEPTH) {
-        // use midpoint samples to save one entry in the LUT
-        let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
-        // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
-        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
-        let sin = (phase.sin() * AMPLITUDE).round() as u16;
        if i % 4 == 0 {
            write!(file, "\n   ").unwrap();
        }
-        write!(file, " ({}, {}),", cos, sin).unwrap();
+        // Use midpoint samples to save one entry in the LUT
+        let (sin, cos) =
+            (PI / 4. * ((i as f64 + 0.5) / (1 << DEPTH) as f64)).sin_cos();
+        // Add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
+        // The -1 LSB is cancelled when unscaling with the biased half amplitude
+        let cos = ((cos * 2. - 1.) * AMPLITUDE - 1.).round() as u32;
+        let sin = (sin * AMPLITUDE).round() as u32;
+        write!(file, " {},", cos + (sin << 16)).unwrap();
    }
    writeln!(file, "\n];").unwrap();

--- a/dsp/src/complex.rs
+++ b/dsp/src/complex.rs
@ -8,6 +8,8 @@ pub trait ComplexExt<T, U> {
    fn abs_sqr(&self) -> U;
    fn log2(&self) -> T;
    fn arg(&self) -> T;
+    fn saturating_add(&self, other: Self) -> Self;
+    fn saturating_sub(&self, other: Self) -> Self;
 }

 impl ComplexExt<i32, u32> for Complex<i32> {
@ -23,7 +25,7 @@ impl ComplexExt<i32, u32> for Complex<i32> {
    /// ```
    fn from_angle(angle: i32) -> Self {
        let (c, s) = cossin(angle);
-        Self { re: c, im: s }
+        Self::new(c, s)
    }

    /// Return the absolute square (the squared magnitude).
@ -85,6 +87,20 @@ impl ComplexExt<i32, u32> for Complex<i32> {
    fn arg(&self) -> i32 {
        atan2(self.im, self.re)
    }
+
+    fn saturating_add(&self, other: Self) -> Self {
+        Self::new(
+            self.re.saturating_add(other.re),
+            self.im.saturating_add(other.im),
+        )
+    }
+
+    fn saturating_sub(&self, other: Self) -> Self {
+        Self::new(
+            self.re.saturating_sub(other.re),
+            self.im.saturating_sub(other.im),
+        )
+    }
 }

 /// Full scale fixed point multiplication.
--- a/dsp/src/cossin.rs
+++ b/dsp/src/cossin.rs
@ -11,10 +11,10 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 ///
 /// # Returns
 /// The cos and sin values of the provided phase as a `(i32, i32)`
-/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
+/// tuple. With a 7-bit deep LUT there is 9e-6 max and 4e-6 RMS error
 /// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> (i32, i32) {
-    // Phase bits excluding the three highes MSB
+    // Phase bits excluding the three highest MSB
    const OCTANT_BITS: usize = 32 - 3;

    // This is a slightly more compact way to compute the four flags for
@ -22,50 +22,55 @@ pub fn cossin(phase: i32) -> (i32, i32) {
    let mut octant = (phase as u32) >> OCTANT_BITS;
    octant ^= octant << 1;

-    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
-    let mut phase = phase & ((1 << OCTANT_BITS) - 1);
-
+    let mut phase = phase;
    if octant & 1 != 0 {
        // phase = pi/4 - phase
-        phase = (1 << OCTANT_BITS) - 1 - phase;
+        phase = !phase;
    }

-    let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
-    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
-    // values and scale the sine values as encoded in the LUT.
-    let mut cos = lookup.0 as i32 + u16::MAX as i32;
-    let mut sin = (lookup.1 as i32) << 1;
+    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
+    phase &= (1 << OCTANT_BITS) - 1;

    // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
    const ALIGN_MSB: usize = 32 - 16 - 1;
    phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
+
+    let lookup = COSSIN[(phase >> ALIGN_MSB) as usize];
    phase &= (1 << ALIGN_MSB) - 1;
+
    // The phase values used for the LUT are at midpoint for the truncated phase.
    // Interpolate relative to the LUT entry midpoint.
-    phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
-    // Fixed point pi/4.
-    const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
-    // No rounding bias necessary here since we keep enough low bits.
-    let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
+    phase -= 1 << (ALIGN_MSB - 1);

-    // Make room for the sign bit.
-    let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
+    // Cancel the -1 bias that was conditionally introduced above.
+    // This lowers the DC spur from 2e-8 to 2e-10 magnitude.
+    // phase += (octant & 1) as i32;
+
+    // Fixed point pi/4.
+    const PI4: i32 = (PI / 4. * (1 << 16) as f64) as _;
+    // No rounding bias necessary here since we keep enough low bits.
+    let dphi = (phase * PI4) >> 16;
+
+    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
+    // values and scale the sine values as encoded in the LUT.
+    let mut cos = (lookup & 0xffff) as i32 + (1 << 16);
+    let mut sin = (lookup >> 16) as i32;
+
+    let dcos = (sin * dphi) >> COSSIN_DEPTH;
    let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);

    cos = (cos << (ALIGN_MSB - 1)) - dcos;
-    sin = (sin << (ALIGN_MSB - 1)) + dsin;
+    sin = (sin << ALIGN_MSB) + dsin;

    // Unmap using octant bits.
    if octant & 2 != 0 {
        core::mem::swap(&mut sin, &mut cos);
    }
-
    if octant & 4 != 0 {
-        cos *= -1;
+        cos = -cos;
    }
-
    if octant & 8 != 0 {
-        sin *= -1;
+        sin = -sin;
    }

    (cos, sin)
@ -79,7 +84,7 @@ mod tests {
    #[test]
    fn cossin_error_max_rms_all_phase() {
        // Constant amplitude error due to LUT data range.
-        const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _;
+        const AMPLITUDE: f64 = (1i64 << 31) as f64 - 0.85 * (1i64 << 15) as f64;
        const MAX_PHASE: f64 = (1i64 << 32) as _;
        let mut rms_err = (0f64, 0f64);
        let mut sum_err = (0f64, 0f64);
@ -121,8 +126,8 @@ mod tests {
            max_err.0 = max_err.0.max(err.0.abs());
            max_err.1 = max_err.1.max(err.1.abs());
        }
-        rms_err.0 /= MAX_PHASE;
-        rms_err.1 /= MAX_PHASE;
+        rms_err.0 /= (1 << PHASE_DEPTH) as f64;
+        rms_err.1 /= (1 << PHASE_DEPTH) as f64;

        println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
        println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
@ -131,18 +136,18 @@ mod tests {
        println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);

        assert!(sum.0.abs() < 4e-10);
-        assert!(sum.1.abs() < 4e-10);
+        assert!(sum.1.abs() < 3e-8);

        assert!(demod.0.abs() < 4e-10);
-        assert!(demod.1.abs() < 4e-10);
+        assert!(demod.1.abs() < 1e-8);

        assert!(sum_err.0.abs() < 4e-10);
        assert!(sum_err.1.abs() < 4e-10);

-        assert!(rms_err.0.sqrt() < 6e-8);
-        assert!(rms_err.1.sqrt() < 6e-8);
+        assert!(rms_err.0.sqrt() < 4e-6);
+        assert!(rms_err.1.sqrt() < 4e-6);

-        assert!(max_err.0 < 1.1e-5);
-        assert!(max_err.1 < 1.1e-5);
+        assert!(max_err.0 < 1e-5);
+        assert!(max_err.1 < 1e-5);
    }
 }
--- a/dsp/src/iir_int.rs
+++ b/dsp/src/iir_int.rs
@ -1,5 +1,7 @@
+use super::tools::macc_i32;
 use core::f64::consts::PI;
-use serde::{Deserialize, Serialize};
+use miniconf::StringSet;
+use serde::Deserialize;

 /// Generic vector for integer IIR filter.
 /// This struct is used to hold the x/y input/output data vector or the b/a coefficient
@ -39,23 +41,12 @@ impl Coeff for Vec5 {
    }
 }

-fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
-    // Rounding bias, half up
-    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
-    let y = x
-        .iter()
-        .zip(a)
-        .map(|(x, a)| *x as i64 * *a as i64)
-        .fold(y0, |y, xa| y + xa);
-    (y >> shift) as i32
-}
-
 /// Integer biquad IIR
 ///
 /// See `dsp::iir::IIR` for general implementation details.
 /// Offset and limiting disabled to suit lowpass applications.
 /// Coefficient scaling fixed and optimized.
-#[derive(Copy, Clone, Default, Deserialize, Serialize)]
+#[derive(Copy, Clone, Default, Debug, StringSet, Deserialize)]
 pub struct IIR {
    pub ba: Vec5,
    // pub y_offset: i32,
@ -85,7 +76,7 @@ impl IIR {
        // Store x0            x0 x1 x2 y1 y2
        xy[0] = x0;
        // Compute y0 by multiply-accumulate
-        let y0 = macc(0, xy, &self.ba, IIR::SHIFT);
+        let y0 = macc_i32(0, xy, &self.ba, IIR::SHIFT);
        // Limit y0
        // let y0 = y0.max(self.y_min).min(self.y_max);
        // Store y0            x0 x1 y0 y1 y2
--- a/dsp/src/lockin.rs
+++ b/dsp/src/lockin.rs
@ -1,12 +1,12 @@
 use super::{Complex, ComplexExt, Lowpass, MulScaled};
-use generic_array::typenum::U2;
+use generic_array::ArrayLength;

 #[derive(Clone, Default)]
-pub struct Lockin {
-    state: [Lowpass<U2>; 2],
+pub struct Lockin<N: ArrayLength<i32>> {
+    state: [Lowpass<N>; 2],
 }

-impl Lockin {
+impl<N: ArrayLength<i32>> Lockin<N> {
    /// Update the lockin with a sample taken at a given phase.
    pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex<i32> {
        // Get the LO signal for demodulation and mix the sample;
--- a/dsp/src/lowpass.rs
+++ b/dsp/src/lowpass.rs
@ -27,10 +27,10 @@ impl<N: ArrayLength<i32>> Lowpass<N> {
        // Note T-DF-I and the zeros at Nyquist.
        let mut x = x;
        for y in self.y.iter_mut() {
-            let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k;
+            let dy = x.saturating_sub(*y) >> k;
            *y += dy;
            x = *y - (dy >> 1);
        }
-        x
+        x.saturating_add((self.y.len() as i32) << (k - 1))
    }
 }
--- a/dsp/src/tools.rs
+++ b/dsp/src/tools.rs
@ -77,6 +77,17 @@ where
        .fold(y0, |y, xa| y + xa)
 }

+pub fn macc_i32(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
+    // Rounding bias, half up
+    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
+    let y = x
+        .iter()
+        .zip(a)
+        .map(|(x, a)| *x as i64 * *a as i64)
+        .fold(y0, |y, xa| y + xa);
+    (y >> shift) as i32
+}
+
 /// Combine high and low i32 into a single downscaled i32, saturating the type.
 pub fn saturating_scale(lo: i32, hi: i32, shift: u32) -> i32 {
    debug_assert!(shift & 31 == shift);
--- a/src/bin/lockin-external.rs
+++ b/src/bin/lockin-external.rs
@ -2,14 +2,12 @@
 #![no_std]
 #![no_main]

-use stm32h7xx_hal as hal;
-
-use stabilizer::{hardware, hardware::design_parameters};
-
 use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL};
+use generic_array::typenum::U4;
 use hardware::{
    Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1,
 };
+use stabilizer::{hardware, hardware::design_parameters};

 #[rtic::app(device = stm32h7xx_hal::stm32, peripherals = true, monotonic = rtic::cyccnt::CYCCNT)]
 const APP: () = {
@ -20,7 +18,7 @@ const APP: () = {

        timestamper: InputStamper,
        pll: RPLL,
-        lockin: Lockin,
+        lockin: Lockin<U4>,
    }

    #[init]
@ -88,22 +86,20 @@ const APP: () = {
            .map(|t| t as i32);
        let (pll_phase, pll_frequency) = c.resources.pll.update(
            timestamp,
-            21, // frequency settling time (log2 counter cycles), TODO: expose
-            21, // phase settling time, TODO: expose
+            21, // frequency settling time (log2 counter cycles),
+            21, // phase settling time
        );

        // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate)
-        let harmonic: i32 = -1; // TODO: expose
+        let harmonic: i32 = -1;

        // Demodulation LO phase offset
-        let phase_offset: i32 = 0; // TODO: expose
+        let phase_offset: i32 = 0;

        // Log2 lowpass time constant
-        let time_constant: u8 = 6; // TODO: expose
+        let time_constant: u8 = 6;

        let sample_frequency = ((pll_frequency
-            // half-up rounding bias
-            // .wrapping_add(1 << design_parameters::SAMPLE_BUFFER_SIZE_LOG2 - 1)
            >> design_parameters::SAMPLE_BUFFER_SIZE_LOG2)
            as i32)
            .wrapping_mul(harmonic);
@ -131,7 +127,7 @@ const APP: () = {
            Quadrature,
        }

-        let conf = Conf::FrequencyDiscriminator; // TODO: expose
+        let conf = Conf::FrequencyDiscriminator;
        let output = match conf {
            // Convert from IQ to power and phase.
            Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()],
@ -149,14 +145,13 @@ const APP: () = {
    #[idle(resources=[afes])]
    fn idle(_: idle::Context) -> ! {
        loop {
-            // TODO: Implement network interface.
            cortex_m::asm::wfi();
        }
    }

    #[task(binds = ETH, priority = 1)]
    fn eth(_: eth::Context) {
-        unsafe { hal::ethernet::interrupt_handler() }
+        unsafe { stm32h7xx_hal::ethernet::interrupt_handler() }
    }

    #[task(binds = SPI2, priority = 3)]
@ -166,7 +161,7 @@ const APP: () = {

    #[task(binds = SPI3, priority = 3)]
    fn spi3(_: spi3::Context) {
-        panic!("ADC0 input overrun");
+        panic!("ADC1 input overrun");
    }

    #[task(binds = SPI4, priority = 3)]
--- a/src/bin/lockin-internal.rs
+++ b/src/bin/lockin-internal.rs
@ -3,6 +3,7 @@
 #![no_main]

 use dsp::{Accu, Complex, ComplexExt, Lockin};
+use generic_array::typenum::U2;
 use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1};
 use stabilizer::{hardware, hardware::design_parameters};

@ -20,7 +21,7 @@ const APP: () = {
        adc: Adc1Input,
        dacs: (Dac0Output, Dac1Output),

-        lockin: Lockin,
+        lockin: Lockin<U2>,
    }

    #[init]
@ -44,24 +45,13 @@ const APP: () = {
        }
    }

-    /// Main DSP processing routine for Stabilizer.
+    /// Main DSP processing routine.
    ///
-    /// # Note
-    /// Processing time for the DSP application code is bounded by the following constraints:
+    /// See `dual-iir` for general notes on processing time and timing.
    ///
-    /// DSP application code starts after the ADC has generated a batch of samples and must be
-    /// completed by the time the next batch of ADC samples has been acquired (plus the FIFO buffer
-    /// time). If this constraint is not met, firmware will panic due to an ADC input overrun.
-    ///
-    /// The DSP application code must also fill out the next DAC output buffer in time such that the
-    /// DAC can switch to it when it has completed the current buffer. If this constraint is not met
-    /// it's possible that old DAC codes will be generated on the output and the output samples will
-    /// be delayed by 1 batch.
-    ///
-    /// Because the ADC and DAC operate at the same rate, these two constraints actually implement
-    /// the same time bounds, meeting one also means the other is also met.
-    ///
-    /// TODO: Document
+    /// This is an implementation of an internal-reference lockin on the ADC1 signal.
+    /// The reference at f_sample/8 is output on DAC0 and the phase of the demodulated
+    /// signal on DAC1.
    #[task(binds=DMA1_STR4, resources=[adc, dacs, lockin], priority=2)]
    fn process(c: process::Context) {
        let lockin = c.resources.lockin;
@ -71,29 +61,23 @@ const APP: () = {
            c.resources.dacs.1.acquire_buffer(),
        ];

-        // DAC0 always generates a fixed sinusoidal output.
-        dac_samples[0]
-            .iter_mut()
-            .zip(DAC_SEQUENCE.iter())
-            .for_each(|(d, s)| *d = *s as u16 ^ 0x8000);
-
        // Reference phase and frequency are known.
-        let pll_phase = 0;
+        let pll_phase = 0i32;
        let pll_frequency =
            1i32 << (32 - design_parameters::SAMPLE_BUFFER_SIZE_LOG2);

-        // Harmonic index of the LO: -1 to _de_modulate the fundamental
-        let harmonic: i32 = -1; // TODO: expose
+        // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate)
+        let harmonic: i32 = -1;

        // Demodulation LO phase offset
-        let phase_offset: i32 = (0.25 * i32::MAX as f32) as i32; // TODO: expose
+        let phase_offset: i32 = 1 << 30;

        // Log2 lowpass time constant.
        let time_constant: u8 = 8;

        let sample_frequency = (pll_frequency as i32).wrapping_mul(harmonic);
-        let sample_phase = phase_offset
-            .wrapping_add((pll_phase as i32).wrapping_mul(harmonic));
+        let sample_phase =
+            phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic));

        let output: Complex<i32> = adc_samples
            .iter()
@ -109,15 +93,17 @@ const APP: () = {
            .unwrap()
            * 2; // Full scale assuming the 2f component is gone.

-        for value in dac_samples[1].iter_mut() {
-            *value = (output.arg() >> 16) as u16 ^ 0x8000;
+        // Convert to DAC data.
+        for (i, data) in DAC_SEQUENCE.iter().enumerate() {
+            // DAC0 always generates a fixed sinusoidal output.
+            dac_samples[0][i] = *data as u16 ^ 0x8000;
+            dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000;
        }
    }

    #[idle(resources=[afes])]
    fn idle(_: idle::Context) -> ! {
        loop {
-            // TODO: Implement network interface.
            cortex_m::asm::wfi();
        }
    }