Merge #275

275: use num crate r=jordens a=jordens * Use `num` crate * Clean up lowpass gain handling * [x] test on hardware Co-authored-by: Robert Jördens <rj@quartiq.de>
2021-02-19 08:51:04 +00:00 · 2021-02-19 08:51:04 +00:00 · 83e770509e
commit 83e770509e
parent 1239a29904 fcb7bb0025
12 changed files with 257 additions and 196 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -356,6 +356,7 @@ dependencies = [
 "generic-array 0.14.4",
 "libm",
 "ndarray",
+ "num",
 "rand",
 "serde",
 ]
@ -623,6 +624,19 @@ dependencies = [
 "rawpointer",
 ]

+[[package]]
+name = "num"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b7a8e9be5e039e2ff869df49155f1c06bd01ade2117ec783e56ab0932b67a8f"
+dependencies = [
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
 [[package]]
 name = "num-complex"
 version = "0.3.1"
@ -642,6 +656,28 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "num-iter"
+version = "0.1.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.14"
--- a/dsp/Cargo.toml
+++ b/dsp/Cargo.toml
@ -8,6 +8,7 @@ edition = "2018"
 libm = "0.2.1"
 serde = { version = "1.0", features = ["derive"], default-features = false }
 generic-array = "0.14"
+num = { version = "0.3.1", default-features = false }

 [dev-dependencies]
 criterion = "0.3"
--- a/dsp/benches/micro.rs
+++ b/dsp/benches/micro.rs
@ -1,8 +1,6 @@
 use core::f32::consts::PI;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use dsp::{atan2, cossin};
-use dsp::{iir, iir_int};
-use dsp::{pll::PLL, rpll::RPLL};
+use dsp::{atan2, cossin, iir, iir_int, PLL, RPLL};

 fn atan2_bench(c: &mut Criterion) {
    let xi = (10 << 16) as i32;
--- a/dsp/src/complex.rs
+++ b/dsp/src/complex.rs
@ -1,33 +1,29 @@
-use core::ops::Mul;
+pub use num::Complex;

 use super::{atan2, cossin};

-#[derive(Copy, Clone, Default, PartialEq, Debug)]
-pub struct Complex<T>(pub T, pub T);
-
-impl<T: Copy> Complex<T> {
-    pub fn map<F>(&self, func: F) -> Self
-    where
-        F: Fn(T) -> T,
-    {
-        Complex(func(self.0), func(self.1))
-    }
+/// Complex extension trait offering DSP (fast, good accuracy) functionality.
+pub trait ComplexExt<T, U> {
+    fn from_angle(angle: T) -> Self;
+    fn abs_sqr(&self) -> U;
+    fn log2(&self) -> T;
+    fn arg(&self) -> T;
 }

-impl Complex<i32> {
+impl ComplexExt<i32, u32> for Complex<i32> {
    /// Return a Complex on the unit circle given an angle.
    ///
    /// Example:
    ///
    /// ```
-    /// use dsp::Complex;
+    /// use dsp::{Complex, ComplexExt};
    /// Complex::<i32>::from_angle(0);
    /// Complex::<i32>::from_angle(1 << 30);  // pi/2
    /// Complex::<i32>::from_angle(-1 << 30);  // -pi/2
    /// ```
-    pub fn from_angle(angle: i32) -> Self {
+    fn from_angle(angle: i32) -> Self {
        let (c, s) = cossin(angle);
-        Self(c, s)
+        Self { re: c, im: s }
    }

    /// Return the absolute square (the squared magnitude).
@ -39,13 +35,13 @@ impl Complex<i32> {
    /// Example:
    ///
    /// ```
-    /// use dsp::Complex;
-    /// assert_eq!(Complex(i32::MIN, 0).abs_sqr(), 1 << 31);
-    /// assert_eq!(Complex(i32::MAX, i32::MAX).abs_sqr(), u32::MAX - 3);
+    /// use dsp::{Complex, ComplexExt};
+    /// assert_eq!(Complex::new(i32::MIN, 0).abs_sqr(), 1 << 31);
+    /// assert_eq!(Complex::new(i32::MAX, i32::MAX).abs_sqr(), u32::MAX - 3);
    /// ```
-    pub fn abs_sqr(&self) -> u32 {
-        (((self.0 as i64) * (self.0 as i64)
-            + (self.1 as i64) * (self.1 as i64))
+    fn abs_sqr(&self) -> u32 {
+        (((self.re as i64) * (self.re as i64)
+            + (self.im as i64) * (self.im as i64))
            >> 31) as u32
    }

@ -59,15 +55,15 @@ impl Complex<i32> {
    /// Example:
    ///
    /// ```
-    /// use dsp::Complex;
-    /// assert_eq!(Complex(i32::MAX, i32::MAX).log2(), -1);
-    /// assert_eq!(Complex(i32::MAX, 0).log2(), -2);
-    /// assert_eq!(Complex(1, 0).log2(), -63);
-    /// assert_eq!(Complex(0, 0).log2(), -64);
+    /// use dsp::{Complex, ComplexExt};
+    /// assert_eq!(Complex::new(i32::MAX, i32::MAX).log2(), -1);
+    /// assert_eq!(Complex::new(i32::MAX, 0).log2(), -2);
+    /// assert_eq!(Complex::new(1, 0).log2(), -63);
+    /// assert_eq!(Complex::new(0, 0).log2(), -64);
    /// ```
-    pub fn log2(&self) -> i32 {
-        let a = (self.0 as i64) * (self.0 as i64)
-            + (self.1 as i64) * (self.1 as i64);
+    fn log2(&self) -> i32 {
+        let a = (self.re as i64) * (self.re as i64)
+            + (self.im as i64) * (self.im as i64);
        -(a.leading_zeros() as i32)
    }

@ -78,52 +74,51 @@ impl Complex<i32> {
    /// Example:
    ///
    /// ```
-    /// use dsp::Complex;
-    /// assert_eq!(Complex(1, 0).arg(), 0);
-    /// assert_eq!(Complex(-i32::MAX, 1).arg(), i32::MAX);
-    /// assert_eq!(Complex(-i32::MAX, -1).arg(), -i32::MAX);
-    /// assert_eq!(Complex(0, -1).arg(), -i32::MAX >> 1);
-    /// assert_eq!(Complex(0, 1).arg(), (i32::MAX >> 1) + 1);
-    /// assert_eq!(Complex(1, 1).arg(), (i32::MAX >> 2) + 1);
+    /// use dsp::{Complex, ComplexExt};
+    /// assert_eq!(Complex::new(1, 0).arg(), 0);
+    /// assert_eq!(Complex::new(-i32::MAX, 1).arg(), i32::MAX);
+    /// assert_eq!(Complex::new(-i32::MAX, -1).arg(), -i32::MAX);
+    /// assert_eq!(Complex::new(0, -1).arg(), -i32::MAX >> 1);
+    /// assert_eq!(Complex::new(0, 1).arg(), (i32::MAX >> 1) + 1);
+    /// assert_eq!(Complex::new(1, 1).arg(), (i32::MAX >> 2) + 1);
    /// ```
-    pub fn arg(&self) -> i32 {
-        atan2(self.1, self.0)
+    fn arg(&self) -> i32 {
+        atan2(self.im, self.re)
    }
 }

-impl Mul for Complex<i32> {
-    type Output = Self;
+/// Full scale fixed point multiplication.
+pub trait MulScaled<T> {
+    fn mul_scaled(self, other: T) -> Self;
+}

-    fn mul(self, other: Self) -> Self {
-        let a = self.0 as i64;
-        let b = self.1 as i64;
-        let c = other.0 as i64;
-        let d = other.1 as i64;
-        Complex(
-            ((a * c - b * d + (1 << 31)) >> 32) as i32,
-            ((b * c + a * d + (1 << 31)) >> 32) as i32,
-        )
+impl MulScaled<Complex<i32>> for Complex<i32> {
+    fn mul_scaled(self, other: Self) -> Self {
+        let a = self.re as i64;
+        let b = self.im as i64;
+        let c = other.re as i64;
+        let d = other.im as i64;
+        Complex {
+            re: ((a * c - b * d + (1 << 30)) >> 31) as i32,
+            im: ((b * c + a * d + (1 << 30)) >> 31) as i32,
+        }
    }
 }

-impl Mul<i32> for Complex<i32> {
-    type Output = Self;
-
-    fn mul(self, other: i32) -> Self {
-        Complex(
-            ((other as i64 * self.0 as i64 + (1 << 31)) >> 32) as i32,
-            ((other as i64 * self.1 as i64 + (1 << 31)) >> 32) as i32,
-        )
+impl MulScaled<i32> for Complex<i32> {
+    fn mul_scaled(self, other: i32) -> Self {
+        Complex {
+            re: ((other as i64 * self.re as i64 + (1 << 30)) >> 31) as i32,
+            im: ((other as i64 * self.im as i64 + (1 << 30)) >> 31) as i32,
+        }
    }
 }

-impl Mul<i16> for Complex<i32> {
-    type Output = Self;
-
-    fn mul(self, other: i16) -> Self {
-        Complex(
-            (other as i32 * (self.0 >> 16) + (1 << 15)) >> 16,
-            (other as i32 * (self.1 >> 16) + (1 << 15)) >> 16,
-        )
+impl MulScaled<i16> for Complex<i32> {
+    fn mul_scaled(self, other: i16) -> Self {
+        Complex {
+            re: (other as i32 * (self.re >> 16) + (1 << 14)) >> 15,
+            im: (other as i32 * (self.im >> 16) + (1 << 14)) >> 15,
+        }
    }
 }
--- a/dsp/src/cossin.rs
+++ b/dsp/src/cossin.rs
@ -74,7 +74,6 @@ pub fn cossin(phase: i32) -> (i32, i32) {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::Complex;
    use core::f64::consts::PI;

    #[test]
@ -82,11 +81,11 @@ mod tests {
        // Constant amplitude error due to LUT data range.
        const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _;
        const MAX_PHASE: f64 = (1i64 << 32) as _;
-        let mut rms_err = Complex(0f64, 0f64);
-        let mut sum_err = Complex(0f64, 0f64);
-        let mut max_err = Complex(0f64, 0f64);
-        let mut sum = Complex(0f64, 0f64);
-        let mut demod = Complex(0f64, 0f64);
+        let mut rms_err = (0f64, 0f64);
+        let mut sum_err = (0f64, 0f64);
+        let mut max_err = (0f64, 0f64);
+        let mut sum = (0f64, 0f64);
+        let mut demod = (0f64, 0f64);

        // use std::{fs::File, io::{BufWriter, prelude::*}, path::Path};
        // let mut file = BufWriter::new(File::create(Path::new("data.bin")).unwrap());
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@ -1,101 +1,28 @@
 #![cfg_attr(not(test), no_std)]
 #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]

-use core::ops::{Add, Mul, Neg};
-
-fn abs<T>(x: T) -> T
-where
-    T: PartialOrd + Default + Neg<Output = T>,
-{
-    if x >= T::default() {
-        x
-    } else {
-        -x
-    }
-}
-
-// These are implemented here because core::f32 doesn't have them (yet).
-// They are naive and don't handle inf/nan.
-// `compiler-intrinsics`/llvm should have better (robust, universal, and
-// faster) implementations.
-
-fn copysign<T>(x: T, y: T) -> T
-where
-    T: PartialOrd + Default + Neg<Output = T>,
-{
-    if (x >= T::default() && y >= T::default())
-        || (x <= T::default() && y <= T::default())
-    {
-        x
-    } else {
-        -x
-    }
-}
-
-#[cfg(not(feature = "nightly"))]
-fn max<T>(x: T, y: T) -> T
-where
-    T: PartialOrd,
-{
-    if x > y {
-        x
-    } else {
-        y
-    }
-}
-
-#[cfg(not(feature = "nightly"))]
-fn min<T>(x: T, y: T) -> T
-where
-    T: PartialOrd,
-{
-    if x < y {
-        x
-    } else {
-        y
-    }
-}
-
-#[cfg(feature = "nightly")]
-fn max(x: f32, y: f32) -> f32 {
-    core::intrinsics::maxnumf32(x, y)
-}
-
-#[cfg(feature = "nightly")]
-fn min(x: f32, y: f32) -> f32 {
-    core::intrinsics::minnumf32(x, y)
-}
-
-// Multiply-accumulate vectors `x` and `a`.
-//
-// A.k.a. dot product.
-// Rust/LLVM optimize this nicely.
-fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
-where
-    T: Add<Output = T> + Mul<Output = T> + Copy,
-{
-    x.iter()
-        .zip(a)
-        .map(|(x, a)| *x * *a)
-        .fold(y0, |y, xa| y + xa)
-}
-
-pub mod accu;
+mod tools;
+pub use tools::*;
 mod atan2;
+pub use atan2::*;
+mod accu;
+pub use accu::*;
 mod complex;
+pub use complex::*;
 mod cossin;
+pub use cossin::*;
 pub mod iir;
 pub mod iir_int;
-pub mod lockin;
-pub mod lowpass;
-pub mod pll;
-pub mod rpll;
-pub mod unwrap;
-
-pub use accu::Accu;
-pub use atan2::atan2;
-pub use complex::Complex;
-pub use cossin::cossin;
+mod lockin;
+pub use lockin::*;
+mod lowpass;
+pub use lowpass::*;
+mod pll;
+pub use pll::*;
+mod rpll;
+pub use rpll::*;
+mod unwrap;
+pub use unwrap::*;

 #[cfg(test)]
 pub mod testing;
--- a/dsp/src/lockin.rs
+++ b/dsp/src/lockin.rs
@ -1,4 +1,4 @@
-use super::{lowpass::Lowpass, Complex};
+use super::{Complex, ComplexExt, Lowpass, MulScaled};
 use generic_array::typenum::U2;

 #[derive(Clone, Default)]
@ -8,19 +8,15 @@ pub struct Lockin {

 impl Lockin {
    /// Update the lockin with a sample taken at a given phase.
-    /// The lowpass has a gain of `1 << k`.
-    pub fn update(&mut self, sample: i16, phase: i32, k: u8) -> Complex<i32> {
-        // Get the LO signal for demodulation.
-        let lo = Complex::from_angle(phase);
-
-        // Mix with the LO signal
-        let mix = lo * sample;
+    pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex<i32> {
+        // Get the LO signal for demodulation and mix the sample;
+        let mix = Complex::from_angle(phase).mul_scaled(sample);

        // Filter with the IIR lowpass,
        // return IQ (in-phase and quadrature) data.
-        Complex(
-            self.state[0].update(mix.0, k),
-            self.state[1].update(mix.1, k),
-        )
+        Complex {
+            re: self.state[0].update(mix.re, k),
+            im: self.state[1].update(mix.im, k),
+        }
    }
 }
--- a/dsp/src/lowpass.rs
+++ b/dsp/src/lowpass.rs
@ -14,19 +14,20 @@ impl<N: ArrayLength<i32>> Lowpass<N> {
    /// Update the filter with a new sample.
    ///
    /// # Args
-    /// * `x`: Input data, needs `k` bits headroom.
-    /// * `k`: Log2 time constant, 0..31.
+    /// * `x`: Input data. Needs 1 bit headroom but will saturate cleanly beyond that.
+    /// * `k`: Log2 time constant, 1..=31.
    ///
    /// # Return
-    /// Filtered output y, with gain of `1 << k`.
+    /// Filtered output y.
    pub fn update(&mut self, x: i32, k: u8) -> i32 {
        debug_assert!(k & 31 == k);
+        debug_assert!((k - 1) & 31 == k - 1);
        // This is an unrolled and optimized first-order IIR loop
        // that works for all possible time constants.
-        // Note DF-II and the zeros at Nyquist.
-        let mut x = x << k;
+        // Note T-DF-I and the zeros at Nyquist.
+        let mut x = x;
        for y in self.y.iter_mut() {
-            let dy = (x - *y + (1 << (k - 1))) >> k;
+            let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k;
            *y += dy;
            x = *y - (dy >> 1);
        }
--- a/dsp/src/testing.rs
+++ b/dsp/src/testing.rs
@ -31,7 +31,7 @@ pub fn complex_isclose(
    rtol: f32,
    atol: f32,
 ) -> bool {
-    isclosef(a.0, b.0, rtol, atol) && isclosef(a.1, b.1, rtol, atol)
+    isclosef(a.re, b.re, rtol, atol) && isclosef(a.im, b.im, rtol, atol)
 }

 pub fn complex_allclose(
--- a/dsp/src/tools.rs
+++ b/dsp/src/tools.rs
@ -0,0 +1,95 @@
+use core::ops::{Add, Mul, Neg};
+
+pub fn abs<T>(x: T) -> T
+where
+    T: PartialOrd + Default + Neg<Output = T>,
+{
+    if x >= T::default() {
+        x
+    } else {
+        -x
+    }
+}
+
+// These are implemented here because core::f32 doesn't have them (yet).
+// They are naive and don't handle inf/nan.
+// `compiler-intrinsics`/llvm should have better (robust, universal, and
+// faster) implementations.
+
+pub fn copysign<T>(x: T, y: T) -> T
+where
+    T: PartialOrd + Default + Neg<Output = T>,
+{
+    if (x >= T::default() && y >= T::default())
+        || (x <= T::default() && y <= T::default())
+    {
+        x
+    } else {
+        -x
+    }
+}
+
+#[cfg(not(feature = "nightly"))]
+pub fn max<T>(x: T, y: T) -> T
+where
+    T: PartialOrd,
+{
+    if x > y {
+        x
+    } else {
+        y
+    }
+}
+
+#[cfg(not(feature = "nightly"))]
+pub fn min<T>(x: T, y: T) -> T
+where
+    T: PartialOrd,
+{
+    if x < y {
+        x
+    } else {
+        y
+    }
+}
+
+#[cfg(feature = "nightly")]
+pub fn max(x: f32, y: f32) -> f32 {
+    core::intrinsics::maxnumf32(x, y)
+}
+
+#[cfg(feature = "nightly")]
+pub fn min(x: f32, y: f32) -> f32 {
+    core::intrinsics::minnumf32(x, y)
+}
+
+// Multiply-accumulate vectors `x` and `a`.
+//
+// A.k.a. dot product.
+// Rust/LLVM optimize this nicely.
+pub fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
+where
+    T: Add<Output = T> + Mul<Output = T> + Copy,
+{
+    x.iter()
+        .zip(a)
+        .map(|(x, a)| *x * *a)
+        .fold(y0, |y, xa| y + xa)
+}
+
+/// Combine high and low i32 into a single downscaled i32, saturating the type.
+pub fn saturating_scale(lo: i32, hi: i32, shift: u32) -> i32 {
+    debug_assert!(shift & 31 == shift);
+
+    let shift_hi = 31 - shift;
+    debug_assert!(shift_hi & 31 == shift_hi);
+
+    let over = hi >> shift;
+    if over < -1 {
+        i32::MIN
+    } else if over > 0 {
+        i32::MAX
+    } else {
+        (lo >> shift) + (hi << shift_hi)
+    }
+}
--- a/src/bin/lockin-external.rs
+++ b/src/bin/lockin-external.rs
@ -6,7 +6,7 @@ use stm32h7xx_hal as hal;

 use stabilizer::{hardware, hardware::design_parameters};

-use dsp::{lockin::Lockin, rpll::RPLL, Accu};
+use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL};
 use hardware::{
    Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1,
 };
@ -112,22 +112,33 @@ const APP: () = {
        let sample_phase =
            phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic));

-        let output = adc_samples[0]
+        let output: Complex<i32> = adc_samples[0]
            .iter()
+            // Zip in the LO phase.
            .zip(Accu::new(sample_phase, sample_frequency))
-            // Convert to signed, MSB align the ADC sample.
+            // Convert to signed, MSB align the ADC sample, update the Lockin (demodulate, filter)
            .map(|(&sample, phase)| {
-                lockin.update(sample as i16, phase, time_constant)
+                let s = (sample as i16 as i32) << 16;
+                lockin.update(s, phase, time_constant)
            })
+            // Decimate
            .last()
-            .unwrap();
+            .unwrap()
+            * 2; // Full scale assuming the 2f component is gone.

-        let conf = "frequency_discriminator";
+        #[allow(dead_code)]
+        enum Conf {
+            PowerPhase,
+            FrequencyDiscriminator,
+            Quadrature,
+        }
+
+        let conf = Conf::FrequencyDiscriminator; // TODO: expose
        let output = match conf {
            // Convert from IQ to power and phase.
-            "power_phase" => [(output.log2() << 24) as _, output.arg()],
-            "frequency_discriminator" => [pll_frequency as _, output.arg()],
-            _ => [output.0, output.1],
+            Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()],
+            Conf::FrequencyDiscriminator => [pll_frequency as _, output.arg()],
+            Conf::Quadrature => [output.re, output.im],
        };

        // Convert to DAC data.
--- a/src/bin/lockin-internal.rs
+++ b/src/bin/lockin-internal.rs
@ -2,7 +2,7 @@
 #![no_std]
 #![no_main]

-use dsp::{lockin::Lockin, Accu};
+use dsp::{Accu, Complex, ComplexExt, Lockin};
 use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1};
 use stabilizer::{hardware, hardware::design_parameters};

@ -95,17 +95,19 @@ const APP: () = {
        let sample_phase = phase_offset
            .wrapping_add((pll_phase as i32).wrapping_mul(harmonic));

-        let output = adc_samples
+        let output: Complex<i32> = adc_samples
            .iter()
            // Zip in the LO phase.
            .zip(Accu::new(sample_phase, sample_frequency))
            // Convert to signed, MSB align the ADC sample, update the Lockin (demodulate, filter)
            .map(|(&sample, phase)| {
-                lockin.update(sample as i16, phase, time_constant)
+                let s = (sample as i16 as i32) << 16;
+                lockin.update(s, phase, time_constant)
            })
            // Decimate
            .last()
-            .unwrap();
+            .unwrap()
+            * 2; // Full scale assuming the 2f component is gone.

        for value in dac_samples[1].iter_mut() {
            *value = (output.arg() >> 16) as u16 ^ 0x8000;