From 0cd2140668d7f02ab09a9f04a35b29e094f1fc20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Thu, 21 Jan 2021 16:12:59 +0100
Subject: [PATCH] rafactor complex, cossin, atan2

---
 dsp/src/atan2.rs               | 135 +++++++++++++++++++++++++++++++
 dsp/src/complex.rs             |  17 ++++
 dsp/src/{trig.rs => cossin.rs} | 141 ++-------------------------------
 dsp/src/lib.rs                 |  12 +--
 dsp/src/lockin.rs              |  21 +----
 dsp/tests/lockin.rs            |   4 +-
 src/bin/lockin.rs              |  14 ++--
 7 files changed, 175 insertions(+), 169 deletions(-)
 create mode 100644 dsp/src/atan2.rs
 create mode 100644 dsp/src/complex.rs
 rename dsp/src/{trig.rs => cossin.rs} (53%)

diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs
new file mode 100644
index 0000000..6d6e476
--- /dev/null
+++ b/dsp/src/atan2.rs
@@ -0,0 +1,135 @@
+/// 2-argument arctangent function.
+///
+/// This implementation uses all integer arithmetic for fast
+/// computation. It is designed to have high accuracy near the axes
+/// and lower away from the axes. It is additionally designed so that
+/// the error changes slowly with respect to the angle.
+///
+/// # Arguments
+///
+/// * `y` - Y-axis component.
+/// * `x` - X-axis component.
+///
+/// # Returns
+///
+/// The angle between the x-axis and the ray to the point (x,y). The
+/// result range is from i32::MIN to i32::MAX, where i32::MIN
+/// represents -pi and, equivalently, +pi. i32::MAX represents one
+/// count less than +pi.
+pub fn atan2(y: i32, x: i32) -> i32 {
+    let sign = (x < 0, y < 0);
+
+    let mut y = y.wrapping_abs() as u32;
+    let mut x = x.wrapping_abs() as u32;
+
+    let y_greater = y > x;
+    if y_greater {
+        core::mem::swap(&mut y, &mut x);
+    }
+
+    let z = (16 - y.leading_zeros() as i32).max(0);
+
+    x >>= z;
+    if x == 0 {
+        return 0;
+    }
+    y >>= z;
+    let r = (y << 16) / x;
+    debug_assert!(r <= 1 << 16);
+
+    // Uses the general procedure described in the following
+    // Mathematics stack exchange answer:
+    //
+    // https://math.stackexchange.com/a/1105038/583981
+    //
+    // The atan approximation method has been modified to be cheaper
+    // to compute and to be more compatible with integer
+    // arithmetic. The approximation technique used here is
+    //
+    // pi / 4 * r + C * r * (1 - abs(r))
+    //
+    // which is taken from Rajan 2006: Efficient Approximations for
+    // the Arctangent Function.
+    //
+    // The least mean squared error solution is C = 0.279 (no the 0.285 that
+    // Rajan uses). K = C*4/pi.
+    // Q5 for K provides sufficient correction accuracy while preserving
+    // as much smoothness of the quadratic correction as possible.
+    const FP_K: usize = 5;
+    const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32;
+    // debug_assert!(K == 11);
+
+    // `r` is unsigned Q16.16 and <= 1
+    // `angle` is signed Q1.31 with 1 << 31 == +- pi
+    // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use
+    // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range.
+    let mut angle = ((r << 13)
+        + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1)))
+        as i32;
+
+    if y_greater {
+        angle = (1 << 30) - angle;
+    }
+
+    if sign.0 {
+        angle = i32::MAX - angle;
+    }
+
+    if sign.1 {
+        angle = angle.wrapping_neg();
+    }
+
+    angle
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use core::f64::consts::PI;
+
+    fn angle_to_axis(angle: f64) -> f64 {
+        let angle = angle % (PI / 2.);
+        (PI / 2. - angle).min(angle)
+    }
+
+    #[test]
+    fn atan2_absolute_error() {
+        const N: usize = 321;
+        let mut test_vals = [0i32; N + 4];
+        let scale = (1i64 << 31) as f64;
+        for i in 0..N {
+            test_vals[i] = (scale * (-1. + 2. * i as f64 / N as f64)) as i32;
+        }
+
+        assert!(test_vals.contains(&i32::MIN));
+        test_vals[N] = i32::MAX;
+        test_vals[N + 1] = 0;
+        test_vals[N + 2] = -1;
+        test_vals[N + 3] = 1;
+
+        let mut rms_err = 0f64;
+        let mut abs_err = 0f64;
+        let mut rel_err = 0f64;
+
+        for &x in test_vals.iter() {
+            for &y in test_vals.iter() {
+                let want = (y as f64 / scale).atan2(x as f64 / scale);
+                let have = atan2(y, x) as f64 * PI / scale;
+
+                let err = (have - want).abs();
+                abs_err = abs_err.max(err);
+                rms_err += err * err;
+                if err > 3e-5 {
+                    rel_err = rel_err.max(err / angle_to_axis(want));
+                }
+            }
+        }
+        rms_err = rms_err.sqrt() / test_vals.len() as f64;
+        println!("max abs err: {:.2e}", abs_err);
+        println!("rms abs err: {:.2e}", rms_err);
+        println!("max rel err: {:.2e}", rel_err);
+        assert!(abs_err < 5e-3);
+        assert!(rms_err < 3e-3);
+        assert!(rel_err < 0.6);
+    }
+}
diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs
new file mode 100644
index 0000000..4366b43
--- /dev/null
+++ b/dsp/src/complex.rs
@@ -0,0 +1,17 @@
+use super::atan2;
+use serde::{Deserialize, Serialize};
+
+#[derive(Copy, Clone, Default, Deserialize, Serialize)]
+pub struct Complex<T>(pub T, pub T);
+
+impl Complex<i32> {
+    pub fn power(&self) -> i32 {
+        (((self.0 as i64) * (self.0 as i64)
+            + (self.1 as i64) * (self.1 as i64))
+            >> 32) as i32
+    }
+
+    pub fn phase(&self) -> i32 {
+        atan2(self.1, self.0)
+    }
+}
diff --git a/dsp/src/trig.rs b/dsp/src/cossin.rs
similarity index 53%
rename from dsp/src/trig.rs
rename to dsp/src/cossin.rs
index 2269fe7..f9cc42e 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/cossin.rs
@@ -3,90 +3,6 @@ use core::f64::consts::PI;
 
 include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 
-/// 2-argument arctangent function.
-///
-/// This implementation uses all integer arithmetic for fast
-/// computation. It is designed to have high accuracy near the axes
-/// and lower away from the axes. It is additionally designed so that
-/// the error changes slowly with respect to the angle.
-///
-/// # Arguments
-///
-/// * `y` - Y-axis component.
-/// * `x` - X-axis component.
-///
-/// # Returns
-///
-/// The angle between the x-axis and the ray to the point (x,y). The
-/// result range is from i32::MIN to i32::MAX, where i32::MIN
-/// represents -pi and, equivalently, +pi. i32::MAX represents one
-/// count less than +pi.
-pub fn atan2(y: i32, x: i32) -> i32 {
-    let sign = (x < 0, y < 0);
-
-    let mut y = y.wrapping_abs() as u32;
-    let mut x = x.wrapping_abs() as u32;
-
-    let y_greater = y > x;
-    if y_greater {
-        core::mem::swap(&mut y, &mut x);
-    }
-
-    let z = (16 - y.leading_zeros() as i32).max(0);
-
-    x >>= z;
-    if x == 0 {
-        return 0;
-    }
-    y >>= z;
-    let r = (y << 16) / x;
-    debug_assert!(r <= 1 << 16);
-
-    // Uses the general procedure described in the following
-    // Mathematics stack exchange answer:
-    //
-    // https://math.stackexchange.com/a/1105038/583981
-    //
-    // The atan approximation method has been modified to be cheaper
-    // to compute and to be more compatible with integer
-    // arithmetic. The approximation technique used here is
-    //
-    // pi / 4 * r + C * r * (1 - abs(r))
-    //
-    // which is taken from Rajan 2006: Efficient Approximations for
-    // the Arctangent Function.
-    //
-    // The least mean squared error solution is C = 0.279 (no the 0.285 that
-    // Rajan uses). K = C*4/pi.
-    // Q5 for K provides sufficient correction accuracy while preserving
-    // as much smoothness of the quadratic correction as possible.
-    const FP_K: usize = 5;
-    const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32;
-    // debug_assert!(K == 11);
-
-    // `r` is unsigned Q16.16 and <= 1
-    // `angle` is signed Q1.31 with 1 << 31 == +- pi
-    // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use
-    // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range.
-    let mut angle = ((r << 13)
-        + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1)))
-        as i32;
-
-    if y_greater {
-        angle = (1 << 30) - angle;
-    }
-
-    if sign.0 {
-        angle = i32::MAX - angle;
-    }
-
-    if sign.1 {
-        angle = angle.wrapping_neg();
-    }
-
-    angle
-}
-
 /// Compute the cosine and sine of an angle.
 /// This is ported from the MiSoC cossin core.
 /// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
@@ -161,66 +77,21 @@ mod tests {
     use super::*;
     use core::f64::consts::PI;
 
-    fn angle_to_axis(angle: f64) -> f64 {
-        let angle = angle % (PI / 2.);
-        (PI / 2. - angle).min(angle)
-    }
-
-    #[test]
-    fn atan2_absolute_error() {
-        const N: usize = 321;
-        let mut test_vals = [0i32; N + 4];
-        let scale = (1i64 << 31) as f64;
-        for i in 0..N {
-            test_vals[i] = (scale * (-1. + 2. * i as f64 / N as f64)) as i32;
-        }
-
-        assert!(test_vals.contains(&i32::MIN));
-        test_vals[N] = i32::MAX;
-        test_vals[N + 1] = 0;
-        test_vals[N + 2] = -1;
-        test_vals[N + 3] = 1;
-
-        let mut rms_err = 0f64;
-        let mut abs_err = 0f64;
-        let mut rel_err = 0f64;
-
-        for &x in test_vals.iter() {
-            for &y in test_vals.iter() {
-                let want = (y as f64 / scale).atan2(x as f64 / scale);
-                let have = atan2(y, x) as f64 * PI / scale;
-
-                let err = (have - want).abs();
-                abs_err = abs_err.max(err);
-                rms_err += err * err;
-                if err > 3e-5 {
-                    rel_err = rel_err.max(err / angle_to_axis(want));
-                }
-            }
-        }
-        rms_err = rms_err.sqrt() / test_vals.len() as f64;
-        println!("max abs err: {:.2e}", abs_err);
-        println!("rms abs err: {:.2e}", rms_err);
-        println!("max rel err: {:.2e}", rel_err);
-        assert!(abs_err < 5e-3);
-        assert!(rms_err < 3e-3);
-        assert!(rel_err < 0.6);
-    }
-
     #[test]
     fn cossin_error_max_rms_all_phase() {
         // Constant amplitude error due to LUT data range.
         const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _;
         const MAX_PHASE: f64 = (1i64 << 32) as _;
-        let mut rms_err = Complex(0., 0.);
-        let mut sum_err = Complex(0., 0.);
-        let mut max_err = Complex(0f64, 0.);
-        let mut sum = Complex(0., 0.);
-        let mut demod = Complex(0., 0.);
+        let mut rms_err = Complex(0f64, 0f64);
+        let mut sum_err = Complex(0f64, 0f64);
+        let mut max_err = Complex(0f64, 0f64);
+        let mut sum = Complex(0f64, 0f64);
+        let mut demod = Complex(0f64, 0f64);
 
         // use std::{fs::File, io::{BufWriter, prelude::*}, path::Path};
         // let mut file = BufWriter::new(File::create(Path::new("data.bin")).unwrap());
 
+        // log2 of the number of phase values to check
         const PHASE_DEPTH: usize = 20;
 
         for phase in 0..(1 << PHASE_DEPTH) {
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 4e9b642..56a3004 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -2,10 +2,6 @@
 #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]
 
 use core::ops::{Add, Mul, Neg};
-use serde::{Deserialize, Serialize};
-
-#[derive(Copy, Clone, Default, Deserialize, Serialize)]
-pub struct Complex<T>(pub T, pub T);
 
 /// Bit shift, round up half.
 ///
@@ -116,13 +112,19 @@ where
         .fold(y0, |y, xa| y + xa)
 }
 
+mod atan2;
+mod complex;
+mod cossin;
 pub mod iir;
 pub mod iir_int;
 pub mod lockin;
 pub mod pll;
 pub mod reciprocal_pll;
-pub mod trig;
 pub mod unwrap;
 
+pub use atan2::atan2;
+pub use complex::Complex;
+pub use cossin::cossin;
+
 #[cfg(test)]
 pub mod testing;
diff --git a/dsp/src/lockin.rs b/dsp/src/lockin.rs
index d4339ac..591d7e0 100644
--- a/dsp/src/lockin.rs
+++ b/dsp/src/lockin.rs
@@ -1,8 +1,4 @@
-use super::{
-    iir_int,
-    trig::{atan2, cossin},
-    Complex,
-};
+use super::{cossin, iir_int, Complex};
 use serde::{Deserialize, Serialize};
 
 #[derive(Copy, Clone, Default, Deserialize, Serialize)]
@@ -37,19 +33,4 @@ impl Lockin {
             ),
         )
     }
-
-    pub fn iq(&self) -> Complex<i32> {
-        Complex(self.iir_state[0].get_y(0), self.iir_state[1].get_y(0))
-    }
-
-    pub fn power(&self) -> i32 {
-        let iq = self.iq();
-        (((iq.0 as i64) * (iq.0 as i64) + (iq.1 as i64) * (iq.1 as i64)) >> 32)
-            as i32
-    }
-
-    pub fn phase(&self) -> i32 {
-        let iq = self.iq();
-        atan2(iq.1, iq.0)
-    }
 }
diff --git a/dsp/tests/lockin.rs b/dsp/tests/lockin.rs
index 015bb58..dc57dcb 100644
--- a/dsp/tests/lockin.rs
+++ b/dsp/tests/lockin.rs
@@ -1,7 +1,7 @@
 use dsp::{
+    atan2, cossin,
     iir_int::{IIRState, IIR},
     reciprocal_pll::TimestampHandler,
-    trig::{atan2, cossin},
     Complex,
 };
 
@@ -185,7 +185,7 @@ fn adc_batch_timestamps(
 /// https://webaudio.github.io/Audio-EQ-Cookbook/audio-eq-cookbook.html
 ///
 /// # Args
-/// * `fc` - Corner frequency, or 3dB cutoff frequency (in Hz).
+/// * `fc` - Corner frequency, or 3dB cutoff frequency (in units of sample rate).
 /// * `q` - Quality factor (1/sqrt(2) for critical).
 /// * `k` - DC gain.
 ///
diff --git a/src/bin/lockin.rs b/src/bin/lockin.rs
index 8fbf96f..a9a63d4 100644
--- a/src/bin/lockin.rs
+++ b/src/bin/lockin.rs
@@ -70,12 +70,12 @@ const APP: () = {
         stabilizer.dacs.0.start();
         stabilizer.dacs.1.start();
 
-        // Start sampling ADCs.
-        stabilizer.adc_dac_timer.start();
-
         // Start recording digital input timestamps.
         stabilizer.timestamp_timer.start();
 
+        // Start sampling ADCs.
+        stabilizer.adc_dac_timer.start();
+
         init::LateResources {
             afes: stabilizer.afes,
             adcs: stabilizer.adcs,
@@ -127,7 +127,7 @@ const APP: () = {
             .pll
             .update(c.resources.timestamper.latest_timestamp());
 
-        // Harmonic index to demodulate
+        // Harmonic index of the LO: -1 to _de_modulate the fundamental
         let harmonic: i32 = -1;
         // Demodulation LO phase offset
         let phase_offset: i32 = 0;
@@ -139,13 +139,13 @@ const APP: () = {
             // Convert to signed, MSB align the ADC sample.
             let input = (adc_samples[0][i] as i16 as i32) << 16;
             // Obtain demodulated, filtered IQ sample.
-            lockin.update(input, sample_phase);
+            let output = lockin.update(input, sample_phase);
             // Advance the sample phase.
             sample_phase = sample_phase.wrapping_add(sample_frequency);
 
             // Convert from IQ to power and phase.
-            let mut power = lockin.power() as _;
-            let mut phase = lockin.phase() as _;
+            let mut power = output.power() as _;
+            let mut phase = output.phase() as _;
 
             // Filter power and phase through IIR filters.
             // Note: Normalization to be done in filters. Phase will wrap happily.