From 1805961d5dade1ed90361a8de6bc49b7c2d4a928 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Thu, 25 Feb 2021 18:01:53 +0100
Subject: [PATCH 1/3] macc_i32: move to tools

---
 dsp/src/iir_int.rs | 14 ++------------
 dsp/src/tools.rs   | 11 +++++++++++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs
index 02fdeb1..ef0c1dc 100644
--- a/dsp/src/iir_int.rs
+++ b/dsp/src/iir_int.rs
@@ -1,3 +1,4 @@
+use super::tools::macc_i32;
 use core::f64::consts::PI;
 use miniconf::StringSet;
 use serde::Deserialize;
@@ -40,17 +41,6 @@ impl Coeff for Vec5 {
     }
 }
 
-fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
-    // Rounding bias, half up
-    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
-    let y = x
-        .iter()
-        .zip(a)
-        .map(|(x, a)| *x as i64 * *a as i64)
-        .fold(y0, |y, xa| y + xa);
-    (y >> shift) as i32
-}
-
 /// Integer biquad IIR
 ///
 /// See `dsp::iir::IIR` for general implementation details.
@@ -86,7 +76,7 @@ impl IIR {
         // Store x0            x0 x1 x2 y1 y2
         xy[0] = x0;
         // Compute y0 by multiply-accumulate
-        let y0 = macc(0, xy, &self.ba, IIR::SHIFT);
+        let y0 = macc_i32(0, xy, &self.ba, IIR::SHIFT);
         // Limit y0
         // let y0 = y0.max(self.y_min).min(self.y_max);
         // Store y0            x0 x1 y0 y1 y2
diff --git a/dsp/src/tools.rs b/dsp/src/tools.rs
index 378734f..6fc2b12 100644
--- a/dsp/src/tools.rs
+++ b/dsp/src/tools.rs
@@ -77,6 +77,17 @@ where
         .fold(y0, |y, xa| y + xa)
 }
 
+pub fn macc_i32(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
+    // Rounding bias, half up
+    let y0 = ((y0 as i64) << shift) + (1 << (shift - 1));
+    let y = x
+        .iter()
+        .zip(a)
+        .map(|(x, a)| *x as i64 * *a as i64)
+        .fold(y0, |y, xa| y + xa);
+    (y >> shift) as i32
+}
+
 /// Combine high and low i32 into a single downscaled i32, saturating the type.
 pub fn saturating_scale(lo: i32, hi: i32, shift: u32) -> i32 {
     debug_assert!(shift & 31 == shift);

From cd289687eafd20f24c5d81ac9a0fe15a42152278 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Sat, 27 Feb 2021 15:37:15 +0100
Subject: [PATCH 2/3] cossin: tighten test

---
 dsp/src/cossin.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs
index 2f21cec..9240d0d 100644
--- a/dsp/src/cossin.rs
+++ b/dsp/src/cossin.rs
@@ -11,7 +11,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 ///
 /// # Returns
 /// The cos and sin values of the provided phase as a `(i32, i32)`
-/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
+/// tuple. With a 7-bit deep LUT there is 9e-6 max and 6e-8 RMS error
 /// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> (i32, i32) {
     // Phase bits excluding the three highest MSB
@@ -81,7 +81,7 @@ mod tests {
     #[test]
     fn cossin_error_max_rms_all_phase() {
         // Constant amplitude error due to LUT data range.
-        const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _;
+        const AMPLITUDE: f64 = (1i64 << 31) as f64 - 0.85 * (1i64 << 15) as f64;
         const MAX_PHASE: f64 = (1i64 << 32) as _;
         let mut rms_err = (0f64, 0f64);
         let mut sum_err = (0f64, 0f64);
@@ -144,7 +144,7 @@ mod tests {
         assert!(rms_err.0.sqrt() < 6e-8);
         assert!(rms_err.1.sqrt() < 6e-8);
 
-        assert!(max_err.0 < 1.1e-5);
-        assert!(max_err.1 < 1.1e-5);
+        assert!(max_err.0 < 9e-6);
+        assert!(max_err.1 < 9e-6);
     }
 }

From 0d6b297d4e0e983d39b6118f8f9f69bd5958aea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Mon, 1 Mar 2021 13:29:59 +0100
Subject: [PATCH 3/3] cossin: don't cancel -1 phase offset

---
 dsp/build.rs      | 12 +++++++-----
 dsp/src/cossin.rs | 27 +++++++++++++++------------
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/dsp/build.rs b/dsp/build.rs
index 5e5fe1d..edd56b0 100644
--- a/dsp/build.rs
+++ b/dsp/build.rs
@@ -26,14 +26,16 @@ fn write_cossin_table() {
     const AMPLITUDE: f64 = u16::MAX as f64;
 
     for i in 0..(1 << DEPTH) {
-        // use midpoint samples to save one entry in the LUT
-        let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
-        // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
-        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1;
-        let sin = (phase.sin() * AMPLITUDE).round() as u32;
         if i % 4 == 0 {
             write!(file, "\n   ").unwrap();
         }
+        // Use midpoint samples to save one entry in the LUT
+        let (sin, cos) =
+            (PI / 4. * ((i as f64 + 0.5) / (1 << DEPTH) as f64)).sin_cos();
+        // Add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
+        // The -1 LSB is cancelled when unscaling with the biased half amplitude
+        let cos = ((cos * 2. - 1.) * AMPLITUDE - 1.).round() as u32;
+        let sin = (sin * AMPLITUDE).round() as u32;
         write!(file, " {},", cos + (sin << 16)).unwrap();
     }
     writeln!(file, "\n];").unwrap();
diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs
index 9240d0d..5dae7c3 100644
--- a/dsp/src/cossin.rs
+++ b/dsp/src/cossin.rs
@@ -11,7 +11,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 ///
 /// # Returns
 /// The cos and sin values of the provided phase as a `(i32, i32)`
-/// tuple. With a 7-bit deep LUT there is 9e-6 max and 6e-8 RMS error
+/// tuple. With a 7-bit deep LUT there is 9e-6 max and 4e-6 RMS error
 /// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> (i32, i32) {
     // Phase bits excluding the three highest MSB
@@ -40,11 +40,14 @@ pub fn cossin(phase: i32) -> (i32, i32) {
 
     // The phase values used for the LUT are at midpoint for the truncated phase.
     // Interpolate relative to the LUT entry midpoint.
-    // Also cancel the -1 bias that was conditionally introduced above.
-    phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
+    phase -= 1 << (ALIGN_MSB - 1);
+
+    // Cancel the -1 bias that was conditionally introduced above.
+    // This lowers the DC spur from 2e-8 to 2e-10 magnitude.
+    // phase += (octant & 1) as i32;
 
     // Fixed point pi/4.
-    const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32;
+    const PI4: i32 = (PI / 4. * (1 << 16) as f64) as _;
     // No rounding bias necessary here since we keep enough low bits.
     let dphi = (phase * PI4) >> 16;
 
@@ -123,8 +126,8 @@ mod tests {
             max_err.0 = max_err.0.max(err.0.abs());
             max_err.1 = max_err.1.max(err.1.abs());
         }
-        rms_err.0 /= MAX_PHASE;
-        rms_err.1 /= MAX_PHASE;
+        rms_err.0 /= (1 << PHASE_DEPTH) as f64;
+        rms_err.1 /= (1 << PHASE_DEPTH) as f64;
 
         println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
         println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
@@ -133,18 +136,18 @@ mod tests {
         println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);
 
         assert!(sum.0.abs() < 4e-10);
-        assert!(sum.1.abs() < 4e-10);
+        assert!(sum.1.abs() < 3e-8);
 
         assert!(demod.0.abs() < 4e-10);
-        assert!(demod.1.abs() < 4e-10);
+        assert!(demod.1.abs() < 1e-8);
 
         assert!(sum_err.0.abs() < 4e-10);
         assert!(sum_err.1.abs() < 4e-10);
 
-        assert!(rms_err.0.sqrt() < 6e-8);
-        assert!(rms_err.1.sqrt() < 6e-8);
+        assert!(rms_err.0.sqrt() < 4e-6);
+        assert!(rms_err.1.sqrt() < 4e-6);
 
-        assert!(max_err.0 < 9e-6);
-        assert!(max_err.1 < 9e-6);
+        assert!(max_err.0 < 1e-5);
+        assert!(max_err.1 < 1e-5);
     }
 }