From 77cb0bbad0743868a80ab79c6d47eb34bff80053 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Robert=20J=C3=B6rdens?= <rj@quartiq.de>
Date: Thu, 10 Dec 2020 16:56:13 +0100
Subject: [PATCH] cossin: refactor and tweak

* shrink the LUT by another bit
* correctly use the octant bit to offset the dphi to LUT entry midpoint
* add more diagnistics to the unittest and rewrite it in relative units
* MSB-align phase and output to match the PLL data, dynamic range and
  remove the need for roudning bias.
* clean up the build.rs table generator a bit
---
 .gitignore      |   2 +-
 dsp/build.rs    |  68 +++++++++--------
 dsp/src/lib.rs  |   4 +-
 dsp/src/trig.rs | 192 ++++++++++++++++++++++++++----------------------
 4 files changed, 142 insertions(+), 124 deletions(-)
diff --git a/.gitignore b/.gitignore
index 241d97c..1a29db4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 /target
 /dsp/target
 .gdb_history
-/dsp/src/cossin_table.txt
+/dsp/src/cossin_table.rs
diff --git a/dsp/build.rs b/dsp/build.rs
index 1b60363..07a4c2f 100644
--- a/dsp/build.rs
+++ b/dsp/build.rs
@@ -3,41 +3,39 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;
 
-const TABLE_DEPTH: usize = 8;
-const TABLE_SIZE: usize = 1 << TABLE_DEPTH;
-// Treat sin and cos as unsigned values since the sign will always be
-// positive in the range [0, pi/4).
-const SINCOS_MAX: f64 = u16::MAX as f64;
+fn write_cossin_table() {
+    const DEPTH: usize = 7;
+
+    let mut file =
+        File::create(Path::new("src").join("cossin_table.rs")).unwrap();
+    writeln!(file, "pub(crate) const COSSIN_DEPTH: usize = {};", DEPTH)
+        .unwrap();
+    write!(
+        file,
+        "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
+    )
+    .unwrap();
+
+    // Treat sin and cos as unsigned values since the sign will always be
+    // positive in the range [0, pi/4).
+    // No headroom for interpolation rounding error (this is needed for
+    // DEPTH = 6 for example).
+    const AMPLITUDE: f64 = u16::MAX as f64;
+
+    for i in 0..(1 << DEPTH) {
+        // use midpoint samples to save one entry in the LUT
+        let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
+        // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
+        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
+        let sin = (phase.sin() * AMPLITUDE).round() as u16;
+        if i % 4 == 0 {
+            write!(file, "\n   ").unwrap();
+        }
+        write!(file, " ({}, {}),", cos, sin).unwrap();
+    }
+    writeln!(file, "\n];").unwrap();
+}
 
 fn main() {
-    let path = Path::new("src").join("cossin_table.txt");
-    let display = path.display();
-
-    let mut file = match File::create(&path) {
-        Err(why) => panic!("failed to write to {}: {}", display, why),
-        Ok(file) => file,
-    };
-
-    match file.write_all("[\n".as_bytes()) {
-        Err(why) => panic!("failed to write to {}: {}", display, why),
-        Ok(_) => (),
-    }
-
-    let phase_delta = PI / 4. / TABLE_SIZE as f64;
-    let phase_offset = phase_delta / 2.;
-    for i in 0..TABLE_SIZE {
-        let phase = phase_offset + phase_delta * (i as f64);
-        let cos = ((phase.cos() - 0.5) * 2. * SINCOS_MAX).round() as u16;
-        let sin = (phase.sin() * SINCOS_MAX).round() as u16;
-        let s = format!("    ({}, {}),\n", cos, sin);
-        match file.write_all(s.as_bytes()) {
-            Err(why) => panic!("failed to write to {}: {}", display, why),
-            Ok(_) => (),
-        }
-    }
-
-    match file.write_all("]\n".as_bytes()) {
-        Err(why) => panic!("failed to write to {}: {}", display, why),
-        Ok(_) => (),
-    }
+    write_cossin_table();
 }
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs
index 798b0fc..98bd661 100644
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@@ -13,10 +13,12 @@ pub type Complex<T> = (T, T);
 /// # Returns
 ///
 /// Shifted and rounded value.
-pub fn shift_round(x: i32, shift: i32) -> i32 {
+#[inline(always)]
+pub fn shift_round(x: i32, shift: usize) -> i32 {
     (x + (1 << (shift - 1))) >> shift
 }
 
+mod cossin_table;
 pub mod iir;
 pub mod lockin;
 pub mod pll;
diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs
index 3c7eb2b..cdded60 100644
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@@ -1,90 +1,72 @@
-use super::{shift_round, Complex};
-use core::mem::swap;
-
-const PHASE_BITS: i32 = 20;
-const LUT_DEPTH: i32 = 8;
-const LUT_SIZE: usize = 1 << LUT_DEPTH as usize;
-const OCTANT_BITS: i32 = 3;
-const INTERPOLATION_BITS: i32 = PHASE_BITS - LUT_DEPTH - OCTANT_BITS;
-static COSSIN_TABLE: [(u16, u16); LUT_SIZE] = include!("cossin_table.txt");
-
-// Approximate pi/4 with an integer multiplier and right bit
-// shift. The numerator is designed to saturate the i32 range.
-const PI_4_NUMERATOR: i32 = 50;
-const PI_4_RIGHT_SHIFT: i32 = 6;
+use super::{
+    cossin_table::{COSSIN, COSSIN_DEPTH},
+    Complex,
+};
+use core::f64::consts::PI;
 
 /// Compute the cosine and sine of an angle.
+/// This is ported from the MiSoC cossin core.
+/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
 ///
 /// # Arguments
-///
-/// `phase` - 20-bit fixed-point phase value.
+/// * `phase` - 32-bit phase.
 ///
 /// # Returns
-///
 /// The cos and sin values of the provided phase as a `Complex<i32>`
-/// value.
+/// value. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
+/// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> Complex<i32> {
-    let mut phase = phase;
-    let octant = (
-        (phase & (1 << (PHASE_BITS - 1))) >> (PHASE_BITS - 1),
-        (phase & (1 << (PHASE_BITS - 2))) >> (PHASE_BITS - 2),
-        (phase & (1 << (PHASE_BITS - 3))) >> (PHASE_BITS - 3),
-    );
+    // Phase bits excluding the three highes MSB
+    const OCTANT_BITS: usize = 32 - 3;
 
-    // Mask off octant bits. This leaves the angle in the range [0,
-    // pi/4).
-    phase &= (1 << (PHASE_BITS - OCTANT_BITS)) - 1;
+    // This is a slightly more compact way to compute the four flags for
+    // octant mapping/unmapping used below.
+    let mut octant = (phase as u32) >> OCTANT_BITS;
+    octant ^= octant << 1;
 
-    if octant.2 == 1 {
+    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
+    let mut phase = phase & ((1 << OCTANT_BITS) - 1);
+
+    if octant & 1 != 0 {
         // phase = pi/4 - phase
-        phase = (1 << (INTERPOLATION_BITS + LUT_DEPTH)) - 1 - phase;
+        phase = (1 << OCTANT_BITS) - 1 - phase;
     }
 
-    let interpolation: i32 = phase & ((1 << INTERPOLATION_BITS) - 1);
+    let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
+    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
+    // values and scale the sine values as encoded in the LUT.
+    let mut cos = lookup.0 as i32 + u16::MAX as i32;
+    let mut sin = (lookup.1 as i32) << 1;
 
-    phase >>= INTERPOLATION_BITS;
+    // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
+    const ALIGN_MSB: usize = 32 - 16 - 1;
+    phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
+    phase &= (1 << ALIGN_MSB) - 1;
+    // The phase values used for the LUT are at midpoint for the truncated phase.
+    // Interpolate relative to the LUT entry midpoint.
+    phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
+    // Fixed point pi/4.
+    const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
+    // No rounding bias necessary here since we keep enough low bits.
+    let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
 
-    let (mut cos, mut sin) = {
-        let lookup = COSSIN_TABLE[phase as usize];
-        (
-            // 1/2 < cos(0<=x<=pi/4) <= 1. So, to spread out the cos
-            // values and use the space more efficiently, we can
-            // subtract 1/2 and multiply by 2. Therefore, we add 1
-            // back in here. The sin values must be multiplied by 2 to
-            // have the same scale as the cos values.
-            lookup.0 as i32 + u16::MAX as i32,
-            (lookup.1 as i32) << 1,
-        )
-    };
+    // Make room for the sign bit.
+    let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
+    let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
 
-    // The phase values used for the LUT are adjusted up by half the
-    // phase step. The interpolation must accurately reflect this. So,
-    // an interpolation phase offset less than half the maximum
-    // involves a negative phase offset. The rest us a non-negative
-    // phase offset.
-    let interpolation_factor =
-        (interpolation - (1 << (INTERPOLATION_BITS - 1))) * PI_4_NUMERATOR;
-    let dsin = shift_round(
-        cos * interpolation_factor,
-        LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
-    );
-    let dcos = shift_round(
-        -sin * interpolation_factor,
-        LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
-    );
+    cos = (cos << (ALIGN_MSB - 1)) - dcos;
+    sin = (sin << (ALIGN_MSB - 1)) + dsin;
 
-    cos += dcos;
-    sin += dsin;
-
-    if octant.1 ^ octant.2 == 1 {
-        swap(&mut sin, &mut cos);
+    // Unmap using octant bits.
+    if octant & 2 != 0 {
+        core::mem::swap(&mut sin, &mut cos);
     }
 
-    if octant.0 ^ octant.1 == 1 {
+    if octant & 4 != 0 {
         cos *= -1;
     }
 
-    if octant.0 == 1 {
+    if octant & 8 != 0 {
         sin *= -1;
     }
 
@@ -94,35 +76,71 @@ pub fn cossin(phase: i32) -> Complex<i32> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use core::f64::consts::PI;
-
     #[test]
     fn error_max_rms_all_phase() {
-        let max_amplitude: f64 = ((1 << 15) - 1) as f64;
+        // Constant amplitude error due to LUT data range.
+        const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
+        const MAX_PHASE: f64 = (1i64 << 32) as f64;
         let mut rms_err: Complex<f64> = (0., 0.);
+        let mut sum_err: Complex<f64> = (0., 0.);
+        let mut max_err: Complex<f64> = (0., 0.);
+        let mut sum: Complex<f64> = (0., 0.);
+        let mut demod: Complex<f64> = (0., 0.);
 
-        for i in 0..(1 << PHASE_BITS) {
-            let phase = i as i32;
-            let radian_phase: f64 =
-                2. * PI * (phase as f64 + 0.5) / ((1 << PHASE_BITS) as f64);
+        // use std::{fs::File, io::prelude::*, path::Path};
+        // let mut file = File::create(Path::new("data.csv")).unwrap();
 
-            let actual: Complex<f64> = (
-                max_amplitude * radian_phase.cos(),
-                max_amplitude * radian_phase.sin(),
-            );
-            let computed = cossin(phase);
+        const PHASE_DEPTH: usize = 20;
 
-            let err = (
-                computed.0 as f64 / 4. - actual.0,
-                computed.1 as f64 / 4. - actual.1,
-            );
-            rms_err.0 += err.0 * err.0 / (1 << PHASE_BITS) as f64;
-            rms_err.1 += err.1 * err.1 / (1 << PHASE_BITS) as f64;
+        for phase in 0..(1 << PHASE_DEPTH) {
+            let phase = (phase << (32 - PHASE_DEPTH)) as i32;
+            let have = cossin(phase);
+            // writeln!(file, " {},{}", have.0, have.1).unwrap();
 
-            assert!(err.0.abs() < 0.89);
-            assert!(err.1.abs() < 0.89);
+            let have = (have.0 as f64 / AMPLITUDE, have.1 as f64 / AMPLITUDE);
+
+            let radian_phase = 2. * PI * phase as f64 / MAX_PHASE;
+            let want = (radian_phase.cos(), radian_phase.sin());
+
+            sum.0 += have.0;
+            sum.1 += have.1;
+
+            demod.0 += have.0 * want.0 - have.1 * want.1;
+            demod.1 += have.1 * want.0 + have.0 * want.1;
+
+            let err = (have.0 - want.0, have.1 - want.1);
+
+            sum_err.0 += err.0;
+            sum_err.1 += err.1;
+
+            rms_err.0 += err.0 * err.0;
+            rms_err.1 += err.1 * err.1;
+
+            max_err.0 = max_err.0.max(err.0.abs());
+            max_err.1 = max_err.1.max(err.1.abs());
         }
-        assert!(rms_err.0.sqrt() < 0.41);
-        assert!(rms_err.1.sqrt() < 0.41);
+        rms_err.0 /= MAX_PHASE;
+        rms_err.1 /= MAX_PHASE;
+
+        println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
+        println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
+        println!("sum_err: {:.2e} {:.2e}", sum_err.0, sum_err.1);
+        println!("rms: {:.2e} {:.2e}", rms_err.0.sqrt(), rms_err.1.sqrt());
+        println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);
+
+        assert!(sum.0.abs() < 4e-10);
+        assert!(sum.1.abs() < 4e-10);
+
+        assert!(demod.0.abs() < 4e-10);
+        assert!(demod.1.abs() < 4e-10);
+
+        assert!(sum_err.0.abs() < 4e-10);
+        assert!(sum_err.1.abs() < 4e-10);
+
+        assert!(rms_err.0.sqrt() < 6e-8);
+        assert!(rms_err.1.sqrt() < 6e-8);
+
+        assert!(max_err.0 < 1.1e-5);
+        assert!(max_err.1 < 1.1e-5);
     }
 }