Merge pull request #199 from quartiq/feature/cossin-tuneup

Feature/cossin tuneup
2020-12-10 18:38:11 +01:00 · 2020-12-10 18:38:11 +01:00 · a53da3dc84
parent f6ca79a992 de304c503b
commit a53da3dc84
5 changed files with 144 additions and 124 deletions
--- a/.cargo/config
+++ b/.cargo/config
@ -5,7 +5,7 @@ rustflags = [
 # The target (below) defaults to cortex-m4
 # There currently are two different options to go beyond that:
 # 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet
-    "-C", "target-cpu=cortex-m7",
+#    "-C", "target-cpu=cortex-m7",
 # 2. cortex-m4 with the additional fpv5 instructions and a potentially
 # better-than-nothing instruction schedule
    "-C", "target-feature=+fp-armv8d16",
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,3 @@
 /target
 /dsp/target
 .gdb_history
 /dsp/src/cossin_table.txt
--- a/dsp/build.rs
+++ b/dsp/build.rs
@ -1,43 +1,46 @@
 use std::env;
 use std::f64::consts::PI;
 use std::fs::File;
 use std::io::prelude::*;
 use std::path::Path;
-const TABLE_DEPTH: usize = 8;
+fn write_cossin_table() {
-const TABLE_SIZE: usize = 1 << TABLE_DEPTH;
+    const DEPTH: usize = 7;
    let out_dir = env::var_os("OUT_DIR").unwrap();
    let dest_path = Path::new(&out_dir).join("cossin_table.rs");
    let mut file = File::create(dest_path).unwrap();
    writeln!(file, "pub(crate) const COSSIN_DEPTH: usize = {};", DEPTH)
        .unwrap();
    write!(
        file,
        "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
    )
    .unwrap();
    // Treat sin and cos as unsigned values since the sign will always be
    // positive in the range [0, pi/4).
-const SINCOS_MAX: f64 = u16::MAX as f64;
+    // No headroom for interpolation rounding error (this is needed for
    // DEPTH = 6 for example).
    const AMPLITUDE: f64 = u16::MAX as f64;
    for i in 0..(1 << DEPTH) {
        // use midpoint samples to save one entry in the LUT
        let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
        // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
        let sin = (phase.sin() * AMPLITUDE).round() as u16;
        if i % 4 == 0 {
            write!(file, "\n   ").unwrap();
        }
        write!(file, " ({}, {}),", cos, sin).unwrap();
    }
    writeln!(file, "\n];").unwrap();
    println!("cargo:rerun-if-changed=build.rs");
 }
 fn main() {
-    let path = Path::new("src").join("cossin_table.txt");
+    write_cossin_table();
    let display = path.display();
    let mut file = match File::create(&path) {
        Err(why) => panic!("failed to write to {}: {}", display, why),
        Ok(file) => file,
    };
    match file.write_all("[\n".as_bytes()) {
        Err(why) => panic!("failed to write to {}: {}", display, why),
        Ok(_) => (),
    }
    let phase_delta = PI / 4. / TABLE_SIZE as f64;
    let phase_offset = phase_delta / 2.;
    for i in 0..TABLE_SIZE {
        let phase = phase_offset + phase_delta * (i as f64);
        let cos = ((phase.cos() - 0.5) * 2. * SINCOS_MAX).round() as u16;
        let sin = (phase.sin() * SINCOS_MAX).round() as u16;
        let s = format!("    ({}, {}),\n", cos, sin);
        match file.write_all(s.as_bytes()) {
            Err(why) => panic!("failed to write to {}: {}", display, why),
            Ok(_) => (),
        }
    }
    match file.write_all("]\n".as_bytes()) {
        Err(why) => panic!("failed to write to {}: {}", display, why),
        Ok(_) => (),
    }
 }
--- a/dsp/src/lib.rs
+++ b/dsp/src/lib.rs
@ -13,7 +13,8 @@ pub type Complex<T> = (T, T);
 /// # Returns
 ///
 /// Shifted and rounded value.
-pub fn shift_round(x: i32, shift: i32) -> i32 {
+#[inline(always)]
 pub fn shift_round(x: i32, shift: usize) -> i32 {
    (x + (1 << (shift - 1))) >> shift
 }
--- a/dsp/src/trig.rs
+++ b/dsp/src/trig.rs
@ -1,90 +1,71 @@
-use super::{shift_round, Complex};
+use super::Complex;
-use core::mem::swap;
+use core::f64::consts::PI;
-const PHASE_BITS: i32 = 20;
+include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 const LUT_DEPTH: i32 = 8;
 const LUT_SIZE: usize = 1 << LUT_DEPTH as usize;
 const OCTANT_BITS: i32 = 3;
 const INTERPOLATION_BITS: i32 = PHASE_BITS - LUT_DEPTH - OCTANT_BITS;
 static COSSIN_TABLE: [(u16, u16); LUT_SIZE] = include!("cossin_table.txt");
 // Approximate pi/4 with an integer multiplier and right bit
 // shift. The numerator is designed to saturate the i32 range.
 const PI_4_NUMERATOR: i32 = 50;
 const PI_4_RIGHT_SHIFT: i32 = 6;
 /// Compute the cosine and sine of an angle.
 /// This is ported from the MiSoC cossin core.
 /// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
 ///
 /// # Arguments
-///
+/// * `phase` - 32-bit phase.
 /// `phase` - 20-bit fixed-point phase value.
 ///
 /// # Returns
 ///
 /// The cos and sin values of the provided phase as a `Complex<i32>`
-/// value.
+/// value. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
 /// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> Complex<i32> {
-    let mut phase = phase;
+    // Phase bits excluding the three highest MSBs
-    let octant = (
+    const OCTANT_BITS: usize = 32 - 3;
        (phase & (1 << (PHASE_BITS - 1))) >> (PHASE_BITS - 1),
        (phase & (1 << (PHASE_BITS - 2))) >> (PHASE_BITS - 2),
        (phase & (1 << (PHASE_BITS - 3))) >> (PHASE_BITS - 3),
    );
-    // Mask off octant bits. This leaves the angle in the range [0,
+    // This is a slightly more compact way to compute the four flags for
-    // pi/4).
+    // octant mapping/unmapping used below.
-    phase &= (1 << (PHASE_BITS - OCTANT_BITS)) - 1;
+    let mut octant = (phase as u32) >> OCTANT_BITS;
    octant ^= octant << 1;
-    if octant.2 == 1 {
+    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
    let mut phase = phase & ((1 << OCTANT_BITS) - 1);
    if octant & 1 != 0 {
        // phase = pi/4 - phase
-        phase = (1 << (INTERPOLATION_BITS + LUT_DEPTH)) - 1 - phase;
+        phase = (1 << OCTANT_BITS) - 1 - phase;
    }
-    let interpolation: i32 = phase & ((1 << INTERPOLATION_BITS) - 1);
+    let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
    // values and scale the sine values as encoded in the LUT.
    let mut cos = lookup.0 as i32 + u16::MAX as i32;
    let mut sin = (lookup.1 as i32) << 1;
-    phase >>= INTERPOLATION_BITS;
+    // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
    const ALIGN_MSB: usize = 32 - 16 - 1;
    phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
    phase &= (1 << ALIGN_MSB) - 1;
    // The phase values used for the LUT are at midpoint for the truncated phase.
    // Interpolate relative to the LUT entry midpoint.
    phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
    // Fixed point pi/4.
    const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
    // No rounding bias necessary here since we keep enough low bits.
    let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
-    let (mut cos, mut sin) = {
+    // Make room for the sign bit.
-        let lookup = COSSIN_TABLE[phase as usize];
+    let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
-        (
+    let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
            // 1/2 < cos(0<=x<=pi/4) <= 1. So, to spread out the cos
            // values and use the space more efficiently, we can
            // subtract 1/2 and multiply by 2. Therefore, we add 1
            // back in here. The sin values must be multiplied by 2 to
            // have the same scale as the cos values.
            lookup.0 as i32 + u16::MAX as i32,
            (lookup.1 as i32) << 1,
        )
    };
-    // The phase values used for the LUT are adjusted up by half the
+    cos = (cos << (ALIGN_MSB - 1)) - dcos;
-    // phase step. The interpolation must accurately reflect this. So,
+    sin = (sin << (ALIGN_MSB - 1)) + dsin;
    // an interpolation phase offset less than half the maximum
    // involves a negative phase offset. The rest use a non-negative
    // phase offset.
    let interpolation_factor =
        (interpolation - (1 << (INTERPOLATION_BITS - 1))) * PI_4_NUMERATOR;
    let dsin = shift_round(
        cos * interpolation_factor,
        LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
    );
    let dcos = shift_round(
        -sin * interpolation_factor,
        LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
    );
-    cos += dcos;
+    // Unmap using octant bits.
-    sin += dsin;
+    if octant & 2 != 0 {
-
+        core::mem::swap(&mut sin, &mut cos);
    if octant.1 ^ octant.2 == 1 {
        swap(&mut sin, &mut cos);
    }
-    if octant.0 ^ octant.1 == 1 {
+    if octant & 4 != 0 {
        cos *= -1;
    }
-    if octant.0 == 1 {
+    if octant & 8 != 0 {
        sin *= -1;
    }
@ -94,35 +75,71 @@ pub fn cossin(phase: i32) -> Complex<i32> {
 #[cfg(test)]
 mod tests {
    use super::*;
    use core::f64::consts::PI;
    #[test]
    fn error_max_rms_all_phase() {
-        let max_amplitude: f64 = ((1 << 15) - 1) as f64;
+        // Constant amplitude error due to LUT data range.
        const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
        const MAX_PHASE: f64 = (1i64 << 32) as f64;
        let mut rms_err: Complex<f64> = (0., 0.);
        let mut sum_err: Complex<f64> = (0., 0.);
        let mut max_err: Complex<f64> = (0., 0.);
        let mut sum: Complex<f64> = (0., 0.);
        let mut demod: Complex<f64> = (0., 0.);
-        for i in 0..(1 << PHASE_BITS) {
+        // use std::{fs::File, io::prelude::*, path::Path};
-            let phase = i as i32;
+        // let mut file = File::create(Path::new("data.csv")).unwrap();
            let radian_phase: f64 =
                2. * PI * (phase as f64 + 0.5) / ((1 << PHASE_BITS) as f64);
-            let actual: Complex<f64> = (
+        const PHASE_DEPTH: usize = 20;
                max_amplitude * radian_phase.cos(),
                max_amplitude * radian_phase.sin(),
            );
            let computed = cossin(phase);
-            let err = (
+        for phase in 0..(1 << PHASE_DEPTH) {
-                computed.0 as f64 / 4. - actual.0,
+            let phase = (phase << (32 - PHASE_DEPTH)) as i32;
-                computed.1 as f64 / 4. - actual.1,
+            let have = cossin(phase);
-            );
+            // writeln!(file, " {},{}", have.0, have.1).unwrap();
            rms_err.0 += err.0 * err.0 / (1 << PHASE_BITS) as f64;
            rms_err.1 += err.1 * err.1 / (1 << PHASE_BITS) as f64;
-            assert!(err.0.abs() < 0.89);
+            let have = (have.0 as f64 / AMPLITUDE, have.1 as f64 / AMPLITUDE);
-            assert!(err.1.abs() < 0.89);
+
            let radian_phase = 2. * PI * phase as f64 / MAX_PHASE;
            let want = (radian_phase.cos(), radian_phase.sin());
            sum.0 += have.0;
            sum.1 += have.1;
            demod.0 += have.0 * want.0 - have.1 * want.1;
            demod.1 += have.1 * want.0 + have.0 * want.1;
            let err = (have.0 - want.0, have.1 - want.1);
            sum_err.0 += err.0;
            sum_err.1 += err.1;
            rms_err.0 += err.0 * err.0;
            rms_err.1 += err.1 * err.1;
            max_err.0 = max_err.0.max(err.0.abs());
            max_err.1 = max_err.1.max(err.1.abs());
        }
-        assert!(rms_err.0.sqrt() < 0.41);
+        rms_err.0 /= MAX_PHASE;
-        assert!(rms_err.1.sqrt() < 0.41);
+        rms_err.1 /= MAX_PHASE;
        println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
        println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
        println!("sum_err: {:.2e} {:.2e}", sum_err.0, sum_err.1);
        println!("rms: {:.2e} {:.2e}", rms_err.0.sqrt(), rms_err.1.sqrt());
        println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);
        assert!(sum.0.abs() < 4e-10);
        assert!(sum.1.abs() < 4e-10);
        assert!(demod.0.abs() < 4e-10);
        assert!(demod.1.abs() < 4e-10);
        assert!(sum_err.0.abs() < 4e-10);
        assert!(sum_err.1.abs() < 4e-10);
        assert!(rms_err.0.sqrt() < 6e-8);
        assert!(rms_err.1.sqrt() < 6e-8);
        assert!(max_err.0 < 1.1e-5);
        assert!(max_err.1 < 1.1e-5);
    }
 }