Merge pull request #199 from quartiq/feature/cossin-tuneup

Feature/cossin tuneup
This commit is contained in:
Robert Jördens 2020-12-10 18:38:11 +01:00 committed by GitHub
commit a53da3dc84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 144 additions and 124 deletions

View File

@ -5,7 +5,7 @@ rustflags = [
# The target (below) defaults to cortex-m4
# There currently are two different options to go beyond that:
# 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet
"-C", "target-cpu=cortex-m7",
# "-C", "target-cpu=cortex-m7",
# 2. cortex-m4 with the additional fpv5 instructions and a potentially
# better-than-nothing instruction schedule
"-C", "target-feature=+fp-armv8d16",

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
/target
/dsp/target
.gdb_history
/dsp/src/cossin_table.txt

View File

@ -1,43 +1,46 @@
use std::env;
use std::f64::consts::PI;
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
const TABLE_DEPTH: usize = 8;
const TABLE_SIZE: usize = 1 << TABLE_DEPTH;
fn write_cossin_table() {
const DEPTH: usize = 7;
let out_dir = env::var_os("OUT_DIR").unwrap();
let dest_path = Path::new(&out_dir).join("cossin_table.rs");
let mut file = File::create(dest_path).unwrap();
writeln!(file, "pub(crate) const COSSIN_DEPTH: usize = {};", DEPTH)
.unwrap();
write!(
file,
"pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
)
.unwrap();
// Treat sin and cos as unsigned values since the sign will always be
// positive in the range [0, pi/4).
const SINCOS_MAX: f64 = u16::MAX as f64;
// No headroom for interpolation rounding error (this is needed for
// DEPTH = 6 for example).
const AMPLITUDE: f64 = u16::MAX as f64;
for i in 0..(1 << DEPTH) {
// use midpoint samples to save one entry in the LUT
let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
// add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
let sin = (phase.sin() * AMPLITUDE).round() as u16;
if i % 4 == 0 {
write!(file, "\n ").unwrap();
}
write!(file, " ({}, {}),", cos, sin).unwrap();
}
writeln!(file, "\n];").unwrap();
println!("cargo:rerun-if-changed=build.rs");
}
fn main() {
let path = Path::new("src").join("cossin_table.txt");
let display = path.display();
let mut file = match File::create(&path) {
Err(why) => panic!("failed to write to {}: {}", display, why),
Ok(file) => file,
};
match file.write_all("[\n".as_bytes()) {
Err(why) => panic!("failed to write to {}: {}", display, why),
Ok(_) => (),
}
let phase_delta = PI / 4. / TABLE_SIZE as f64;
let phase_offset = phase_delta / 2.;
for i in 0..TABLE_SIZE {
let phase = phase_offset + phase_delta * (i as f64);
let cos = ((phase.cos() - 0.5) * 2. * SINCOS_MAX).round() as u16;
let sin = (phase.sin() * SINCOS_MAX).round() as u16;
let s = format!(" ({}, {}),\n", cos, sin);
match file.write_all(s.as_bytes()) {
Err(why) => panic!("failed to write to {}: {}", display, why),
Ok(_) => (),
}
}
match file.write_all("]\n".as_bytes()) {
Err(why) => panic!("failed to write to {}: {}", display, why),
Ok(_) => (),
}
write_cossin_table();
}

View File

@ -13,7 +13,8 @@ pub type Complex<T> = (T, T);
/// # Returns
///
/// Shifted and rounded value.
pub fn shift_round(x: i32, shift: i32) -> i32 {
#[inline(always)]
pub fn shift_round(x: i32, shift: usize) -> i32 {
(x + (1 << (shift - 1))) >> shift
}

View File

@ -1,90 +1,71 @@
use super::{shift_round, Complex};
use core::mem::swap;
use super::Complex;
use core::f64::consts::PI;
const PHASE_BITS: i32 = 20;
const LUT_DEPTH: i32 = 8;
const LUT_SIZE: usize = 1 << LUT_DEPTH as usize;
const OCTANT_BITS: i32 = 3;
const INTERPOLATION_BITS: i32 = PHASE_BITS - LUT_DEPTH - OCTANT_BITS;
static COSSIN_TABLE: [(u16, u16); LUT_SIZE] = include!("cossin_table.txt");
// Approximate pi/4 with an integer multiplier and right bit
// shift. The numerator is designed to saturate the i32 range.
const PI_4_NUMERATOR: i32 = 50;
const PI_4_RIGHT_SHIFT: i32 = 6;
include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
/// Compute the cosine and sine of an angle.
/// This is ported from the MiSoC cossin core.
/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
///
/// # Arguments
///
/// `phase` - 20-bit fixed-point phase value.
/// * `phase` - 32-bit phase.
///
/// # Returns
///
/// The cos and sin values of the provided phase as a `Complex<i32>`
/// value.
/// value. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
/// in each quadrature over 20 bit phase.
pub fn cossin(phase: i32) -> Complex<i32> {
let mut phase = phase;
let octant = (
(phase & (1 << (PHASE_BITS - 1))) >> (PHASE_BITS - 1),
(phase & (1 << (PHASE_BITS - 2))) >> (PHASE_BITS - 2),
(phase & (1 << (PHASE_BITS - 3))) >> (PHASE_BITS - 3),
);
// Phase bits excluding the three highes MSB
const OCTANT_BITS: usize = 32 - 3;
// Mask off octant bits. This leaves the angle in the range [0,
// pi/4).
phase &= (1 << (PHASE_BITS - OCTANT_BITS)) - 1;
// This is a slightly more compact way to compute the four flags for
// octant mapping/unmapping used below.
let mut octant = (phase as u32) >> OCTANT_BITS;
octant ^= octant << 1;
if octant.2 == 1 {
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
let mut phase = phase & ((1 << OCTANT_BITS) - 1);
if octant & 1 != 0 {
// phase = pi/4 - phase
phase = (1 << (INTERPOLATION_BITS + LUT_DEPTH)) - 1 - phase;
phase = (1 << OCTANT_BITS) - 1 - phase;
}
let interpolation: i32 = phase & ((1 << INTERPOLATION_BITS) - 1);
let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
// values and scale the sine values as encoded in the LUT.
let mut cos = lookup.0 as i32 + u16::MAX as i32;
let mut sin = (lookup.1 as i32) << 1;
phase >>= INTERPOLATION_BITS;
// 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
const ALIGN_MSB: usize = 32 - 16 - 1;
phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
phase &= (1 << ALIGN_MSB) - 1;
// The phase values used for the LUT are at midpoint for the truncated phase.
// Interpolate relative to the LUT entry midpoint.
phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
// Fixed point pi/4.
const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
// No rounding bias necessary here since we keep enough low bits.
let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
let (mut cos, mut sin) = {
let lookup = COSSIN_TABLE[phase as usize];
(
// 1/2 < cos(0<=x<=pi/4) <= 1. So, to spread out the cos
// values and use the space more efficiently, we can
// subtract 1/2 and multiply by 2. Therefore, we add 1
// back in here. The sin values must be multiplied by 2 to
// have the same scale as the cos values.
lookup.0 as i32 + u16::MAX as i32,
(lookup.1 as i32) << 1,
)
};
// Make room for the sign bit.
let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
// The phase values used for the LUT are adjusted up by half the
// phase step. The interpolation must accurately reflect this. So,
// an interpolation phase offset less than half the maximum
// involves a negative phase offset. The rest us a non-negative
// phase offset.
let interpolation_factor =
(interpolation - (1 << (INTERPOLATION_BITS - 1))) * PI_4_NUMERATOR;
let dsin = shift_round(
cos * interpolation_factor,
LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
);
let dcos = shift_round(
-sin * interpolation_factor,
LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
);
cos = (cos << (ALIGN_MSB - 1)) - dcos;
sin = (sin << (ALIGN_MSB - 1)) + dsin;
cos += dcos;
sin += dsin;
if octant.1 ^ octant.2 == 1 {
swap(&mut sin, &mut cos);
// Unmap using octant bits.
if octant & 2 != 0 {
core::mem::swap(&mut sin, &mut cos);
}
if octant.0 ^ octant.1 == 1 {
if octant & 4 != 0 {
cos *= -1;
}
if octant.0 == 1 {
if octant & 8 != 0 {
sin *= -1;
}
@ -94,35 +75,71 @@ pub fn cossin(phase: i32) -> Complex<i32> {
#[cfg(test)]
mod tests {
use super::*;
use core::f64::consts::PI;
#[test]
fn error_max_rms_all_phase() {
let max_amplitude: f64 = ((1 << 15) - 1) as f64;
// Constant amplitude error due to LUT data range.
const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
const MAX_PHASE: f64 = (1i64 << 32) as f64;
let mut rms_err: Complex<f64> = (0., 0.);
let mut sum_err: Complex<f64> = (0., 0.);
let mut max_err: Complex<f64> = (0., 0.);
let mut sum: Complex<f64> = (0., 0.);
let mut demod: Complex<f64> = (0., 0.);
for i in 0..(1 << PHASE_BITS) {
let phase = i as i32;
let radian_phase: f64 =
2. * PI * (phase as f64 + 0.5) / ((1 << PHASE_BITS) as f64);
// use std::{fs::File, io::prelude::*, path::Path};
// let mut file = File::create(Path::new("data.csv")).unwrap();
let actual: Complex<f64> = (
max_amplitude * radian_phase.cos(),
max_amplitude * radian_phase.sin(),
);
let computed = cossin(phase);
const PHASE_DEPTH: usize = 20;
let err = (
computed.0 as f64 / 4. - actual.0,
computed.1 as f64 / 4. - actual.1,
);
rms_err.0 += err.0 * err.0 / (1 << PHASE_BITS) as f64;
rms_err.1 += err.1 * err.1 / (1 << PHASE_BITS) as f64;
for phase in 0..(1 << PHASE_DEPTH) {
let phase = (phase << (32 - PHASE_DEPTH)) as i32;
let have = cossin(phase);
// writeln!(file, " {},{}", have.0, have.1).unwrap();
assert!(err.0.abs() < 0.89);
assert!(err.1.abs() < 0.89);
let have = (have.0 as f64 / AMPLITUDE, have.1 as f64 / AMPLITUDE);
let radian_phase = 2. * PI * phase as f64 / MAX_PHASE;
let want = (radian_phase.cos(), radian_phase.sin());
sum.0 += have.0;
sum.1 += have.1;
demod.0 += have.0 * want.0 - have.1 * want.1;
demod.1 += have.1 * want.0 + have.0 * want.1;
let err = (have.0 - want.0, have.1 - want.1);
sum_err.0 += err.0;
sum_err.1 += err.1;
rms_err.0 += err.0 * err.0;
rms_err.1 += err.1 * err.1;
max_err.0 = max_err.0.max(err.0.abs());
max_err.1 = max_err.1.max(err.1.abs());
}
assert!(rms_err.0.sqrt() < 0.41);
assert!(rms_err.1.sqrt() < 0.41);
rms_err.0 /= MAX_PHASE;
rms_err.1 /= MAX_PHASE;
println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
println!("sum_err: {:.2e} {:.2e}", sum_err.0, sum_err.1);
println!("rms: {:.2e} {:.2e}", rms_err.0.sqrt(), rms_err.1.sqrt());
println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);
assert!(sum.0.abs() < 4e-10);
assert!(sum.1.abs() < 4e-10);
assert!(demod.0.abs() < 4e-10);
assert!(demod.1.abs() < 4e-10);
assert!(sum_err.0.abs() < 4e-10);
assert!(sum_err.1.abs() < 4e-10);
assert!(rms_err.0.sqrt() < 6e-8);
assert!(rms_err.1.sqrt() < 6e-8);
assert!(max_err.0 < 1.1e-5);
assert!(max_err.1 < 1.1e-5);
}
}