Merge pull request #199 from quartiq/feature/cossin-tuneup
Feature/cossin tuneup
This commit is contained in:
commit
a53da3dc84
|
@ -5,7 +5,7 @@ rustflags = [
|
||||||
# The target (below) defaults to cortex-m4
|
# The target (below) defaults to cortex-m4
|
||||||
# There currently are two different options to go beyond that:
|
# There currently are two different options to go beyond that:
|
||||||
# 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet
|
# 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet
|
||||||
"-C", "target-cpu=cortex-m7",
|
# "-C", "target-cpu=cortex-m7",
|
||||||
# 2. cortex-m4 with the additional fpv5 instructions and a potentially
|
# 2. cortex-m4 with the additional fpv5 instructions and a potentially
|
||||||
# better-than-nothing instruction schedule
|
# better-than-nothing instruction schedule
|
||||||
"-C", "target-feature=+fp-armv8d16",
|
"-C", "target-feature=+fp-armv8d16",
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
/target
|
/target
|
||||||
/dsp/target
|
/dsp/target
|
||||||
.gdb_history
|
.gdb_history
|
||||||
/dsp/src/cossin_table.txt
|
|
||||||
|
|
69
dsp/build.rs
69
dsp/build.rs
|
@ -1,43 +1,46 @@
|
||||||
|
use std::env;
|
||||||
use std::f64::consts::PI;
|
use std::f64::consts::PI;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
const TABLE_DEPTH: usize = 8;
|
fn write_cossin_table() {
|
||||||
const TABLE_SIZE: usize = 1 << TABLE_DEPTH;
|
const DEPTH: usize = 7;
|
||||||
|
|
||||||
|
let out_dir = env::var_os("OUT_DIR").unwrap();
|
||||||
|
let dest_path = Path::new(&out_dir).join("cossin_table.rs");
|
||||||
|
let mut file = File::create(dest_path).unwrap();
|
||||||
|
|
||||||
|
writeln!(file, "pub(crate) const COSSIN_DEPTH: usize = {};", DEPTH)
|
||||||
|
.unwrap();
|
||||||
|
write!(
|
||||||
|
file,
|
||||||
|
"pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// Treat sin and cos as unsigned values since the sign will always be
|
// Treat sin and cos as unsigned values since the sign will always be
|
||||||
// positive in the range [0, pi/4).
|
// positive in the range [0, pi/4).
|
||||||
const SINCOS_MAX: f64 = u16::MAX as f64;
|
// No headroom for interpolation rounding error (this is needed for
|
||||||
|
// DEPTH = 6 for example).
|
||||||
|
const AMPLITUDE: f64 = u16::MAX as f64;
|
||||||
|
|
||||||
|
for i in 0..(1 << DEPTH) {
|
||||||
|
// use midpoint samples to save one entry in the LUT
|
||||||
|
let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
|
||||||
|
// add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
|
||||||
|
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
|
||||||
|
let sin = (phase.sin() * AMPLITUDE).round() as u16;
|
||||||
|
if i % 4 == 0 {
|
||||||
|
write!(file, "\n ").unwrap();
|
||||||
|
}
|
||||||
|
write!(file, " ({}, {}),", cos, sin).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(file, "\n];").unwrap();
|
||||||
|
|
||||||
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let path = Path::new("src").join("cossin_table.txt");
|
write_cossin_table();
|
||||||
let display = path.display();
|
|
||||||
|
|
||||||
let mut file = match File::create(&path) {
|
|
||||||
Err(why) => panic!("failed to write to {}: {}", display, why),
|
|
||||||
Ok(file) => file,
|
|
||||||
};
|
|
||||||
|
|
||||||
match file.write_all("[\n".as_bytes()) {
|
|
||||||
Err(why) => panic!("failed to write to {}: {}", display, why),
|
|
||||||
Ok(_) => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
let phase_delta = PI / 4. / TABLE_SIZE as f64;
|
|
||||||
let phase_offset = phase_delta / 2.;
|
|
||||||
for i in 0..TABLE_SIZE {
|
|
||||||
let phase = phase_offset + phase_delta * (i as f64);
|
|
||||||
let cos = ((phase.cos() - 0.5) * 2. * SINCOS_MAX).round() as u16;
|
|
||||||
let sin = (phase.sin() * SINCOS_MAX).round() as u16;
|
|
||||||
let s = format!(" ({}, {}),\n", cos, sin);
|
|
||||||
match file.write_all(s.as_bytes()) {
|
|
||||||
Err(why) => panic!("failed to write to {}: {}", display, why),
|
|
||||||
Ok(_) => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
match file.write_all("]\n".as_bytes()) {
|
|
||||||
Err(why) => panic!("failed to write to {}: {}", display, why),
|
|
||||||
Ok(_) => (),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,8 @@ pub type Complex<T> = (T, T);
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// Shifted and rounded value.
|
/// Shifted and rounded value.
|
||||||
pub fn shift_round(x: i32, shift: i32) -> i32 {
|
#[inline(always)]
|
||||||
|
pub fn shift_round(x: i32, shift: usize) -> i32 {
|
||||||
(x + (1 << (shift - 1))) >> shift
|
(x + (1 << (shift - 1))) >> shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
189
dsp/src/trig.rs
189
dsp/src/trig.rs
|
@ -1,90 +1,71 @@
|
||||||
use super::{shift_round, Complex};
|
use super::Complex;
|
||||||
use core::mem::swap;
|
use core::f64::consts::PI;
|
||||||
|
|
||||||
const PHASE_BITS: i32 = 20;
|
include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
|
||||||
const LUT_DEPTH: i32 = 8;
|
|
||||||
const LUT_SIZE: usize = 1 << LUT_DEPTH as usize;
|
|
||||||
const OCTANT_BITS: i32 = 3;
|
|
||||||
const INTERPOLATION_BITS: i32 = PHASE_BITS - LUT_DEPTH - OCTANT_BITS;
|
|
||||||
static COSSIN_TABLE: [(u16, u16); LUT_SIZE] = include!("cossin_table.txt");
|
|
||||||
|
|
||||||
// Approximate pi/4 with an integer multiplier and right bit
|
|
||||||
// shift. The numerator is designed to saturate the i32 range.
|
|
||||||
const PI_4_NUMERATOR: i32 = 50;
|
|
||||||
const PI_4_RIGHT_SHIFT: i32 = 6;
|
|
||||||
|
|
||||||
/// Compute the cosine and sine of an angle.
|
/// Compute the cosine and sine of an angle.
|
||||||
|
/// This is ported from the MiSoC cossin core.
|
||||||
|
/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
/// * `phase` - 32-bit phase.
|
||||||
/// `phase` - 20-bit fixed-point phase value.
|
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
|
||||||
/// The cos and sin values of the provided phase as a `Complex<i32>`
|
/// The cos and sin values of the provided phase as a `Complex<i32>`
|
||||||
/// value.
|
/// value. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
|
||||||
|
/// in each quadrature over 20 bit phase.
|
||||||
pub fn cossin(phase: i32) -> Complex<i32> {
|
pub fn cossin(phase: i32) -> Complex<i32> {
|
||||||
let mut phase = phase;
|
// Phase bits excluding the three highest MSBs
|
||||||
let octant = (
|
const OCTANT_BITS: usize = 32 - 3;
|
||||||
(phase & (1 << (PHASE_BITS - 1))) >> (PHASE_BITS - 1),
|
|
||||||
(phase & (1 << (PHASE_BITS - 2))) >> (PHASE_BITS - 2),
|
|
||||||
(phase & (1 << (PHASE_BITS - 3))) >> (PHASE_BITS - 3),
|
|
||||||
);
|
|
||||||
|
|
||||||
// Mask off octant bits. This leaves the angle in the range [0,
|
// This is a slightly more compact way to compute the four flags for
|
||||||
// pi/4).
|
// octant mapping/unmapping used below.
|
||||||
phase &= (1 << (PHASE_BITS - OCTANT_BITS)) - 1;
|
let mut octant = (phase as u32) >> OCTANT_BITS;
|
||||||
|
octant ^= octant << 1;
|
||||||
|
|
||||||
if octant.2 == 1 {
|
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
|
||||||
|
let mut phase = phase & ((1 << OCTANT_BITS) - 1);
|
||||||
|
|
||||||
|
if octant & 1 != 0 {
|
||||||
// phase = pi/4 - phase
|
// phase = pi/4 - phase
|
||||||
phase = (1 << (INTERPOLATION_BITS + LUT_DEPTH)) - 1 - phase;
|
phase = (1 << OCTANT_BITS) - 1 - phase;
|
||||||
}
|
}
|
||||||
|
|
||||||
let interpolation: i32 = phase & ((1 << INTERPOLATION_BITS) - 1);
|
let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
|
||||||
|
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
|
||||||
|
// values and scale the sine values as encoded in the LUT.
|
||||||
|
let mut cos = lookup.0 as i32 + u16::MAX as i32;
|
||||||
|
let mut sin = (lookup.1 as i32) << 1;
|
||||||
|
|
||||||
phase >>= INTERPOLATION_BITS;
|
// 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
|
||||||
|
const ALIGN_MSB: usize = 32 - 16 - 1;
|
||||||
|
phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
|
||||||
|
phase &= (1 << ALIGN_MSB) - 1;
|
||||||
|
// The phase values used for the LUT are at midpoint for the truncated phase.
|
||||||
|
// Interpolate relative to the LUT entry midpoint.
|
||||||
|
phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
|
||||||
|
// Fixed point pi/4.
|
||||||
|
const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
|
||||||
|
// No rounding bias necessary here since we keep enough low bits.
|
||||||
|
let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
|
||||||
|
|
||||||
let (mut cos, mut sin) = {
|
// Make room for the sign bit.
|
||||||
let lookup = COSSIN_TABLE[phase as usize];
|
let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
|
||||||
(
|
let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
|
||||||
// 1/2 < cos(0<=x<=pi/4) <= 1. So, to spread out the cos
|
|
||||||
// values and use the space more efficiently, we can
|
|
||||||
// subtract 1/2 and multiply by 2. Therefore, we add 1
|
|
||||||
// back in here. The sin values must be multiplied by 2 to
|
|
||||||
// have the same scale as the cos values.
|
|
||||||
lookup.0 as i32 + u16::MAX as i32,
|
|
||||||
(lookup.1 as i32) << 1,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
// The phase values used for the LUT are adjusted up by half the
|
cos = (cos << (ALIGN_MSB - 1)) - dcos;
|
||||||
// phase step. The interpolation must accurately reflect this. So,
|
sin = (sin << (ALIGN_MSB - 1)) + dsin;
|
||||||
// an interpolation phase offset less than half the maximum
|
|
||||||
// involves a negative phase offset. The rest use a non-negative
|
|
||||||
// phase offset.
|
|
||||||
let interpolation_factor =
|
|
||||||
(interpolation - (1 << (INTERPOLATION_BITS - 1))) * PI_4_NUMERATOR;
|
|
||||||
let dsin = shift_round(
|
|
||||||
cos * interpolation_factor,
|
|
||||||
LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
|
|
||||||
);
|
|
||||||
let dcos = shift_round(
|
|
||||||
-sin * interpolation_factor,
|
|
||||||
LUT_DEPTH + INTERPOLATION_BITS + PI_4_RIGHT_SHIFT,
|
|
||||||
);
|
|
||||||
|
|
||||||
cos += dcos;
|
// Unmap using octant bits.
|
||||||
sin += dsin;
|
if octant & 2 != 0 {
|
||||||
|
core::mem::swap(&mut sin, &mut cos);
|
||||||
if octant.1 ^ octant.2 == 1 {
|
|
||||||
swap(&mut sin, &mut cos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if octant.0 ^ octant.1 == 1 {
|
if octant & 4 != 0 {
|
||||||
cos *= -1;
|
cos *= -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if octant.0 == 1 {
|
if octant & 8 != 0 {
|
||||||
sin *= -1;
|
sin *= -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,35 +75,71 @@ pub fn cossin(phase: i32) -> Complex<i32> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use core::f64::consts::PI;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn error_max_rms_all_phase() {
|
fn error_max_rms_all_phase() {
|
||||||
let max_amplitude: f64 = ((1 << 15) - 1) as f64;
|
// Constant amplitude error due to LUT data range.
|
||||||
|
const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
|
||||||
|
const MAX_PHASE: f64 = (1i64 << 32) as f64;
|
||||||
let mut rms_err: Complex<f64> = (0., 0.);
|
let mut rms_err: Complex<f64> = (0., 0.);
|
||||||
|
let mut sum_err: Complex<f64> = (0., 0.);
|
||||||
|
let mut max_err: Complex<f64> = (0., 0.);
|
||||||
|
let mut sum: Complex<f64> = (0., 0.);
|
||||||
|
let mut demod: Complex<f64> = (0., 0.);
|
||||||
|
|
||||||
for i in 0..(1 << PHASE_BITS) {
|
// use std::{fs::File, io::prelude::*, path::Path};
|
||||||
let phase = i as i32;
|
// let mut file = File::create(Path::new("data.csv")).unwrap();
|
||||||
let radian_phase: f64 =
|
|
||||||
2. * PI * (phase as f64 + 0.5) / ((1 << PHASE_BITS) as f64);
|
|
||||||
|
|
||||||
let actual: Complex<f64> = (
|
const PHASE_DEPTH: usize = 20;
|
||||||
max_amplitude * radian_phase.cos(),
|
|
||||||
max_amplitude * radian_phase.sin(),
|
|
||||||
);
|
|
||||||
let computed = cossin(phase);
|
|
||||||
|
|
||||||
let err = (
|
for phase in 0..(1 << PHASE_DEPTH) {
|
||||||
computed.0 as f64 / 4. - actual.0,
|
let phase = (phase << (32 - PHASE_DEPTH)) as i32;
|
||||||
computed.1 as f64 / 4. - actual.1,
|
let have = cossin(phase);
|
||||||
);
|
// writeln!(file, " {},{}", have.0, have.1).unwrap();
|
||||||
rms_err.0 += err.0 * err.0 / (1 << PHASE_BITS) as f64;
|
|
||||||
rms_err.1 += err.1 * err.1 / (1 << PHASE_BITS) as f64;
|
|
||||||
|
|
||||||
assert!(err.0.abs() < 0.89);
|
let have = (have.0 as f64 / AMPLITUDE, have.1 as f64 / AMPLITUDE);
|
||||||
assert!(err.1.abs() < 0.89);
|
|
||||||
|
let radian_phase = 2. * PI * phase as f64 / MAX_PHASE;
|
||||||
|
let want = (radian_phase.cos(), radian_phase.sin());
|
||||||
|
|
||||||
|
sum.0 += have.0;
|
||||||
|
sum.1 += have.1;
|
||||||
|
|
||||||
|
demod.0 += have.0 * want.0 - have.1 * want.1;
|
||||||
|
demod.1 += have.1 * want.0 + have.0 * want.1;
|
||||||
|
|
||||||
|
let err = (have.0 - want.0, have.1 - want.1);
|
||||||
|
|
||||||
|
sum_err.0 += err.0;
|
||||||
|
sum_err.1 += err.1;
|
||||||
|
|
||||||
|
rms_err.0 += err.0 * err.0;
|
||||||
|
rms_err.1 += err.1 * err.1;
|
||||||
|
|
||||||
|
max_err.0 = max_err.0.max(err.0.abs());
|
||||||
|
max_err.1 = max_err.1.max(err.1.abs());
|
||||||
}
|
}
|
||||||
assert!(rms_err.0.sqrt() < 0.41);
|
rms_err.0 /= MAX_PHASE;
|
||||||
assert!(rms_err.1.sqrt() < 0.41);
|
rms_err.1 /= MAX_PHASE;
|
||||||
|
|
||||||
|
println!("sum: {:.2e} {:.2e}", sum.0, sum.1);
|
||||||
|
println!("demod: {:.2e} {:.2e}", demod.0, demod.1);
|
||||||
|
println!("sum_err: {:.2e} {:.2e}", sum_err.0, sum_err.1);
|
||||||
|
println!("rms: {:.2e} {:.2e}", rms_err.0.sqrt(), rms_err.1.sqrt());
|
||||||
|
println!("max: {:.2e} {:.2e}", max_err.0, max_err.1);
|
||||||
|
|
||||||
|
assert!(sum.0.abs() < 4e-10);
|
||||||
|
assert!(sum.1.abs() < 4e-10);
|
||||||
|
|
||||||
|
assert!(demod.0.abs() < 4e-10);
|
||||||
|
assert!(demod.1.abs() < 4e-10);
|
||||||
|
|
||||||
|
assert!(sum_err.0.abs() < 4e-10);
|
||||||
|
assert!(sum_err.1.abs() < 4e-10);
|
||||||
|
|
||||||
|
assert!(rms_err.0.sqrt() < 6e-8);
|
||||||
|
assert!(rms_err.1.sqrt() < 6e-8);
|
||||||
|
|
||||||
|
assert!(max_err.0 < 1.1e-5);
|
||||||
|
assert!(max_err.1 < 1.1e-5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue