cossin: shave off a few more instructions

master
Robert Jördens 2021-02-26 19:47:48 +01:00
parent eac19a2fe2
commit 067ef79560
2 changed files with 27 additions and 25 deletions

View File

@ -15,7 +15,7 @@ fn write_cossin_table() {
.unwrap();
write!(
file,
"pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
"pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = ["
)
.unwrap();
@ -29,12 +29,12 @@ fn write_cossin_table() {
// use midpoint samples to save one entry in the LUT
let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
// add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
let sin = (phase.sin() * AMPLITUDE).round() as u16;
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1;
let sin = (phase.sin() * AMPLITUDE).round() as u32;
if i % 4 == 0 {
write!(file, "\n ").unwrap();
}
write!(file, " ({}, {}),", cos, sin).unwrap();
write!(file, " {},", cos + (sin << 16)).unwrap();
}
writeln!(file, "\n];").unwrap();

View File

@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
/// in each quadrature over 20 bit phase.
pub fn cossin(phase: i32) -> (i32, i32) {
// Phase bits excluding the three highes MSB
// Phase bits excluding the three highest MSB
const OCTANT_BITS: usize = 32 - 3;
// This is a slightly more compact way to compute the four flags for
@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) {
let mut octant = (phase as u32) >> OCTANT_BITS;
octant ^= octant << 1;
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
let mut phase = phase & ((1 << OCTANT_BITS) - 1);
let mut phase = phase;
if octant & 1 != 0 {
// phase = pi/4 - phase
phase = (1 << OCTANT_BITS) - 1 - phase;
phase = !phase;
}
let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
// values and scale the sine values as encoded in the LUT.
let mut cos = lookup.0 as i32 + u16::MAX as i32;
let mut sin = (lookup.1 as i32) << 1;
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
phase &= (1 << OCTANT_BITS) - 1;
// 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
const ALIGN_MSB: usize = 32 - 16 - 1;
phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
let lookup = COSSIN[(phase >> ALIGN_MSB) as usize];
phase &= (1 << ALIGN_MSB) - 1;
// The phase values used for the LUT are at midpoint for the truncated phase.
// Interpolate relative to the LUT entry midpoint.
// Also cancel the -1 bias that was conditionally introduced above.
phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
// Fixed point pi/4.
const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
// No rounding bias necessary here since we keep enough low bits.
let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
// Make room for the sign bit.
let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
// Fixed point pi/4.
const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32;
// No rounding bias necessary here since we keep enough low bits.
let dphi = (phase * PI4) >> 16;
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
// values and scale the sine values as encoded in the LUT.
let mut cos = (lookup & 0xffff) as i32 + (1 << 16);
let mut sin = (lookup >> 16) as i32;
let dcos = (sin * dphi) >> COSSIN_DEPTH;
let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
cos = (cos << (ALIGN_MSB - 1)) - dcos;
sin = (sin << (ALIGN_MSB - 1)) + dsin;
sin = (sin << ALIGN_MSB) + dsin;
// Unmap using octant bits.
if octant & 2 != 0 {
core::mem::swap(&mut sin, &mut cos);
}
if octant & 4 != 0 {
cos *= -1;
cos = -cos;
}
if octant & 8 != 0 {
sin *= -1;
sin = -sin;
}
(cos, sin)