From 067ef79560623ff6ae461173ad313aa546763ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Fri, 26 Feb 2021 19:47:48 +0100 Subject: [PATCH] cossin: shave off a few more instructions --- dsp/build.rs | 8 ++++---- dsp/src/cossin.rs | 44 +++++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/dsp/build.rs b/dsp/build.rs index ca43736..5e5fe1d 100644 --- a/dsp/build.rs +++ b/dsp/build.rs @@ -15,7 +15,7 @@ fn write_cossin_table() { .unwrap(); write!( file, - "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = [" + "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = [" ) .unwrap(); @@ -29,12 +29,12 @@ fn write_cossin_table() { // use midpoint samples to save one entry in the LUT let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5); // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 - let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16; - let sin = (phase.sin() * AMPLITUDE).round() as u16; + let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1; + let sin = (phase.sin() * AMPLITUDE).round() as u32; if i % 4 == 0 { write!(file, "\n ").unwrap(); } - write!(file, " ({}, {}),", cos, sin).unwrap(); + write!(file, " {},", cos + (sin << 16)).unwrap(); } writeln!(file, "\n];").unwrap(); diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 4a5cccd..2f21cec 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { - // Phase bits excluding the three highes MSB + // Phase bits excluding the three highest MSB const OCTANT_BITS: usize = 32 - 3; // This is a slightly more compact way to compute the four flags for @@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) { let mut octant = (phase as u32) >> OCTANT_BITS; octant ^= octant << 1; - // Mask off octant bits. This leaves the angle in the range [0, pi/4). - let mut phase = phase & ((1 << OCTANT_BITS) - 1); - + let mut phase = phase; if octant & 1 != 0 { // phase = pi/4 - phase - phase = (1 << OCTANT_BITS) - 1 - phase; + phase = !phase; } - let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize]; - // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos - // values and scale the sine values as encoded in the LUT. - let mut cos = lookup.0 as i32 + u16::MAX as i32; - let mut sin = (lookup.1 as i32) << 1; + // Mask off octant bits. This leaves the angle in the range [0, pi/4). + phase &= (1 << OCTANT_BITS) - 1; // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range. const ALIGN_MSB: usize = 32 - 16 - 1; phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB; + + let lookup = COSSIN[(phase >> ALIGN_MSB) as usize]; phase &= (1 << ALIGN_MSB) - 1; + // The phase values used for the LUT are at midpoint for the truncated phase. // Interpolate relative to the LUT entry midpoint. + // Also cancel the -1 bias that was conditionally introduced above. phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32; - // Fixed point pi/4. - const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32; - // No rounding bias necessary here since we keep enough low bits. - let dphi = (phase * PI4) >> (32 - ALIGN_MSB); - // Make room for the sign bit. - let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1); + // Fixed point pi/4. + const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32; + // No rounding bias necessary here since we keep enough low bits. + let dphi = (phase * PI4) >> 16; + + // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos + // values and scale the sine values as encoded in the LUT. + let mut cos = (lookup & 0xffff) as i32 + (1 << 16); + let mut sin = (lookup >> 16) as i32; + + let dcos = (sin * dphi) >> COSSIN_DEPTH; let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1); cos = (cos << (ALIGN_MSB - 1)) - dcos; - sin = (sin << (ALIGN_MSB - 1)) + dsin; + sin = (sin << ALIGN_MSB) + dsin; // Unmap using octant bits. if octant & 2 != 0 { core::mem::swap(&mut sin, &mut cos); } - if octant & 4 != 0 { - cos *= -1; + cos = -cos; } - if octant & 8 != 0 { - sin *= -1; + sin = -sin; } (cos, sin)