Merge pull request #284 from quartiq/cossin-tweaks
cossin: shave off a few more instructions
This commit is contained in:
commit
83496205b3
@ -15,7 +15,7 @@ fn write_cossin_table() {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
write!(
|
write!(
|
||||||
file,
|
file,
|
||||||
"pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
|
"pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = ["
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -29,12 +29,12 @@ fn write_cossin_table() {
|
|||||||
// use midpoint samples to save one entry in the LUT
|
// use midpoint samples to save one entry in the LUT
|
||||||
let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
|
let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
|
||||||
// add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
|
// add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
|
||||||
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
|
let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1;
|
||||||
let sin = (phase.sin() * AMPLITUDE).round() as u16;
|
let sin = (phase.sin() * AMPLITUDE).round() as u32;
|
||||||
if i % 4 == 0 {
|
if i % 4 == 0 {
|
||||||
write!(file, "\n ").unwrap();
|
write!(file, "\n ").unwrap();
|
||||||
}
|
}
|
||||||
write!(file, " ({}, {}),", cos, sin).unwrap();
|
write!(file, " {},", cos + (sin << 16)).unwrap();
|
||||||
}
|
}
|
||||||
writeln!(file, "\n];").unwrap();
|
writeln!(file, "\n];").unwrap();
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
|
|||||||
/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
|
/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
|
||||||
/// in each quadrature over 20 bit phase.
|
/// in each quadrature over 20 bit phase.
|
||||||
pub fn cossin(phase: i32) -> (i32, i32) {
|
pub fn cossin(phase: i32) -> (i32, i32) {
|
||||||
// Phase bits excluding the three highes MSB
|
// Phase bits excluding the three highest MSB
|
||||||
const OCTANT_BITS: usize = 32 - 3;
|
const OCTANT_BITS: usize = 32 - 3;
|
||||||
|
|
||||||
// This is a slightly more compact way to compute the four flags for
|
// This is a slightly more compact way to compute the four flags for
|
||||||
@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) {
|
|||||||
let mut octant = (phase as u32) >> OCTANT_BITS;
|
let mut octant = (phase as u32) >> OCTANT_BITS;
|
||||||
octant ^= octant << 1;
|
octant ^= octant << 1;
|
||||||
|
|
||||||
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
|
let mut phase = phase;
|
||||||
let mut phase = phase & ((1 << OCTANT_BITS) - 1);
|
|
||||||
|
|
||||||
if octant & 1 != 0 {
|
if octant & 1 != 0 {
|
||||||
// phase = pi/4 - phase
|
// phase = pi/4 - phase
|
||||||
phase = (1 << OCTANT_BITS) - 1 - phase;
|
phase = !phase;
|
||||||
}
|
}
|
||||||
|
|
||||||
let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
|
// Mask off octant bits. This leaves the angle in the range [0, pi/4).
|
||||||
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
|
phase &= (1 << OCTANT_BITS) - 1;
|
||||||
// values and scale the sine values as encoded in the LUT.
|
|
||||||
let mut cos = lookup.0 as i32 + u16::MAX as i32;
|
|
||||||
let mut sin = (lookup.1 as i32) << 1;
|
|
||||||
|
|
||||||
// 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
|
// 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
|
||||||
const ALIGN_MSB: usize = 32 - 16 - 1;
|
const ALIGN_MSB: usize = 32 - 16 - 1;
|
||||||
phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
|
phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
|
||||||
|
|
||||||
|
let lookup = COSSIN[(phase >> ALIGN_MSB) as usize];
|
||||||
phase &= (1 << ALIGN_MSB) - 1;
|
phase &= (1 << ALIGN_MSB) - 1;
|
||||||
|
|
||||||
// The phase values used for the LUT are at midpoint for the truncated phase.
|
// The phase values used for the LUT are at midpoint for the truncated phase.
|
||||||
// Interpolate relative to the LUT entry midpoint.
|
// Interpolate relative to the LUT entry midpoint.
|
||||||
|
// Also cancel the -1 bias that was conditionally introduced above.
|
||||||
phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
|
phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
|
||||||
// Fixed point pi/4.
|
|
||||||
const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
|
|
||||||
// No rounding bias necessary here since we keep enough low bits.
|
|
||||||
let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
|
|
||||||
|
|
||||||
// Make room for the sign bit.
|
// Fixed point pi/4.
|
||||||
let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
|
const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32;
|
||||||
|
// No rounding bias necessary here since we keep enough low bits.
|
||||||
|
let dphi = (phase * PI4) >> 16;
|
||||||
|
|
||||||
|
// 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
|
||||||
|
// values and scale the sine values as encoded in the LUT.
|
||||||
|
let mut cos = (lookup & 0xffff) as i32 + (1 << 16);
|
||||||
|
let mut sin = (lookup >> 16) as i32;
|
||||||
|
|
||||||
|
let dcos = (sin * dphi) >> COSSIN_DEPTH;
|
||||||
let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
|
let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
|
||||||
|
|
||||||
cos = (cos << (ALIGN_MSB - 1)) - dcos;
|
cos = (cos << (ALIGN_MSB - 1)) - dcos;
|
||||||
sin = (sin << (ALIGN_MSB - 1)) + dsin;
|
sin = (sin << ALIGN_MSB) + dsin;
|
||||||
|
|
||||||
// Unmap using octant bits.
|
// Unmap using octant bits.
|
||||||
if octant & 2 != 0 {
|
if octant & 2 != 0 {
|
||||||
core::mem::swap(&mut sin, &mut cos);
|
core::mem::swap(&mut sin, &mut cos);
|
||||||
}
|
}
|
||||||
|
|
||||||
if octant & 4 != 0 {
|
if octant & 4 != 0 {
|
||||||
cos *= -1;
|
cos = -cos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if octant & 8 != 0 {
|
if octant & 8 != 0 {
|
||||||
sin *= -1;
|
sin = -sin;
|
||||||
}
|
}
|
||||||
|
|
||||||
(cos, sin)
|
(cos, sin)
|
||||||
|
Loading…
Reference in New Issue
Block a user