From e89db65722672bbbbc9178b5ba1214126854f17a Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 15:25:31 -0800 Subject: [PATCH 01/20] rename trig.rs -> cossin.rs --- dsp/src/{trig.rs => cossin.rs} | 0 dsp/src/lib.rs | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) rename dsp/src/{trig.rs => cossin.rs} (100%) diff --git a/dsp/src/trig.rs b/dsp/src/cossin.rs similarity index 100% rename from dsp/src/trig.rs rename to dsp/src/cossin.rs diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 6dd20f7..ef0c131 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -18,10 +18,11 @@ pub fn shift_round(x: i32, shift: usize) -> i32 { (x + (1 << (shift - 1))) >> shift } +pub mod atan2; +pub mod cossin; pub mod iir; pub mod lockin; pub mod pll; -pub mod trig; pub mod unwrap; #[cfg(test)] From 17f9f0750eee1ec0fff98d46720e8a45b8702148 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:01:50 -0800 Subject: [PATCH 02/20] dsp: move abs to lib.rs --- dsp/src/iir.rs | 12 +----------- dsp/src/lib.rs | 13 +++++++++++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index c6f2100..48c92e9 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -2,23 +2,13 @@ use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; use core::f32; +use super::abs; // These are implemented here because core::f32 doesn't have them (yet). // They are naive and don't handle inf/nan. // `compiler-intrinsics`/llvm should have better (robust, universal, and // faster) implementations. -fn abs(x: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if x >= T::default() { - x - } else { - -x - } -} - fn copysign(x: T, y: T) -> T where T: PartialOrd + Default + Neg, diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index ef0c131..2fbd121 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,6 +1,8 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] +use core::ops::Neg; + pub type Complex = (T, T); /// Round up half. @@ -18,6 +20,17 @@ pub fn shift_round(x: i32, shift: usize) -> i32 { (x + (1 << (shift - 1))) >> shift } +fn abs(x: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if x >= T::default() { + x + } else { + -x + } +} + pub mod atan2; pub mod cossin; pub mod iir; From 6d651da758f44d0a8b6702cb4e364da3fad276fa Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:02:17 -0800 Subject: [PATCH 03/20] dsp: add f64 isclose testing function --- dsp/src/testing.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs index 1a8e109..098ec87 100644 --- a/dsp/src/testing.rs +++ b/dsp/src/testing.rs @@ -1,6 +1,10 @@ use super::Complex; -pub fn isclose(a: f32, b: f32, rtol: f32, atol: f32) -> bool { +pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool { + (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol +} + +pub fn isclosef(a: f32, b: f32, rtol: f32, atol: f32) -> bool { (a - b).abs() <= a.abs().max(b.abs()) * rtol + atol } @@ -10,7 +14,7 @@ pub fn complex_isclose( rtol: f32, atol: f32, ) -> bool { - isclose(a.0, b.0, rtol, atol) && isclose(a.1, b.1, rtol, atol) + isclosef(a.0, b.0, rtol, atol) && isclosef(a.1, b.1, rtol, atol) } pub fn complex_allclose( From 5d055b01a03f31b0950ce5e36c214a1fc6289a96 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:02:42 -0800 Subject: [PATCH 04/20] dsp: add atan2 --- dsp/src/atan2.rs | 126 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 dsp/src/atan2.rs diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs new file mode 100644 index 0000000..a5f4d3e --- /dev/null +++ b/dsp/src/atan2.rs @@ -0,0 +1,126 @@ +use super::{abs, shift_round}; + +/// 2-argument arctangent function. +/// +/// This implementation uses all integer arithmetic for fast +/// computation. It is designed to have high accuracy near the axes +/// and lower away from the axes. It is additionally designed so that +/// the error changes slowly with respect to the angle. +/// +/// # Arguments +/// +/// * `y` - Y-axis component. +/// * `x` - X-axis component. +/// +/// # Returns +/// +/// The angle between the x-axis and the ray to the point (x,y). The +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// corresponds to an angle of -pi and i32::MAX corresponds to an +/// angle of +pi. +pub fn atan2(y: i32, x: i32) -> i32 { + let y = y >> 16; + let x = x >> 16; + + let ux = abs::(x); + let uy = abs::(y); + + // Uses the general procedure described in the following + // Mathematics stack exchange answer: + // + // https://math.stackexchange.com/a/1105038/583981 + // + // The atan approximation method has been modified to be cheaper + // to compute and to be more compatible with integer + // arithmetic. The approximation technique used here is + // + // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // + // which is taken from Rajan 2006: Efficient Approximations for + // the Arctangent Function. + let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + + if max == 0 { + return 0; + } + + let ratio = (min << 15) / max; + + let mut angle = { + // pi/4, referenced to i16::MAX + const PI_4_FACTOR: i32 = 25735; + // 0.285, referenced to i16::MAX + const FACTOR_0285: i32 = 9339; + // 1/pi, referenced to u16::MAX + const PI_INVERTED_FACTOR: i32 = 20861; + + let r1 = shift_round(ratio * PI_4_FACTOR, 15); + let r2 = shift_round( + (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), + 15, + ); + (r1 + r2) * PI_INVERTED_FACTOR + }; + + if uy > ux { + angle = (i32::MAX >> 1) - angle; + } + + if x < 0 { + angle = i32::MAX - angle; + } + + if y < 0 { + angle *= -1; + } + + angle +} + +#[cfg(test)] +mod tests { + use super::*; + use core::f64::consts::PI; + use crate::testing::isclose; + + fn angle_to_axis(angle: f64) -> f64 { + let angle = angle % (PI / 2.); + (PI / 2. - angle).min(angle) + } + + #[test] + fn absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } +} From e257545321cb27fd808c35f5c7d8ddedca632fbb Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:14:11 -0800 Subject: [PATCH 05/20] fix formatting --- dsp/src/atan2.rs | 2 +- dsp/src/iir.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs index a5f4d3e..2643d19 100644 --- a/dsp/src/atan2.rs +++ b/dsp/src/atan2.rs @@ -80,8 +80,8 @@ pub fn atan2(y: i32, x: i32) -> i32 { #[cfg(test)] mod tests { use super::*; - use core::f64::consts::PI; use crate::testing::isclose; + use core::f64::consts::PI; fn angle_to_axis(angle: f64) -> f64 { let angle = angle % (PI / 2.); diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 48c92e9..5ff0970 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,8 +1,8 @@ use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; -use core::f32; use super::abs; +use core::f32; // These are implemented here because core::f32 doesn't have them (yet). // They are naive and don't handle inf/nan. From 7c4f6082068d8ae3bda2896f29b00c7b4ab50ee4 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:26:44 -0800 Subject: [PATCH 06/20] move cossin and atan2 into the same trig file --- dsp/benches/cossin.rs | 2 +- dsp/src/atan2.rs | 126 --------------------------------- dsp/src/lib.rs | 3 +- dsp/src/{cossin.rs => trig.rs} | 123 +++++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 130 deletions(-) delete mode 100644 dsp/src/atan2.rs rename dsp/src/{cossin.rs => trig.rs} (57%) diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs index 4e23774..9f88e1b 100644 --- a/dsp/benches/cossin.rs +++ b/dsp/benches/cossin.rs @@ -1,6 +1,6 @@ use core::f32::consts::PI; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::trig::cossin; +use dsp::cossin::cossin; fn cossin_bench(c: &mut Criterion) { let zi = -0x7304_2531_i32; diff --git a/dsp/src/atan2.rs b/dsp/src/atan2.rs deleted file mode 100644 index 2643d19..0000000 --- a/dsp/src/atan2.rs +++ /dev/null @@ -1,126 +0,0 @@ -use super::{abs, shift_round}; - -/// 2-argument arctangent function. -/// -/// This implementation uses all integer arithmetic for fast -/// computation. It is designed to have high accuracy near the axes -/// and lower away from the axes. It is additionally designed so that -/// the error changes slowly with respect to the angle. -/// -/// # Arguments -/// -/// * `y` - Y-axis component. -/// * `x` - X-axis component. -/// -/// # Returns -/// -/// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN to i32::MAX, where i32::MIN -/// corresponds to an angle of -pi and i32::MAX corresponds to an -/// angle of +pi. -pub fn atan2(y: i32, x: i32) -> i32 { - let y = y >> 16; - let x = x >> 16; - - let ux = abs::(x); - let uy = abs::(y); - - // Uses the general procedure described in the following - // Mathematics stack exchange answer: - // - // https://math.stackexchange.com/a/1105038/583981 - // - // The atan approximation method has been modified to be cheaper - // to compute and to be more compatible with integer - // arithmetic. The approximation technique used here is - // - // pi / 4 * x + 0.285 * x * (1 - abs(x)) - // - // which is taken from Rajan 2006: Efficient Approximations for - // the Arctangent Function. - let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; - - if max == 0 { - return 0; - } - - let ratio = (min << 15) / max; - - let mut angle = { - // pi/4, referenced to i16::MAX - const PI_4_FACTOR: i32 = 25735; - // 0.285, referenced to i16::MAX - const FACTOR_0285: i32 = 9339; - // 1/pi, referenced to u16::MAX - const PI_INVERTED_FACTOR: i32 = 20861; - - let r1 = shift_round(ratio * PI_4_FACTOR, 15); - let r2 = shift_round( - (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), - 15, - ); - (r1 + r2) * PI_INVERTED_FACTOR - }; - - if uy > ux { - angle = (i32::MAX >> 1) - angle; - } - - if x < 0 { - angle = i32::MAX - angle; - } - - if y < 0 { - angle *= -1; - } - - angle -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::testing::isclose; - use core::f64::consts::PI; - - fn angle_to_axis(angle: f64) -> f64 { - let angle = angle % (PI / 2.); - (PI / 2. - angle).min(angle) - } - - #[test] - fn absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; - } - - for &x in test_vals.iter() { - for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } - } - } -} diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 2fbd121..90f62f6 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -31,11 +31,10 @@ where } } -pub mod atan2; -pub mod cossin; pub mod iir; pub mod lockin; pub mod pll; +pub mod trig; pub mod unwrap; #[cfg(test)] diff --git a/dsp/src/cossin.rs b/dsp/src/trig.rs similarity index 57% rename from dsp/src/cossin.rs rename to dsp/src/trig.rs index 5a99232..72435e0 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/trig.rs @@ -1,8 +1,85 @@ -use super::Complex; +use super::{abs, shift_round, Complex}; use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); +/// 2-argument arctangent function. +/// +/// This implementation uses all integer arithmetic for fast +/// computation. It is designed to have high accuracy near the axes +/// and lower away from the axes. It is additionally designed so that +/// the error changes slowly with respect to the angle. +/// +/// # Arguments +/// +/// * `y` - Y-axis component. +/// * `x` - X-axis component. +/// +/// # Returns +/// +/// The angle between the x-axis and the ray to the point (x,y). The +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// corresponds to an angle of -pi and i32::MAX corresponds to an +/// angle of +pi. +pub fn atan2(y: i32, x: i32) -> i32 { + let y = y >> 16; + let x = x >> 16; + + let ux = abs::(x); + let uy = abs::(y); + + // Uses the general procedure described in the following + // Mathematics stack exchange answer: + // + // https://math.stackexchange.com/a/1105038/583981 + // + // The atan approximation method has been modified to be cheaper + // to compute and to be more compatible with integer + // arithmetic. The approximation technique used here is + // + // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // + // which is taken from Rajan 2006: Efficient Approximations for + // the Arctangent Function. + let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + + if max == 0 { + return 0; + } + + let ratio = (min << 15) / max; + + let mut angle = { + // pi/4, referenced to i16::MAX + const PI_4_FACTOR: i32 = 25735; + // 0.285, referenced to i16::MAX + const FACTOR_0285: i32 = 9339; + // 1/pi, referenced to u16::MAX + const PI_INVERTED_FACTOR: i32 = 20861; + + let r1 = shift_round(ratio * PI_4_FACTOR, 15); + let r2 = shift_round( + (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), + 15, + ); + (r1 + r2) * PI_INVERTED_FACTOR + }; + + if uy > ux { + angle = (i32::MAX >> 1) - angle; + } + + if x < 0 { + angle = i32::MAX - angle; + } + + if y < 0 { + angle *= -1; + } + + angle +} + /// Compute the cosine and sine of an angle. /// This is ported from the MiSoC cossin core. /// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py) @@ -75,6 +152,14 @@ pub fn cossin(phase: i32) -> Complex { #[cfg(test)] mod tests { use super::*; + use crate::testing::isclose; + use core::f64::consts::PI; + + fn angle_to_axis(angle: f64) -> f64 { + let angle = angle % (PI / 2.); + (PI / 2. - angle).min(angle) + } + #[test] fn error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. @@ -143,4 +228,40 @@ mod tests { assert!(max_err.0 < 1.1e-5); assert!(max_err.1 < 1.1e-5); } + + #[test] + fn absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } } From 85ae70fe6205ce25e6b80840c92aaf1e4221d2a5 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:28:49 -0800 Subject: [PATCH 07/20] rename trig tests to delineate between cossin and atan2 --- dsp/src/trig.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 72435e0..4dc26be 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -161,7 +161,7 @@ mod tests { } #[test] - fn error_max_rms_all_phase() { + fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64; const MAX_PHASE: f64 = (1i64 << 32) as f64; @@ -230,7 +230,7 @@ mod tests { } #[test] - fn absolute_error() { + fn atan2_absolute_error() { const NUM_VALS: usize = 1_001; let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; let val_bounds: (f64, f64) = (-1., 1.); From 2ddaab8fae34b4f01d2f2029eb18f3676e41ab56 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Wed, 16 Dec 2020 16:57:18 -0800 Subject: [PATCH 08/20] dsp: fix bench import path --- dsp/benches/cossin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs index 9f88e1b..4e23774 100644 --- a/dsp/benches/cossin.rs +++ b/dsp/benches/cossin.rs @@ -1,6 +1,6 @@ use core::f32::consts::PI; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::cossin::cossin; +use dsp::trig::cossin; fn cossin_bench(c: &mut Criterion) { let zi = -0x7304_2531_i32; From d9d500743f41aa150c055263aedfdba36c4ffbd0 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 08:02:54 -0800 Subject: [PATCH 09/20] simplify atan initial angle expression --- dsp/src/trig.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 4dc26be..e306356 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -50,19 +50,13 @@ pub fn atan2(y: i32, x: i32) -> i32 { let ratio = (min << 15) / max; let mut angle = { - // pi/4, referenced to i16::MAX - const PI_4_FACTOR: i32 = 25735; - // 0.285, referenced to i16::MAX - const FACTOR_0285: i32 = 9339; - // 1/pi, referenced to u16::MAX - const PI_INVERTED_FACTOR: i32 = 20861; + const K1: i32 = + ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32; + const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32; - let r1 = shift_round(ratio * PI_4_FACTOR, 15); - let r2 = shift_round( - (shift_round(ratio * FACTOR_0285, 15)) * (i16::MAX as i32 - ratio), - 15, - ); - (r1 + r2) * PI_INVERTED_FACTOR + let ratio_squared = shift_round(ratio * ratio, 15); + + ratio * K1 - K2 * ratio_squared }; if uy > ux { From d7111a3aa811deed34f01c0682ee6fada8978c61 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 08:04:53 -0800 Subject: [PATCH 10/20] dsp/trig: let compiler infer type parameter in atan2 abs call --- dsp/src/trig.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index e306356..51e8b2e 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -25,8 +25,8 @@ pub fn atan2(y: i32, x: i32) -> i32 { let y = y >> 16; let x = x >> 16; - let ux = abs::(x); - let uy = abs::(y); + let ux = abs(x); + let uy = abs(y); // Uses the general procedure described in the following // Mathematics stack exchange answer: From 5717991ada1847549b3154a20803f21c5688c3e5 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:31:18 -0800 Subject: [PATCH 11/20] atan2: result range is from i32::MIN+1 to i32::MAX --- dsp/src/trig.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 51e8b2e..57e4fec 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -18,7 +18,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// # Returns /// /// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1 /// corresponds to an angle of -pi and i32::MAX corresponds to an /// angle of +pi. pub fn atan2(y: i32, x: i32) -> i32 { From cb38c3e3bd3f28f32bfba3ed2abf048772af45a9 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:31:38 -0800 Subject: [PATCH 12/20] atan2: clarify sharing bits between atan argument and constant factors --- dsp/src/trig.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 57e4fec..d25d50b 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -47,16 +47,21 @@ pub fn atan2(y: i32, x: i32) -> i32 { return 0; } - let ratio = (min << 15) / max; + // We need to share the 31 available non-sign bits between the + // atan argument and constant factors used in the atan + // approximation. Sharing the bits roughly equally between them + // gives good accuracy. + const ATAN_ARGUMENT_BITS: usize = 15; + let ratio = (min << ATAN_ARGUMENT_BITS) / max; let mut angle = { - const K1: i32 = - ((1_f64 / 4_f64 + 0.285_f64 / PI) * (1 << 16) as f64) as i32; - const K2: i32 = ((0.285_f64 / PI) * (1 << 16) as f64) as i32; + const K1: i32 = ((1. / 4. + 0.285 / PI) + * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) + as i32; + const K2: i32 = + ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32; - let ratio_squared = shift_round(ratio * ratio, 15); - - ratio * K1 - K2 * ratio_squared + ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) }; if uy > ux { From 1f28949bc5d859144a6fc96cdf28cd6959dd2e29 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 09:47:39 -0800 Subject: [PATCH 13/20] atan2: store sign bits and greater of |x| and |y| --- dsp/src/trig.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index d25d50b..53aecfa 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -1,4 +1,4 @@ -use super::{abs, shift_round, Complex}; +use super::{shift_round, Complex}; use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); @@ -22,11 +22,18 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// corresponds to an angle of -pi and i32::MAX corresponds to an /// angle of +pi. pub fn atan2(y: i32, x: i32) -> i32 { - let y = y >> 16; - let x = x >> 16; + let mut y = y >> 16; + let mut x = x >> 16; - let ux = abs(x); - let uy = abs(y); + let sign = ((y >> 14) & 2) | ((x >> 15) & 1); + if sign & 1 == 1 { + x *= -1; + } + if sign & 2 == 2 { + y *= -1; + } + + let y_greater = y > x; // Uses the general procedure described in the following // Mathematics stack exchange answer: @@ -41,7 +48,7 @@ pub fn atan2(y: i32, x: i32) -> i32 { // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. - let (min, max) = if ux < uy { (ux, uy) } else { (uy, ux) }; + let (min, max) = if y_greater { (x, y) } else { (y, x) }; if max == 0 { return 0; @@ -64,15 +71,15 @@ pub fn atan2(y: i32, x: i32) -> i32 { ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) }; - if uy > ux { + if y_greater { angle = (i32::MAX >> 1) - angle; } - if x < 0 { + if sign & 1 == 1 { angle = i32::MAX - angle; } - if y < 0 { + if sign & 2 == 2 { angle *= -1; } From 56641d5838cba59fa55d82af2e0d495185b227cc Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:02:35 -0800 Subject: [PATCH 14/20] atan2: specify why we cannot use more than 15 bits for the atan argument --- dsp/src/trig.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 53aecfa..6d0acff 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -57,7 +57,9 @@ pub fn atan2(y: i32, x: i32) -> i32 { // We need to share the 31 available non-sign bits between the // atan argument and constant factors used in the atan // approximation. Sharing the bits roughly equally between them - // gives good accuracy. + // gives good accuracy. Additionally, we cannot increase the + // number of atan argument bits beyond 15 because we must square + // it. const ATAN_ARGUMENT_BITS: usize = 15; let ratio = (min << ATAN_ARGUMENT_BITS) / max; From 09a744f59c5771fa044acd1694809e4d055ef157 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:03:16 -0800 Subject: [PATCH 15/20] dsp: move iir generic math functions to top-level module scope --- dsp/src/iir.rs | 69 +------------------------------------------------- dsp/src/lib.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 69 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 5ff0970..dbec8d8 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,75 +1,8 @@ -use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; -use super::abs; +use super::{abs, copysign, macc, max, min}; use core::f32; -// These are implemented here because core::f32 doesn't have them (yet). -// They are naive and don't handle inf/nan. -// `compiler-intrinsics`/llvm should have better (robust, universal, and -// faster) implementations. - -fn copysign(x: T, y: T) -> T -where - T: PartialOrd + Default + Neg, -{ - if (x >= T::default() && y >= T::default()) - || (x <= T::default() && y <= T::default()) - { - x - } else { - -x - } -} - -#[cfg(not(feature = "nightly"))] -fn max(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x > y { - x - } else { - y - } -} - -#[cfg(not(feature = "nightly"))] -fn min(x: T, y: T) -> T -where - T: PartialOrd, -{ - if x < y { - x - } else { - y - } -} - -#[cfg(feature = "nightly")] -fn max(x: f32, y: f32) -> f32 { - core::intrinsics::maxnumf32(x, y) -} - -#[cfg(feature = "nightly")] -fn min(x: f32, y: f32) -> f32 { - core::intrinsics::minnumf32(x, y) -} - -// Multiply-accumulate vectors `x` and `a`. -// -// A.k.a. dot product. -// Rust/LLVM optimize this nicely. -fn macc(y0: T, x: &[T], a: &[T]) -> T -where - T: Add + Mul + Copy, -{ - x.iter() - .zip(a) - .map(|(x, a)| *x * *a) - .fold(y0, |y, xa| y + xa) -} - /// IIR state and coefficients type. /// /// To represent the IIR state (input and output memory) during the filter update diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 90f62f6..67b1882 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] -use core::ops::Neg; +use core::ops::{Add, Mul, Neg}; pub type Complex = (T, T); @@ -31,6 +31,72 @@ where } } +// These are implemented here because core::f32 doesn't have them (yet). +// They are naive and don't handle inf/nan. +// `compiler-intrinsics`/llvm should have better (robust, universal, and +// faster) implementations. + +fn copysign(x: T, y: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if (x >= T::default() && y >= T::default()) + || (x <= T::default() && y <= T::default()) + { + x + } else { + -x + } +} + +#[cfg(not(feature = "nightly"))] +fn max(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x > y { + x + } else { + y + } +} + +#[cfg(not(feature = "nightly"))] +fn min(x: T, y: T) -> T +where + T: PartialOrd, +{ + if x < y { + x + } else { + y + } +} + +#[cfg(feature = "nightly")] +fn max(x: f32, y: f32) -> f32 { + core::intrinsics::maxnumf32(x, y) +} + +#[cfg(feature = "nightly")] +fn min(x: f32, y: f32) -> f32 { + core::intrinsics::minnumf32(x, y) +} + +// Multiply-accumulate vectors `x` and `a`. +// +// A.k.a. dot product. +// Rust/LLVM optimize this nicely. +fn macc(y0: T, x: &[T], a: &[T]) -> T +where + T: Add + Mul + Copy, +{ + x.iter() + .zip(a) + .map(|(x, a)| *x * *a) + .fold(y0, |y, xa| y + xa) +} + pub mod iir; pub mod lockin; pub mod pll; From 6ffc42021edbcf34e4eabd2a3213b8d191e22e5e Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 10:09:12 -0800 Subject: [PATCH 16/20] move atan2 test before cossin test to mimic function order --- dsp/src/trig.rs | 72 ++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 6d0acff..ccf9292 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -168,6 +168,42 @@ mod tests { (PI / 2. - angle).min(angle) } + #[test] + fn atan2_absolute_error() { + const NUM_VALS: usize = 1_001; + let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; + let val_bounds: (f64, f64) = (-1., 1.); + let val_delta: f64 = + (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; + for i in 0..NUM_VALS { + test_vals[i] = val_bounds.0 + i as f64 * val_delta; + } + + for &x in test_vals.iter() { + for &y in test_vals.iter() { + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; + let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() + / i16::MAX as f64; + let tol = atol + rtol * angle_to_axis(actual).abs(); + let computed = (atan2( + ((y * i16::MAX as f64) as i32) << 16, + ((x * i16::MAX as f64) as i32) << 16, + ) >> 16) as f64 + / i16::MAX as f64 + * PI; + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", x, y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } + } + } + #[test] fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. @@ -236,40 +272,4 @@ mod tests { assert!(max_err.0 < 1.1e-5); assert!(max_err.1 < 1.1e-5); } - - #[test] - fn atan2_absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; - } - - for &x in test_vals.iter() { - for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } - } - } } From 9c5e68ceea82c8f9096473cfb74d0ef930d34843 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 11:34:39 -0800 Subject: [PATCH 17/20] atan2: test min and max angle inputs --- dsp/src/trig.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index ccf9292..5d73846 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -179,10 +179,10 @@ mod tests { test_vals[i] = val_bounds.0 + i as f64 * val_delta; } + let atol: f64 = 4e-5; + let rtol: f64 = 0.127; for &x in test_vals.iter() { for &y in test_vals.iter() { - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() / i16::MAX as f64; let tol = atol + rtol * angle_to_axis(actual).abs(); @@ -202,6 +202,29 @@ mod tests { } } } + + // test min and max explicitly + for (x, y) in [ + ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32), + ((i16::MIN as i32 + 1) << 16, (1 << 16) as i32), + ] + .iter() + { + let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64; + let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64; + let actual = + (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64; + let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI; + let tol = atol + rtol * angle_to_axis(actual).abs(); + + if !isclose(computed, actual, 0., tol) { + println!("(x, y) : {}, {}", *x, *y); + println!("actual : {}", actual); + println!("computed : {}", computed); + println!("tolerance: {}\n", tol); + assert!(false); + } + } } #[test] From 17cf71f22bc3978cb50f21c50367f108aa8f9ee9 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 11:39:32 -0800 Subject: [PATCH 18/20] atan2: replace min, max with x, y --- dsp/src/trig.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 5d73846..13ce844 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -48,9 +48,11 @@ pub fn atan2(y: i32, x: i32) -> i32 { // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. - let (min, max) = if y_greater { (x, y) } else { (y, x) }; + if y_greater { + core::mem::swap(&mut x, &mut y); + } - if max == 0 { + if x == 0 { return 0; } @@ -61,7 +63,7 @@ pub fn atan2(y: i32, x: i32) -> i32 { // number of atan argument bits beyond 15 because we must square // it. const ATAN_ARGUMENT_BITS: usize = 15; - let ratio = (min << ATAN_ARGUMENT_BITS) / max; + let ratio = (y << ATAN_ARGUMENT_BITS) / x; let mut angle = { const K1: i32 = ((1. / 4. + 0.285 / PI) From 3125365a1580731d104d74f3023f7c8fb5e1ff9e Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 14:01:57 -0800 Subject: [PATCH 19/20] add atan2 host benchmark --- dsp/Cargo.toml | 2 +- dsp/benches/cossin.rs | 13 ------------- dsp/benches/trig.rs | 28 ++++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 14 deletions(-) delete mode 100644 dsp/benches/cossin.rs create mode 100644 dsp/benches/trig.rs diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml index 8313a49..548e64f 100644 --- a/dsp/Cargo.toml +++ b/dsp/Cargo.toml @@ -12,7 +12,7 @@ serde = { version = "1.0", features = ["derive"], default-features = false } criterion = "0.3" [[bench]] -name = "cossin" +name = "trig" harness = false [features] diff --git a/dsp/benches/cossin.rs b/dsp/benches/cossin.rs deleted file mode 100644 index 4e23774..0000000 --- a/dsp/benches/cossin.rs +++ /dev/null @@ -1,13 +0,0 @@ -use core::f32::consts::PI; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use dsp::trig::cossin; - -fn cossin_bench(c: &mut Criterion) { - let zi = -0x7304_2531_i32; - let zf = zi as f32 / i32::MAX as f32 * PI; - c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); - c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); -} - -criterion_group!(benches, cossin_bench); -criterion_main!(benches); diff --git a/dsp/benches/trig.rs b/dsp/benches/trig.rs new file mode 100644 index 0000000..19b6cce --- /dev/null +++ b/dsp/benches/trig.rs @@ -0,0 +1,28 @@ +use core::f32::consts::PI; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use dsp::trig::{atan2, cossin}; + +fn atan2_bench(c: &mut Criterion) { + let xi = (10 << 16) as i32; + let xf = xi as f32 / i32::MAX as f32; + + let yi = (-26_328 << 16) as i32; + let yf = yi as f32 / i32::MAX as f32; + + c.bench_function("atan2(y, x)", |b| { + b.iter(|| atan2(black_box(yi), black_box(xi))) + }); + c.bench_function("y.atan2(x)", |b| { + b.iter(|| black_box(yf).atan2(black_box(xf))) + }); +} + +fn cossin_bench(c: &mut Criterion) { + let zi = -0x7304_2531_i32; + let zf = zi as f32 / i32::MAX as f32 * PI; + c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi)))); + c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos())); +} + +criterion_group!(benches, atan2_bench, cossin_bench); +criterion_main!(benches); From 7e794373f45c942cd3f108b32afe743cf52cf777 Mon Sep 17 00:00:00 2001 From: Matt Huszagh Date: Thu, 17 Dec 2020 14:21:39 -0800 Subject: [PATCH 20/20] atan2: fix output range description --- dsp/src/trig.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 13ce844..9873d7e 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -18,9 +18,9 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// # Returns /// /// The angle between the x-axis and the ray to the point (x,y). The -/// result range is from i32::MIN+1 to i32::MAX, where i32::MIN+1 -/// corresponds to an angle of -pi and i32::MAX corresponds to an -/// angle of +pi. +/// result range is from i32::MIN to i32::MAX, where i32::MIN +/// represents -pi and, equivalently, +pi. i32::MAX represents one +/// count less than +pi. pub fn atan2(y: i32, x: i32) -> i32 { let mut y = y >> 16; let mut x = x >> 16;