From 12d5945d811062cfa06b83cc4d51b8962d0646a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Fri, 18 Dec 2020 15:46:21 +0100 Subject: [PATCH 1/2] dsp/testing: simplify --- dsp/src/testing.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dsp/src/testing.rs b/dsp/src/testing.rs index 098ec87..4a14f22 100644 --- a/dsp/src/testing.rs +++ b/dsp/src/testing.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] use super::Complex; pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool { @@ -23,9 +24,7 @@ pub fn complex_allclose( rtol: f32, atol: f32, ) -> bool { - let mut result: bool = true; - a.iter().zip(b.iter()).for_each(|(i, j)| { - result &= complex_isclose(*i, *j, rtol, atol); - }); - result + a.iter() + .zip(b) + .all(|(&i, &j)| complex_isclose(i, j, rtol, atol)) } From 8d9af70c19d2606a99b68a70cb03b2834f152f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sun, 20 Dec 2020 20:35:26 +0100 Subject: [PATCH 2/2] trig/atan2: refine * use dynamic scaling of the inputs to get accurate ratios (effectively floating point) to maintain accuracy for small arguments * this also allows shifting later and keep more bits * use u32 ratio to keep one more bit * merge the corner case unittests into the big test value list * print rms, absolute and axis-relative angle * simplify the correction expression to get rid of one multiplication * use 5 bit for the correction constant and 15 bits for r * least squares optimal correction constant, this lowers the max error below 5e-5 --- dsp/src/trig.rs | 154 +++++++++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 88 deletions(-) diff --git a/dsp/src/trig.rs b/dsp/src/trig.rs index 9873d7e..3f96609 100644 --- a/dsp/src/trig.rs +++ b/dsp/src/trig.rs @@ -1,4 +1,4 @@ -use super::{shift_round, Complex}; +use super::Complex; use core::f64::consts::PI; include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); @@ -22,18 +22,25 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// represents -pi and, equivalently, +pi. i32::MAX represents one /// count less than +pi. pub fn atan2(y: i32, x: i32) -> i32 { - let mut y = y >> 16; - let mut x = x >> 16; + let sign = (x < 0, y < 0); - let sign = ((y >> 14) & 2) | ((x >> 15) & 1); - if sign & 1 == 1 { - x *= -1; - } - if sign & 2 == 2 { - y *= -1; - } + let mut y = y.wrapping_abs() as u32; + let mut x = x.wrapping_abs() as u32; let y_greater = y > x; + if y_greater { + core::mem::swap(&mut y, &mut x); + } + + let z = (16 - y.leading_zeros() as i32).max(0); + + x >>= z; + if x == 0 { + return 0; + } + y >>= z; + let r = (y << 16) / x; + debug_assert!(r <= 1 << 16); // Uses the general procedure described in the following // Mathematics stack exchange answer: @@ -44,47 +51,37 @@ pub fn atan2(y: i32, x: i32) -> i32 { // to compute and to be more compatible with integer // arithmetic. The approximation technique used here is // - // pi / 4 * x + 0.285 * x * (1 - abs(x)) + // pi / 4 * r + C * r * (1 - abs(r)) // // which is taken from Rajan 2006: Efficient Approximations for // the Arctangent Function. - if y_greater { - core::mem::swap(&mut x, &mut y); - } + // + // The least mean squared error solution is C = 0.279 (no the 0.285 that + // Rajan uses). K = C*4/pi. + // Q5 for K provides sufficient correction accuracy while preserving + // as much smoothness of the quadratic correction as possible. + const FP_K: usize = 5; + const K: u32 = (0.35489 * (1 << FP_K) as f64) as u32; + // debug_assert!(K == 11); - if x == 0 { - return 0; - } - - // We need to share the 31 available non-sign bits between the - // atan argument and constant factors used in the atan - // approximation. Sharing the bits roughly equally between them - // gives good accuracy. Additionally, we cannot increase the - // number of atan argument bits beyond 15 because we must square - // it. - const ATAN_ARGUMENT_BITS: usize = 15; - let ratio = (y << ATAN_ARGUMENT_BITS) / x; - - let mut angle = { - const K1: i32 = ((1. / 4. + 0.285 / PI) - * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) - as i32; - const K2: i32 = - ((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32; - - ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS) - }; + // `r` is unsigned Q16.16 and <= 1 + // `angle` is signed Q1.31 with 1 << 31 == +- pi + // Since K < 0.5 and r*(1 - r) <= 0.25 the correction product can use + // 4 bits for K, and 15 bits for r and 1-r to remain within the u32 range. + let mut angle = ((r << 13) + + ((K * (r >> 1) * ((1 << 15) - (r >> 1))) >> (FP_K + 1))) + as i32; if y_greater { - angle = (i32::MAX >> 1) - angle; + angle = (1 << 30) - angle; } - if sign & 1 == 1 { + if sign.0 { angle = i32::MAX - angle; } - if sign & 2 == 2 { - angle *= -1; + if sign.1 { + angle = angle.wrapping_neg(); } angle @@ -162,7 +159,6 @@ pub fn cossin(phase: i32) -> Complex { #[cfg(test)] mod tests { use super::*; - use crate::testing::isclose; use core::f64::consts::PI; fn angle_to_axis(angle: f64) -> f64 { @@ -172,61 +168,43 @@ mod tests { #[test] fn atan2_absolute_error() { - const NUM_VALS: usize = 1_001; - let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS]; - let val_bounds: (f64, f64) = (-1., 1.); - let val_delta: f64 = - (val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64; - for i in 0..NUM_VALS { - test_vals[i] = val_bounds.0 + i as f64 * val_delta; + const N: usize = 321; + let mut test_vals = [0i32; N + 4]; + let scale = (1i64 << 31) as f64; + for i in 0..N { + test_vals[i] = (scale * (-1. + 2. * i as f64 / N as f64)) as i32; } - let atol: f64 = 4e-5; - let rtol: f64 = 0.127; + assert!(test_vals.contains(&i32::MIN)); + test_vals[N] = i32::MAX; + test_vals[N + 1] = 0; + test_vals[N + 2] = -1; + test_vals[N + 3] = 1; + + let mut rms_err = 0f64; + let mut abs_err = 0f64; + let mut rel_err = 0f64; + for &x in test_vals.iter() { for &y in test_vals.iter() { - let actual = (y.atan2(x) as f64 * i16::MAX as f64).round() - / i16::MAX as f64; - let tol = atol + rtol * angle_to_axis(actual).abs(); - let computed = (atan2( - ((y * i16::MAX as f64) as i32) << 16, - ((x * i16::MAX as f64) as i32) << 16, - ) >> 16) as f64 - / i16::MAX as f64 - * PI; + let want = (y as f64 / scale).atan2(x as f64 / scale); + let have = atan2(y, x) as f64 * PI / scale; - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", x, y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); + let err = (have - want).abs(); + abs_err = abs_err.max(err); + rms_err += err * err; + if err > 3e-5 { + rel_err = rel_err.max(err / angle_to_axis(want)); } } } - - // test min and max explicitly - for (x, y) in [ - ((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32), - ((i16::MIN as i32 + 1) << 16, (1 << 16) as i32), - ] - .iter() - { - let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64; - let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64; - let actual = - (yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64; - let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI; - let tol = atol + rtol * angle_to_axis(actual).abs(); - - if !isclose(computed, actual, 0., tol) { - println!("(x, y) : {}, {}", *x, *y); - println!("actual : {}", actual); - println!("computed : {}", computed); - println!("tolerance: {}\n", tol); - assert!(false); - } - } + rms_err = rms_err.sqrt() / test_vals.len() as f64; + println!("max abs err: {:.2e}", abs_err); + println!("rms abs err: {:.2e}", rms_err); + println!("max rel err: {:.2e}", rel_err); + assert!(abs_err < 5e-3); + assert!(rms_err < 3e-3); + assert!(rel_err < 0.6); } #[test]