Merge #207
207: atan r=jordens a=matthuszagh Adds 2-argument arctangent function. Parameters and result are `i32` integers for fast computation. Only the 16 MSBs of the inputs are used (16 LSBs are discarded). The x and y inputs can range from -1 to 1, which corresponds to `i32::MIN` and `i32::MAX`, respectively. The output ranges from -pi to pi, which corresponds to `i32::MIN` and `i32::MAX`, respectively. - [godbolt](https://rust.godbolt.org/z/nahKrT) # Related - #206 Co-authored-by: Matt Huszagh <huszaghmatt@gmail.com>
This commit is contained in:
commit
2f122d12fa
|
@ -12,7 +12,7 @@ serde = { version = "1.0", features = ["derive"], default-features = false }
|
||||||
criterion = "0.3"
|
criterion = "0.3"
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
name = "cossin"
|
name = "trig"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
use core::f32::consts::PI;
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
|
||||||
use dsp::trig::cossin;
|
|
||||||
|
|
||||||
fn cossin_bench(c: &mut Criterion) {
|
|
||||||
let zi = -0x7304_2531_i32;
|
|
||||||
let zf = zi as f32 / i32::MAX as f32 * PI;
|
|
||||||
c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi))));
|
|
||||||
c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos()));
|
|
||||||
}
|
|
||||||
|
|
||||||
criterion_group!(benches, cossin_bench);
|
|
||||||
criterion_main!(benches);
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
use core::f32::consts::PI;
|
||||||
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
use dsp::trig::{atan2, cossin};
|
||||||
|
|
||||||
|
fn atan2_bench(c: &mut Criterion) {
|
||||||
|
let xi = (10 << 16) as i32;
|
||||||
|
let xf = xi as f32 / i32::MAX as f32;
|
||||||
|
|
||||||
|
let yi = (-26_328 << 16) as i32;
|
||||||
|
let yf = yi as f32 / i32::MAX as f32;
|
||||||
|
|
||||||
|
c.bench_function("atan2(y, x)", |b| {
|
||||||
|
b.iter(|| atan2(black_box(yi), black_box(xi)))
|
||||||
|
});
|
||||||
|
c.bench_function("y.atan2(x)", |b| {
|
||||||
|
b.iter(|| black_box(yf).atan2(black_box(xf)))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cossin_bench(c: &mut Criterion) {
|
||||||
|
let zi = -0x7304_2531_i32;
|
||||||
|
let zf = zi as f32 / i32::MAX as f32 * PI;
|
||||||
|
c.bench_function("cossin(zi)", |b| b.iter(|| cossin(black_box(zi))));
|
||||||
|
c.bench_function("zf.sin_cos()", |b| b.iter(|| black_box(zf).sin_cos()));
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, atan2_bench, cossin_bench);
|
||||||
|
criterion_main!(benches);
|
|
@ -1,85 +1,8 @@
|
||||||
use core::ops::{Add, Mul, Neg};
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use super::{abs, copysign, macc, max, min};
|
||||||
use core::f32;
|
use core::f32;
|
||||||
|
|
||||||
// These are implemented here because core::f32 doesn't have them (yet).
|
|
||||||
// They are naive and don't handle inf/nan.
|
|
||||||
// `compiler-intrinsics`/llvm should have better (robust, universal, and
|
|
||||||
// faster) implementations.
|
|
||||||
|
|
||||||
fn abs<T>(x: T) -> T
|
|
||||||
where
|
|
||||||
T: PartialOrd + Default + Neg<Output = T>,
|
|
||||||
{
|
|
||||||
if x >= T::default() {
|
|
||||||
x
|
|
||||||
} else {
|
|
||||||
-x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn copysign<T>(x: T, y: T) -> T
|
|
||||||
where
|
|
||||||
T: PartialOrd + Default + Neg<Output = T>,
|
|
||||||
{
|
|
||||||
if (x >= T::default() && y >= T::default())
|
|
||||||
|| (x <= T::default() && y <= T::default())
|
|
||||||
{
|
|
||||||
x
|
|
||||||
} else {
|
|
||||||
-x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(not(feature = "nightly"))]
|
|
||||||
fn max<T>(x: T, y: T) -> T
|
|
||||||
where
|
|
||||||
T: PartialOrd,
|
|
||||||
{
|
|
||||||
if x > y {
|
|
||||||
x
|
|
||||||
} else {
|
|
||||||
y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(not(feature = "nightly"))]
|
|
||||||
fn min<T>(x: T, y: T) -> T
|
|
||||||
where
|
|
||||||
T: PartialOrd,
|
|
||||||
{
|
|
||||||
if x < y {
|
|
||||||
x
|
|
||||||
} else {
|
|
||||||
y
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "nightly")]
|
|
||||||
fn max(x: f32, y: f32) -> f32 {
|
|
||||||
core::intrinsics::maxnumf32(x, y)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "nightly")]
|
|
||||||
fn min(x: f32, y: f32) -> f32 {
|
|
||||||
core::intrinsics::minnumf32(x, y)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Multiply-accumulate vectors `x` and `a`.
|
|
||||||
//
|
|
||||||
// A.k.a. dot product.
|
|
||||||
// Rust/LLVM optimize this nicely.
|
|
||||||
fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
|
|
||||||
where
|
|
||||||
T: Add<Output = T> + Mul<Output = T> + Copy,
|
|
||||||
{
|
|
||||||
x.iter()
|
|
||||||
.zip(a)
|
|
||||||
.map(|(x, a)| *x * *a)
|
|
||||||
.fold(y0, |y, xa| y + xa)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// IIR state and coefficients type.
|
/// IIR state and coefficients type.
|
||||||
///
|
///
|
||||||
/// To represent the IIR state (input and output memory) during the filter update
|
/// To represent the IIR state (input and output memory) during the filter update
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#![cfg_attr(not(test), no_std)]
|
#![cfg_attr(not(test), no_std)]
|
||||||
#![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]
|
#![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))]
|
||||||
|
|
||||||
|
use core::ops::{Add, Mul, Neg};
|
||||||
|
|
||||||
pub type Complex<T> = (T, T);
|
pub type Complex<T> = (T, T);
|
||||||
|
|
||||||
/// Round up half.
|
/// Round up half.
|
||||||
|
@ -18,6 +20,83 @@ pub fn shift_round(x: i32, shift: usize) -> i32 {
|
||||||
(x + (1 << (shift - 1))) >> shift
|
(x + (1 << (shift - 1))) >> shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn abs<T>(x: T) -> T
|
||||||
|
where
|
||||||
|
T: PartialOrd + Default + Neg<Output = T>,
|
||||||
|
{
|
||||||
|
if x >= T::default() {
|
||||||
|
x
|
||||||
|
} else {
|
||||||
|
-x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are implemented here because core::f32 doesn't have them (yet).
|
||||||
|
// They are naive and don't handle inf/nan.
|
||||||
|
// `compiler-intrinsics`/llvm should have better (robust, universal, and
|
||||||
|
// faster) implementations.
|
||||||
|
|
||||||
|
fn copysign<T>(x: T, y: T) -> T
|
||||||
|
where
|
||||||
|
T: PartialOrd + Default + Neg<Output = T>,
|
||||||
|
{
|
||||||
|
if (x >= T::default() && y >= T::default())
|
||||||
|
|| (x <= T::default() && y <= T::default())
|
||||||
|
{
|
||||||
|
x
|
||||||
|
} else {
|
||||||
|
-x
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nightly"))]
|
||||||
|
fn max<T>(x: T, y: T) -> T
|
||||||
|
where
|
||||||
|
T: PartialOrd,
|
||||||
|
{
|
||||||
|
if x > y {
|
||||||
|
x
|
||||||
|
} else {
|
||||||
|
y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "nightly"))]
|
||||||
|
fn min<T>(x: T, y: T) -> T
|
||||||
|
where
|
||||||
|
T: PartialOrd,
|
||||||
|
{
|
||||||
|
if x < y {
|
||||||
|
x
|
||||||
|
} else {
|
||||||
|
y
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "nightly")]
|
||||||
|
fn max(x: f32, y: f32) -> f32 {
|
||||||
|
core::intrinsics::maxnumf32(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "nightly")]
|
||||||
|
fn min(x: f32, y: f32) -> f32 {
|
||||||
|
core::intrinsics::minnumf32(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiply-accumulate vectors `x` and `a`.
|
||||||
|
//
|
||||||
|
// A.k.a. dot product.
|
||||||
|
// Rust/LLVM optimize this nicely.
|
||||||
|
fn macc<T>(y0: T, x: &[T], a: &[T]) -> T
|
||||||
|
where
|
||||||
|
T: Add<Output = T> + Mul<Output = T> + Copy,
|
||||||
|
{
|
||||||
|
x.iter()
|
||||||
|
.zip(a)
|
||||||
|
.map(|(x, a)| *x * *a)
|
||||||
|
.fold(y0, |y, xa| y + xa)
|
||||||
|
}
|
||||||
|
|
||||||
pub mod iir;
|
pub mod iir;
|
||||||
pub mod lockin;
|
pub mod lockin;
|
||||||
pub mod pll;
|
pub mod pll;
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
use super::Complex;
|
use super::Complex;
|
||||||
|
|
||||||
pub fn isclose(a: f32, b: f32, rtol: f32, atol: f32) -> bool {
|
pub fn isclose(a: f64, b: f64, rtol: f64, atol: f64) -> bool {
|
||||||
|
(a - b).abs() <= a.abs().max(b.abs()) * rtol + atol
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn isclosef(a: f32, b: f32, rtol: f32, atol: f32) -> bool {
|
||||||
(a - b).abs() <= a.abs().max(b.abs()) * rtol + atol
|
(a - b).abs() <= a.abs().max(b.abs()) * rtol + atol
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +14,7 @@ pub fn complex_isclose(
|
||||||
rtol: f32,
|
rtol: f32,
|
||||||
atol: f32,
|
atol: f32,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
isclose(a.0, b.0, rtol, atol) && isclose(a.1, b.1, rtol, atol)
|
isclosef(a.0, b.0, rtol, atol) && isclosef(a.1, b.1, rtol, atol)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn complex_allclose(
|
pub fn complex_allclose(
|
||||||
|
|
158
dsp/src/trig.rs
158
dsp/src/trig.rs
|
@ -1,8 +1,95 @@
|
||||||
use super::Complex;
|
use super::{shift_round, Complex};
|
||||||
use core::f64::consts::PI;
|
use core::f64::consts::PI;
|
||||||
|
|
||||||
include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
|
include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
|
||||||
|
|
||||||
|
/// 2-argument arctangent function.
|
||||||
|
///
|
||||||
|
/// This implementation uses all integer arithmetic for fast
|
||||||
|
/// computation. It is designed to have high accuracy near the axes
|
||||||
|
/// and lower away from the axes. It is additionally designed so that
|
||||||
|
/// the error changes slowly with respect to the angle.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `y` - Y-axis component.
|
||||||
|
/// * `x` - X-axis component.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// The angle between the x-axis and the ray to the point (x,y). The
|
||||||
|
/// result range is from i32::MIN to i32::MAX, where i32::MIN
|
||||||
|
/// represents -pi and, equivalently, +pi. i32::MAX represents one
|
||||||
|
/// count less than +pi.
|
||||||
|
pub fn atan2(y: i32, x: i32) -> i32 {
|
||||||
|
let mut y = y >> 16;
|
||||||
|
let mut x = x >> 16;
|
||||||
|
|
||||||
|
let sign = ((y >> 14) & 2) | ((x >> 15) & 1);
|
||||||
|
if sign & 1 == 1 {
|
||||||
|
x *= -1;
|
||||||
|
}
|
||||||
|
if sign & 2 == 2 {
|
||||||
|
y *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let y_greater = y > x;
|
||||||
|
|
||||||
|
// Uses the general procedure described in the following
|
||||||
|
// Mathematics stack exchange answer:
|
||||||
|
//
|
||||||
|
// https://math.stackexchange.com/a/1105038/583981
|
||||||
|
//
|
||||||
|
// The atan approximation method has been modified to be cheaper
|
||||||
|
// to compute and to be more compatible with integer
|
||||||
|
// arithmetic. The approximation technique used here is
|
||||||
|
//
|
||||||
|
// pi / 4 * x + 0.285 * x * (1 - abs(x))
|
||||||
|
//
|
||||||
|
// which is taken from Rajan 2006: Efficient Approximations for
|
||||||
|
// the Arctangent Function.
|
||||||
|
if y_greater {
|
||||||
|
core::mem::swap(&mut x, &mut y);
|
||||||
|
}
|
||||||
|
|
||||||
|
if x == 0 {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need to share the 31 available non-sign bits between the
|
||||||
|
// atan argument and constant factors used in the atan
|
||||||
|
// approximation. Sharing the bits roughly equally between them
|
||||||
|
// gives good accuracy. Additionally, we cannot increase the
|
||||||
|
// number of atan argument bits beyond 15 because we must square
|
||||||
|
// it.
|
||||||
|
const ATAN_ARGUMENT_BITS: usize = 15;
|
||||||
|
let ratio = (y << ATAN_ARGUMENT_BITS) / x;
|
||||||
|
|
||||||
|
let mut angle = {
|
||||||
|
const K1: i32 = ((1. / 4. + 0.285 / PI)
|
||||||
|
* (1 << (31 - ATAN_ARGUMENT_BITS)) as f64)
|
||||||
|
as i32;
|
||||||
|
const K2: i32 =
|
||||||
|
((0.285 / PI) * (1 << (31 - ATAN_ARGUMENT_BITS)) as f64) as i32;
|
||||||
|
|
||||||
|
ratio * K1 - K2 * shift_round(ratio * ratio, ATAN_ARGUMENT_BITS)
|
||||||
|
};
|
||||||
|
|
||||||
|
if y_greater {
|
||||||
|
angle = (i32::MAX >> 1) - angle;
|
||||||
|
}
|
||||||
|
|
||||||
|
if sign & 1 == 1 {
|
||||||
|
angle = i32::MAX - angle;
|
||||||
|
}
|
||||||
|
|
||||||
|
if sign & 2 == 2 {
|
||||||
|
angle *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
angle
|
||||||
|
}
|
||||||
|
|
||||||
/// Compute the cosine and sine of an angle.
|
/// Compute the cosine and sine of an angle.
|
||||||
/// This is ported from the MiSoC cossin core.
|
/// This is ported from the MiSoC cossin core.
|
||||||
/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
|
/// (https://github.com/m-labs/misoc/blob/master/misoc/cores/cossin.py)
|
||||||
|
@ -75,8 +162,75 @@ pub fn cossin(phase: i32) -> Complex<i32> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::testing::isclose;
|
||||||
|
use core::f64::consts::PI;
|
||||||
|
|
||||||
|
fn angle_to_axis(angle: f64) -> f64 {
|
||||||
|
let angle = angle % (PI / 2.);
|
||||||
|
(PI / 2. - angle).min(angle)
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn error_max_rms_all_phase() {
|
fn atan2_absolute_error() {
|
||||||
|
const NUM_VALS: usize = 1_001;
|
||||||
|
let mut test_vals: [f64; NUM_VALS] = [0.; NUM_VALS];
|
||||||
|
let val_bounds: (f64, f64) = (-1., 1.);
|
||||||
|
let val_delta: f64 =
|
||||||
|
(val_bounds.1 - val_bounds.0) / (NUM_VALS - 1) as f64;
|
||||||
|
for i in 0..NUM_VALS {
|
||||||
|
test_vals[i] = val_bounds.0 + i as f64 * val_delta;
|
||||||
|
}
|
||||||
|
|
||||||
|
let atol: f64 = 4e-5;
|
||||||
|
let rtol: f64 = 0.127;
|
||||||
|
for &x in test_vals.iter() {
|
||||||
|
for &y in test_vals.iter() {
|
||||||
|
let actual = (y.atan2(x) as f64 * i16::MAX as f64).round()
|
||||||
|
/ i16::MAX as f64;
|
||||||
|
let tol = atol + rtol * angle_to_axis(actual).abs();
|
||||||
|
let computed = (atan2(
|
||||||
|
((y * i16::MAX as f64) as i32) << 16,
|
||||||
|
((x * i16::MAX as f64) as i32) << 16,
|
||||||
|
) >> 16) as f64
|
||||||
|
/ i16::MAX as f64
|
||||||
|
* PI;
|
||||||
|
|
||||||
|
if !isclose(computed, actual, 0., tol) {
|
||||||
|
println!("(x, y) : {}, {}", x, y);
|
||||||
|
println!("actual : {}", actual);
|
||||||
|
println!("computed : {}", computed);
|
||||||
|
println!("tolerance: {}\n", tol);
|
||||||
|
assert!(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test min and max explicitly
|
||||||
|
for (x, y) in [
|
||||||
|
((i16::MIN as i32 + 1) << 16, -(1 << 16) as i32),
|
||||||
|
((i16::MIN as i32 + 1) << 16, (1 << 16) as i32),
|
||||||
|
]
|
||||||
|
.iter()
|
||||||
|
{
|
||||||
|
let yf = *y as f64 / ((i16::MAX as i32) << 16) as f64;
|
||||||
|
let xf = *x as f64 / ((i16::MAX as i32) << 16) as f64;
|
||||||
|
let actual =
|
||||||
|
(yf.atan2(xf) * i16::MAX as f64).round() / i16::MAX as f64;
|
||||||
|
let computed = (atan2(*y, *x) >> 16) as f64 / i16::MAX as f64 * PI;
|
||||||
|
let tol = atol + rtol * angle_to_axis(actual).abs();
|
||||||
|
|
||||||
|
if !isclose(computed, actual, 0., tol) {
|
||||||
|
println!("(x, y) : {}, {}", *x, *y);
|
||||||
|
println!("actual : {}", actual);
|
||||||
|
println!("computed : {}", computed);
|
||||||
|
println!("tolerance: {}\n", tol);
|
||||||
|
assert!(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cossin_error_max_rms_all_phase() {
|
||||||
// Constant amplitude error due to LUT data range.
|
// Constant amplitude error due to LUT data range.
|
||||||
const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
|
const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as f64;
|
||||||
const MAX_PHASE: f64 = (1i64 << 32) as f64;
|
const MAX_PHASE: f64 = (1i64 << 32) as f64;
|
||||||
|
|
Loading…
Reference in New Issue