diff --git a/.cargo/config b/.cargo/config index 382c36e..ea1d6c0 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,6 +1,16 @@ [target.'cfg(all(target_arch = "arm", target_os = "none"))'] runner = "gdb-multiarch -q -x openocd.gdb" -rustflags = ["-C", "link-arg=-Tlink.x"] +rustflags = [ + "-C", "link-arg=-Tlink.x", +# The target (below) defaults to cortex-m4 +# There currently are two different options to go beyond that: +# 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet + "-C", "target-cpu=cortex-m7", +# 2. cortex-m4 with the additional fpv5 instructions and a potentially +# better-than-nothing instruction schedule + "-C", "target-feature=+fp-armv8d16", +# When combined they are equivalent to (1) alone +] [build] target = "thumbv7em-none-eabihf" diff --git a/Cargo.toml b/Cargo.toml index 301956c..896eecf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,7 @@ branch = "dma" [features] semihosting = ["panic-semihosting", "cortex-m-log/semihosting"] bkpt = [ ] -nightly = ["cortex-m/inline-asm"] +nightly = ["cortex-m/inline-asm", "dsp/nightly"] [profile.dev] codegen-units = 1 diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml index 625d0f0..c8ef52b 100644 --- a/dsp/Cargo.toml +++ b/dsp/Cargo.toml @@ -6,3 +6,6 @@ edition = "2018" [dependencies] serde = { version = "1.0", features = ["derive"], default-features = false } + +[features] +nightly = [] diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index fac1c4c..c6f2100 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,4 +1,4 @@ -use core::ops::{Add, Mul}; +use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; use core::f32; @@ -8,23 +8,35 @@ use core::f32; // `compiler-intrinsics`/llvm should have better (robust, universal, and // faster) implementations. -fn abs(x: f32) -> f32 { - if x >= 0. { +fn abs<T>(x: T) -> T +where + T: PartialOrd + Default + Neg<Output = T>, +{ + if x >= T::default() { x } else { -x } } -fn copysign(x: f32, y: f32) -> f32 { - if (x >= 0. && y >= 0.)
|| (x <= 0. && y <= 0.) { +fn copysign<T>(x: T, y: T) -> T +where + T: PartialOrd + Default + Neg<Output = T>, +{ + if (x >= T::default() && y >= T::default()) + || (x <= T::default() && y <= T::default()) + { x } else { -x } } -fn max(x: f32, y: f32) -> f32 { +#[cfg(not(feature = "nightly"))] +fn max<T>(x: T, y: T) -> T +where + T: PartialOrd, +{ if x > y { x } else { @@ -32,7 +44,11 @@ fn max(x: f32, y: f32) -> f32 { } } -fn min(x: f32, y: f32) -> f32 { +#[cfg(not(feature = "nightly"))] +fn min<T>(x: T, y: T) -> T +where + T: PartialOrd, +{ if x < y { x } else { @@ -40,6 +56,16 @@ fn min(x: f32, y: f32) -> f32 { } } +#[cfg(feature = "nightly")] +fn max(x: f32, y: f32) -> f32 { + core::intrinsics::maxnumf32(x, y) +} + +#[cfg(feature = "nightly")] +fn min(x: f32, y: f32) -> f32 { + core::intrinsics::minnumf32(x, y) +} + // Multiply-accumulate vectors `x` and `a`. // // A.k.a. dot product. @@ -50,7 +76,7 @@ where { x.iter() .zip(a) - .map(|(&x, &a)| x * a) + .map(|(x, a)| *x * *a) .fold(y0, |y, xa| y + xa) } @@ -58,10 +84,10 @@ where /// /// To represent the IIR state (input and output memory) during the filter update /// this contains the three inputs (x0, x1, x2) and the two outputs (y1, y2) -/// concatenated. +/// concatenated. Lower indices correspond to more recent samples. /// To represent the IIR coefficients, this contains the feed-forward -/// coefficients (b0, b1, b2) followd by the feed-back coefficients (a1, a2), -/// all normalized such that a0 = 1. +/// coefficients (b0, b1, b2) followed by the negated feed-back coefficients +/// (-a1, -a2), all five normalized such that a0 = 1. pub type IIRState = [f32; 5]; /// IIR configuration. @@ -159,10 +185,13 @@ impl IIR { /// * `xy` - Current filter state. /// * `x0` - New input.
pub fn update(&self, xy: &mut IIRState, x0: f32) -> f32 { + let n = self.ba.len(); + debug_assert!(xy.len() == n); // `xy` contains x0 x1 y0 y1 y2 // Increment time x1 x2 y1 y2 y3 - // Rotate y3 x1 x2 y1 y2 - xy.rotate_right(1); + // Shift x1 x1 x2 y1 y2 + // This unrolls better than xy.rotate_right(1) + xy.copy_within(0..n - 1, 1); // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; // Compute y0 by multiply-accumulate @@ -170,7 +199,7 @@ impl IIR { // Limit y0 let y0 = max(self.y_min, min(self.y_max, y0)); // Store y0 x0 x1 y0 y1 y2 - xy[xy.len() / 2] = y0; + xy[n / 2] = y0; y0 } } diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 3c44bbc..b2acf34 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,3 +1,4 @@ #![no_std] +#![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] pub mod iir; diff --git a/src/main.rs b/src/main.rs index e6f83b5..845a9b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,9 @@ fn panic(_info: &core::panic::PanicInfo) -> ! { let gpiod = unsafe { &*hal::stm32::GPIOD::ptr() }; gpiod.odr.modify(|_, w| w.odr6().high().odr12().high()); // FP_LED_1, FP_LED_3 + #[cfg(feature = "nightly")] + core::intrinsics::abort(); + #[cfg(not(feature = "nightly"))] unsafe { core::intrinsics::abort(); } @@ -760,7 +763,11 @@ const APP: () = { let x = f32::from(adc_samples[channel][sample] as i16); let y = c.resources.iir_ch[channel] .update(&mut c.resources.iir_state[channel], x); - dac_samples[channel][sample] = y as i16 as u16 ^ 0x8000; + // Note(unsafe): The filter limits ensure that the value is in range. + // The truncation introduces 1/2 LSB distortion. + let y = unsafe { y.to_int_unchecked::<i16>() }; + // Convert to DAC code + dac_samples[channel][sample] = y as u16 ^ 0x8000; } } let [dac0, dac1] = dac_samples;