From b0e0b5144fec39f66c49d9b551d6be5b9d658f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 24 Nov 2020 09:27:47 +0100 Subject: [PATCH 1/9] processing: use faster unsafe truncate --- src/main.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 4c945e5..6a72552 100644 --- a/src/main.rs +++ b/src/main.rs @@ -779,7 +779,11 @@ const APP: () = { let x0 = f32::from(a as i16); let y0 = c.resources.iir_ch[1].update(&mut c.resources.iir_state[1], x0); - y0 as i16 as u16 ^ 0x8000 + // note(unsafe): The filter limits ensure that the value is in range. + // The truncation introduces 1/2 LSB distortion. + let y0 = unsafe { y0.to_int_unchecked::() }; + // convert to DAC code + y0 as u16 ^ 0x8000 }; c.resources.dac1.send(output).unwrap(); @@ -792,7 +796,11 @@ const APP: () = { let x0 = f32::from(a as i16); let y0 = c.resources.iir_ch[0].update(&mut c.resources.iir_state[0], x0); - y0 as i16 as u16 ^ 0x8000 + // note(unsafe): The filter limits ensure that the value is in range. + // The truncation introduces 1/2 LSB distortion. + let y0 = unsafe { y0.to_int_unchecked::() }; + // convert to DAC code + y0 as u16 ^ 0x8000 }; c.resources.dac0.send(output).unwrap(); From d9e4f6a052757abcc6a084d47603d51259c02b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Wed, 25 Nov 2020 17:24:49 +0100 Subject: [PATCH 2/9] iir: copy_within is better than rotate_right --- dsp/src/iir.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index fac1c4c..8d25c27 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -159,10 +159,12 @@ impl IIR { /// * `xy` - Current filter state. /// * `x0` - New input. 
pub fn update(&self, xy: &mut IIRState, x0: f32) -> f32 { + let n = self.ba.len(); + debug_assert!(xy.len() == n); // `xy` contains x0 x1 y0 y1 y2 // Increment time x1 x2 y1 y2 y3 // Rotate y3 x1 x2 y1 y2 - xy.rotate_right(1); + xy.copy_within(0..n - 1, 1); // unrolls better than xy.rotate_right(1) // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; // Compute y0 by multiply-accumulate @@ -170,7 +172,7 @@ impl IIR { // Limit y0 let y0 = max(self.y_min, min(self.y_max, y0)); // Store y0 x0 x1 y0 y1 y2 - xy[xy.len() / 2] = y0; + xy[n / 2] = y0; y0 } } From 4c9c65bf2d5d9146e130ce55d7d615f578937091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Wed, 25 Nov 2020 17:33:16 +0100 Subject: [PATCH 3/9] cargo-config: cm7 features --- .cargo/config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.cargo/config b/.cargo/config index 382c36e..9968846 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,6 +1,10 @@ [target.'cfg(all(target_arch = "arm", target_os = "none"))'] runner = "gdb-multiarch -q -x openocd.gdb" -rustflags = ["-C", "link-arg=-Tlink.x"] +rustflags = [ + "-C", "link-arg=-Tlink.x", + "-C", "target-cpu=cortex-m7", + "-C", "target-feature=+fp-armv8d16", +] [build] target = "thumbv7em-none-eabihf" From 38dfd48c149efaabce5dfa390b7bb6724b187dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Wed, 25 Nov 2020 17:53:13 +0100 Subject: [PATCH 4/9] iir: fix comment [nfc] --- dsp/src/iir.rs | 2 +- src/main.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 8d25c27..f58fb64 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -163,7 +163,7 @@ impl IIR { debug_assert!(xy.len() == n); // `xy` contains x0 x1 y0 y1 y2 // Increment time x1 x2 y1 y2 y3 - // Rotate y3 x1 x2 y1 y2 + // Shift x1 x1 x2 y1 y2 xy.copy_within(0..n - 1, 1); // unrolls better than xy.rotate_right(1) // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; diff --git a/src/main.rs b/src/main.rs index 
ac9f1a6..3173158 100644 --- a/src/main.rs +++ b/src/main.rs @@ -758,10 +758,10 @@ const APP: () = { let x0 = f32::from(*adc0 as i16); let y0 = c.resources.iir_ch[0] .update(&mut c.resources.iir_state[0], x0); - // note(unsafe): The filter limits ensure that the value is in range. + // Note(unsafe): The filter limits ensure that the value is in range. // The truncation introduces 1/2 LSB distortion. let y0 = unsafe { y0.to_int_unchecked::() }; - // convert to DAC code + // Convert to DAC code y0 as u16 ^ 0x8000 }; @@ -769,10 +769,10 @@ const APP: () = { let x1 = f32::from(*adc1 as i16); let y1 = c.resources.iir_ch[1] .update(&mut c.resources.iir_state[1], x1); - // note(unsafe): The filter limits ensure that the value is in range. + // Note(unsafe): The filter limits ensure that the value is in range. // The truncation introduces 1/2 LSB distortion. let y1 = unsafe { y1.to_int_unchecked::() }; - // convert to DAC code + // Convert to DAC code y1 as u16 ^ 0x8000 }; } From cc64f470049ec363f547f4643c1b3de3613db12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Wed, 25 Nov 2020 18:55:07 +0100 Subject: [PATCH 5/9] iir: fmt [nfc] --- dsp/src/iir.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index f58fb64..04d7c8e 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -164,7 +164,8 @@ impl IIR { // `xy` contains x0 x1 y0 y1 y2 // Increment time x1 x2 y1 y2 y3 // Shift x1 x1 x2 y1 y2 - xy.copy_within(0..n - 1, 1); // unrolls better than xy.rotate_right(1) + // This unrolls better than xy.rotate_right(1) + xy.copy_within(0..n - 1, 1); // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; // Compute y0 by multiply-accumulate From 468929690df7493baef5ae6537f09bee9443ec73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Thu, 26 Nov 2020 14:19:09 +0100 Subject: [PATCH 6/9] iir: vminnm/vmaxnm --- Cargo.toml | 2 +- dsp/Cargo.toml | 3 +++ dsp/src/iir.rs | 27 +++++++++++++++++++++++++++ 
dsp/src/lib.rs | 1 + src/main.rs | 3 +++ 5 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 301956c..896eecf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,7 +62,7 @@ branch = "dma" [features] semihosting = ["panic-semihosting", "cortex-m-log/semihosting"] bkpt = [ ] -nightly = ["cortex-m/inline-asm"] +nightly = ["cortex-m/inline-asm", "dsp/nightly"] [profile.dev] codegen-units = 1 diff --git a/dsp/Cargo.toml b/dsp/Cargo.toml index 625d0f0..c8ef52b 100644 --- a/dsp/Cargo.toml +++ b/dsp/Cargo.toml @@ -6,3 +6,6 @@ edition = "2018" [dependencies] serde = { version = "1.0", features = ["derive"], default-features = false } + +[features] +nightly = [] diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 04d7c8e..e081556 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -24,6 +24,7 @@ fn copysign(x: f32, y: f32) -> f32 { } } +#[cfg(not(feature = "nightly"))] fn max(x: f32, y: f32) -> f32 { if x > y { x @@ -32,6 +33,7 @@ fn max(x: f32, y: f32) -> f32 { } } +#[cfg(not(feature = "nightly"))] fn min(x: f32, y: f32) -> f32 { if x < y { x @@ -40,6 +42,31 @@ fn min(x: f32, y: f32) -> f32 { } } +#[cfg(feature = "nightly")] +fn max(x: f32, y: f32) -> f32 { + let o: f32; + unsafe { + asm!("vmaxnm.f32 {}, {}, {}", + lateout(sreg) o, in(sreg) x, in(sreg) y, + options(pure, nomem, nostack, preserves_flags) + ); + } + o +} + +#[cfg(feature = "nightly")] +fn min(x: f32, y: f32) -> f32 { + let o: f32; + unsafe { + asm!("vminnm.f32 {}, {}, {}", + lateout(sreg) o, in(sreg) x, in(sreg) y, + options(pure, nomem, nostack, preserves_flags) + ); + } + o +} + + // Multiply-accumulate vectors `x` and `a`. // // A.k.a. dot product. 
diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index 3c44bbc..ac25d1e 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,3 +1,4 @@ #![no_std] +#![cfg_attr(feature = "nightly", feature(asm))] pub mod iir; diff --git a/src/main.rs b/src/main.rs index 3173158..15046b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,9 @@ fn panic(_info: &core::panic::PanicInfo) -> ! { let gpiod = unsafe { &*hal::stm32::GPIOD::ptr() }; gpiod.odr.modify(|_, w| w.odr6().high().odr12().high()); // FP_LED_1, FP_LED_3 + #[cfg(feature = "nightly")] + core::intrinsics::abort(); + #[cfg(not(feature = "nightly"))] unsafe { core::intrinsics::abort(); } From ea3e343c39d7768fa6050c774ee360ec9473add4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Thu, 26 Nov 2020 14:30:09 +0100 Subject: [PATCH 7/9] cargo fmt [nfc] --- dsp/src/iir.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index e081556..33d8fdf 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -66,7 +66,6 @@ fn min(x: f32, y: f32) -> f32 { o } - // Multiply-accumulate vectors `x` and `a`. // // A.k.a. dot product. From 74349e5d68f4663deff5b9df9d1ac52d069ff294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Fri, 27 Nov 2020 10:36:30 +0100 Subject: [PATCH 8/9] iir: more generic math helpers, use core::intrinsics --- dsp/src/iir.rs | 54 +++++++++++++++++++++++++------------------------- dsp/src/lib.rs | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/dsp/src/iir.rs b/dsp/src/iir.rs index 33d8fdf..c6f2100 100644 --- a/dsp/src/iir.rs +++ b/dsp/src/iir.rs @@ -1,4 +1,4 @@ -use core::ops::{Add, Mul}; +use core::ops::{Add, Mul, Neg}; use serde::{Deserialize, Serialize}; use core::f32; @@ -8,16 +8,24 @@ use core::f32; // `compiler-intrinsics`/llvm should have better (robust, universal, and // faster) implementations. -fn abs(x: f32) -> f32 { - if x >= 0. 
{ +fn abs(x: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if x >= T::default() { x } else { -x } } -fn copysign(x: f32, y: f32) -> f32 { - if (x >= 0. && y >= 0.) || (x <= 0. && y <= 0.) { +fn copysign(x: T, y: T) -> T +where + T: PartialOrd + Default + Neg, +{ + if (x >= T::default() && y >= T::default()) + || (x <= T::default() && y <= T::default()) + { x } else { -x @@ -25,7 +33,10 @@ fn copysign(x: f32, y: f32) -> f32 { } #[cfg(not(feature = "nightly"))] -fn max(x: f32, y: f32) -> f32 { +fn max(x: T, y: T) -> T +where + T: PartialOrd, +{ if x > y { x } else { @@ -34,7 +45,10 @@ fn max(x: f32, y: f32) -> f32 { } #[cfg(not(feature = "nightly"))] -fn min(x: f32, y: f32) -> f32 { +fn min(x: T, y: T) -> T +where + T: PartialOrd, +{ if x < y { x } else { @@ -44,26 +58,12 @@ fn min(x: f32, y: f32) -> f32 { #[cfg(feature = "nightly")] fn max(x: f32, y: f32) -> f32 { - let o: f32; - unsafe { - asm!("vmaxnm.f32 {}, {}, {}", - lateout(sreg) o, in(sreg) x, in(sreg) y, - options(pure, nomem, nostack, preserves_flags) - ); - } - o + core::intrinsics::maxnumf32(x, y) } #[cfg(feature = "nightly")] fn min(x: f32, y: f32) -> f32 { - let o: f32; - unsafe { - asm!("vminnm.f32 {}, {}, {}", - lateout(sreg) o, in(sreg) x, in(sreg) y, - options(pure, nomem, nostack, preserves_flags) - ); - } - o + core::intrinsics::minnumf32(x, y) } // Multiply-accumulate vectors `x` and `a`. @@ -76,7 +76,7 @@ where { x.iter() .zip(a) - .map(|(&x, &a)| x * a) + .map(|(x, a)| *x * *a) .fold(y0, |y, xa| y + xa) } @@ -84,10 +84,10 @@ where /// /// To represent the IIR state (input and output memory) during the filter update /// this contains the three inputs (x0, x1, x2) and the two outputs (y1, y2) -/// concatenated. +/// concatenated. Lower indices correspond to more recent samples. /// To represent the IIR coefficients, this contains the feed-forward -/// coefficients (b0, b1, b2) followd by the feed-back coefficients (a1, a2), -/// all normalized such that a0 = 1. 
+/// coefficients (b0, b1, b2) followed by the negated feed-back coefficients +/// (-a1, -a2), all five normalized such that a0 = 1. pub type IIRState = [f32; 5]; /// IIR configuration. diff --git a/dsp/src/lib.rs b/dsp/src/lib.rs index ac25d1e..b2acf34 100644 --- a/dsp/src/lib.rs +++ b/dsp/src/lib.rs @@ -1,4 +1,4 @@ #![no_std] -#![cfg_attr(feature = "nightly", feature(asm))] +#![cfg_attr(feature = "nightly", feature(asm, core_intrinsics))] pub mod iir; From feb229ddd5befd9fdb6fa873458db502d722e82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 30 Nov 2020 12:02:14 +0100 Subject: [PATCH 9/9] cargo: add docs for target cpu/features --- .cargo/config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.cargo/config b/.cargo/config index 9968846..ea1d6c0 100644 --- a/.cargo/config +++ b/.cargo/config @@ -2,8 +2,14 @@ runner = "gdb-multiarch -q -x openocd.gdb" rustflags = [ "-C", "link-arg=-Tlink.x", +# The target (below) defaults to cortex-m4 +# There currently are two different options to go beyond that: +# 1. cortex-m7 has the right flags and instructions (FPU) but no instruction schedule yet "-C", "target-cpu=cortex-m7", +# 2. cortex-m4 with the additional fpv5 instructions and a potentially +# better-than-nothing instruction schedule "-C", "target-feature=+fp-armv8d16", +# When combined they are equivalent to (1) alone ] [build]