From 6f3a30292c4ef54ea2c861a66478a7250e12afa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sat, 20 Feb 2021 20:27:30 +0100 Subject: [PATCH 01/15] iir_int: make miniconf-able --- dsp/src/iir_int.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs index 4af50f1..02fdeb1 100644 --- a/dsp/src/iir_int.rs +++ b/dsp/src/iir_int.rs @@ -1,5 +1,6 @@ use core::f64::consts::PI; -use serde::{Deserialize, Serialize}; +use miniconf::StringSet; +use serde::Deserialize; /// Generic vector for integer IIR filter. /// This struct is used to hold the x/y input/output data vector or the b/a coefficient @@ -55,7 +56,7 @@ fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { /// See `dsp::iir::IIR` for general implementation details. /// Offset and limiting disabled to suit lowpass applications. /// Coefficient scaling fixed and optimized. -#[derive(Copy, Clone, Default, Deserialize, Serialize)] +#[derive(Copy, Clone, Default, Debug, StringSet, Deserialize)] pub struct IIR { pub ba: Vec5, // pub y_offset: i32, From f671e0c94252568fb00cfc709ac5dfa81e43ffa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 22 Feb 2021 16:36:56 +0100 Subject: [PATCH 02/15] complex: add saturating_add/sub --- dsp/src/complex.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs index 3349460..dc36349 100644 --- a/dsp/src/complex.rs +++ b/dsp/src/complex.rs @@ -8,6 +8,8 @@ pub trait ComplexExt { fn abs_sqr(&self) -> U; fn log2(&self) -> T; fn arg(&self) -> T; + fn saturating_add(&self, other: Self) -> Self; + fn saturating_sub(&self, other: Self) -> Self; } impl ComplexExt for Complex { @@ -23,7 +25,7 @@ impl ComplexExt for Complex { /// ``` fn from_angle(angle: i32) -> Self { let (c, s) = cossin(angle); - Self { re: c, im: s } + Self::new(c, s) } /// Return the absolute square (the squared magnitude). @@ -85,6 +87,15 @@ impl ComplexExt for Complex { fn arg(&self) -> i32 { atan2(self.im, self.re) } + + fn saturating_add(&self, other: Self) -> Self { + Self::new(self.re.saturating_add(other.re), self.im.saturating_add(other.im)) + } + + fn saturating_sub(&self, other: Self) -> Self { + Self::new(self.re.saturating_sub(other.re), self.im.saturating_sub(other.im)) + } + } /// Full scale fixed point multiplication. From ffdf4026fbdbf9a899aec7025b5710542ced6870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 22 Feb 2021 16:40:51 +0100 Subject: [PATCH 03/15] fmt --- dsp/src/complex.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs index dc36349..1eb003f 100644 --- a/dsp/src/complex.rs +++ b/dsp/src/complex.rs @@ -89,13 +89,18 @@ impl ComplexExt for Complex { } fn saturating_add(&self, other: Self) -> Self { - Self::new(self.re.saturating_add(other.re), self.im.saturating_add(other.im)) + Self::new( + self.re.saturating_add(other.re), + self.im.saturating_add(other.im), + ) } fn saturating_sub(&self, other: Self) -> Self { - Self::new(self.re.saturating_sub(other.re), self.im.saturating_sub(other.im)) + Self::new( + self.re.saturating_sub(other.re), + self.im.saturating_sub(other.im), + ) } - } /// Full scale fixed point multiplication. From 6c6c2e64a7d97aef57fda7034cf4062ae04ee5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 23 Feb 2021 16:46:16 +0100 Subject: [PATCH 04/15] lockin: make order generic --- Cargo.lock | 1 + Cargo.toml | 1 + dsp/src/lockin.rs | 8 ++++---- src/bin/lockin-external.rs | 8 +++----- src/bin/lockin-internal.rs | 3 ++- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7fa03f0..0999288 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,6 +724,7 @@ dependencies = [ "dsp", "embedded-hal", "enum-iterator", + "generic-array 0.14.4", "heapless", "log", "mcp23017", diff --git a/Cargo.toml b/Cargo.toml index 599e595..f384fe9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ dsp = { path = "dsp" } ad9959 = { path = "ad9959" } smoltcp-nal = "0.1.0" miniconf = "0.1" +generic-array = "0.14" [patch.crates-io.miniconf] git = "https://github.com/quartiq/miniconf.git" diff --git a/dsp/src/lockin.rs b/dsp/src/lockin.rs index 6a977b4..613a133 100644 --- a/dsp/src/lockin.rs +++ b/dsp/src/lockin.rs @@ -1,12 +1,12 @@ use super::{Complex, ComplexExt, Lowpass, MulScaled}; -use generic_array::typenum::U2; +use generic_array::ArrayLength; #[derive(Clone, Default)] -pub struct Lockin { - state: [Lowpass; 2], +pub struct Lockin> { + state: [Lowpass; 2], } -impl Lockin { +impl> Lockin { /// Update the lockin with a sample taken at a given phase. pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex { // Get the LO signal for demodulation and mix the sample; diff --git a/src/bin/lockin-external.rs b/src/bin/lockin-external.rs index 3b7c6a8..e44c17e 100644 --- a/src/bin/lockin-external.rs +++ b/src/bin/lockin-external.rs @@ -2,10 +2,8 @@ #![no_std] #![no_main] -use stm32h7xx_hal as hal; - +use generic_array::typenum::U4; use stabilizer::{hardware, hardware::design_parameters}; - use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL}; use hardware::{ Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1, @@ -20,7 +18,7 @@ const APP: () = { timestamper: InputStamper, pll: RPLL, - lockin: Lockin, + lockin: Lockin, } #[init] @@ -156,7 +154,7 @@ const APP: () = { #[task(binds = ETH, priority = 1)] fn eth(_: eth::Context) { - unsafe { hal::ethernet::interrupt_handler() } + unsafe { stm32h7xx_hal::ethernet::interrupt_handler() } } #[task(binds = SPI2, priority = 3)] diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index 1f9df6e..7ea7e1d 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -2,6 +2,7 @@ #![no_std] #![no_main] +use generic_array::typenum::U2; use dsp::{Accu, Complex, ComplexExt, Lockin}; use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1}; use stabilizer::{hardware, hardware::design_parameters}; @@ -20,7 +21,7 @@ const APP: () = { adc: Adc1Input, dacs: (Dac0Output, Dac1Output), - lockin: Lockin, + lockin: Lockin, } #[init] From e86f449dc0aa103701ba268b6dbc3f9fb5b1f683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 23 Feb 2021 16:57:09 +0100 Subject: [PATCH 05/15] lockin bins: remove stale todos, align and document [nfc] --- src/bin/lockin-external.rs | 21 +++++++--------- src/bin/lockin-internal.rs | 49 +++++++++++++------------------------- 2 files changed, 26 insertions(+), 44 deletions(-) diff --git a/src/bin/lockin-external.rs b/src/bin/lockin-external.rs index e44c17e..1b11c76 100644 --- a/src/bin/lockin-external.rs +++ b/src/bin/lockin-external.rs @@ -2,12 +2,12 @@ #![no_std] #![no_main] -use generic_array::typenum::U4; -use stabilizer::{hardware, hardware::design_parameters}; use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL}; +use generic_array::typenum::U4; use hardware::{ Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1, }; +use stabilizer::{hardware, hardware::design_parameters}; #[rtic::app(device = stm32h7xx_hal::stm32, peripherals = true, monotonic = rtic::cyccnt::CYCCNT)] const APP: () = { @@ -86,22 +86,20 @@ const APP: () = { .map(|t| t as i32); let (pll_phase, pll_frequency) = c.resources.pll.update( timestamp, - 21, // frequency settling time (log2 counter cycles), TODO: expose - 21, // phase settling time, TODO: expose + 21, // frequency settling time (log2 counter cycles), + 21, // phase settling time ); // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) - let harmonic: i32 = -1; // TODO: expose + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = 0; // TODO: expose + let phase_offset: i32 = 0; // Log2 lowpass time constant - let time_constant: u8 = 6; // TODO: expose + let time_constant: u8 = 6; let sample_frequency = ((pll_frequency - // half-up rounding bias - // .wrapping_add(1 << design_parameters::SAMPLE_BUFFER_SIZE_LOG2 - 1) >> design_parameters::SAMPLE_BUFFER_SIZE_LOG2) as i32) .wrapping_mul(harmonic); @@ -129,7 +127,7 @@ const APP: () = { Quadrature, } - let conf = Conf::FrequencyDiscriminator; // TODO: expose + let conf = Conf::FrequencyDiscriminator; let output = match conf { // Convert from IQ to power and phase. Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()], @@ -147,7 +145,6 @@ const APP: () = { #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } } @@ -164,7 +161,7 @@ const APP: () = { #[task(binds = SPI3, priority = 3)] fn spi3(_: spi3::Context) { - panic!("ADC0 input overrun"); + panic!("ADC1 input overrun"); } #[task(binds = SPI4, priority = 3)] diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index 7ea7e1d..d4e90e6 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -2,8 +2,8 @@ #![no_std] #![no_main] -use generic_array::typenum::U2; use dsp::{Accu, Complex, ComplexExt, Lockin}; +use generic_array::typenum::U2; use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1}; use stabilizer::{hardware, hardware::design_parameters}; @@ -45,24 +45,13 @@ const APP: () = { } } - /// Main DSP processing routine for Stabilizer. + /// Main DSP processing routine. /// - /// # Note - /// Processing time for the DSP application code is bounded by the following constraints: + /// See `dual-iir` for general notes on processing time and timing. /// - /// DSP application code starts after the ADC has generated a batch of samples and must be - /// completed by the time the next batch of ADC samples has been acquired (plus the FIFO buffer - /// time). If this constraint is not met, firmware will panic due to an ADC input overrun. - /// - /// The DSP application code must also fill out the next DAC output buffer in time such that the - /// DAC can switch to it when it has completed the current buffer. If this constraint is not met - /// it's possible that old DAC codes will be generated on the output and the output samples will - /// be delayed by 1 batch. - /// - /// Because the ADC and DAC operate at the same rate, these two constraints actually implement - /// the same time bounds, meeting one also means the other is also met. - /// - /// TODO: Document + /// This is an implementation of an internal-reference lockin on the ADC1 signal. + /// The reference at f_sample/8 is output on DAC0 and the phase of the demodulated + /// signal on DAC1. #[task(binds=DMA1_STR4, resources=[adc, dacs, lockin], priority=2)] fn process(c: process::Context) { let lockin = c.resources.lockin; @@ -72,29 +61,23 @@ const APP: () = { c.resources.dacs.1.acquire_buffer(), ]; - // DAC0 always generates a fixed sinusoidal output. - dac_samples[0] - .iter_mut() - .zip(DAC_SEQUENCE.iter()) - .for_each(|(d, s)| *d = *s as u16 ^ 0x8000); - // Reference phase and frequency are known. - let pll_phase = 0; + let pll_phase = 0i32; let pll_frequency = 1i32 << (32 - design_parameters::SAMPLE_BUFFER_SIZE_LOG2); - // Harmonic index of the LO: -1 to _de_modulate the fundamental - let harmonic: i32 = -1; // TODO: expose + // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate) + let harmonic: i32 = -1; // Demodulation LO phase offset - let phase_offset: i32 = (0.25 * i32::MAX as f32) as i32; // TODO: expose + let phase_offset: i32 = 1 << 30; // Log2 lowpass time constant. let time_constant: u8 = 8; let sample_frequency = (pll_frequency as i32).wrapping_mul(harmonic); - let sample_phase = phase_offset - .wrapping_add((pll_phase as i32).wrapping_mul(harmonic)); + let sample_phase = + phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic)); let output: Complex = adc_samples .iter() @@ -110,15 +93,17 @@ const APP: () = { .unwrap() * 2; // Full scale assuming the 2f component is gone. - for value in dac_samples[1].iter_mut() { - *value = (output.arg() >> 16) as u16 ^ 0x8000; + // Convert to DAC data. + for i in 0..dac_samples[0].len() { + // DAC0 always generates a fixed sinusoidal output. + dac_samples[0][i] = DAC_SEQUENCE[i] as u16 ^ 0x8000; + dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000; } } #[idle(resources=[afes])] fn idle(_: idle::Context) -> ! { loop { - // TODO: Implement network interface. cortex_m::asm::wfi(); } } From 016323c94d2da871d1e08eeecc3f13af8ecfb406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 23 Feb 2021 17:15:07 +0100 Subject: [PATCH 06/15] avoid clippy --- src/bin/lockin-internal.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index d4e90e6..1e7ebc0 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -94,7 +94,7 @@ const APP: () = { * 2; // Full scale assuming the 2f component is gone. // Convert to DAC data. - for i in 0..dac_samples[0].len() { + for i in 0..dac_samples[1].len() { // DAC0 always generates a fixed sinusoidal output. dac_samples[0][i] = DAC_SEQUENCE[i] as u16 ^ 0x8000; dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000; From 41f6d971c3f00dd11547303f002b416a607aaf12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 23 Feb 2021 17:22:53 +0100 Subject: [PATCH 07/15] avoid clippy better --- src/bin/lockin-internal.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs index 1e7ebc0..ee9da2f 100644 --- a/src/bin/lockin-internal.rs +++ b/src/bin/lockin-internal.rs @@ -94,9 +94,9 @@ const APP: () = { * 2; // Full scale assuming the 2f component is gone. // Convert to DAC data. - for i in 0..dac_samples[1].len() { + for (i, data) in DAC_SEQUENCE.iter().enumerate() { // DAC0 always generates a fixed sinusoidal output. - dac_samples[0][i] = DAC_SEQUENCE[i] as u16 ^ 0x8000; + dac_samples[0][i] = *data as u16 ^ 0x8000; dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000; } } From a2e0539218422e1b0d6c00e4241f4656d7bfc56d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 23 Feb 2021 17:31:44 +0100 Subject: [PATCH 08/15] ci: cron daily --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4b66b0..cf68b3f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,9 @@ on: branches: [master, staging, trying] pull_request: branches: [master] - + schedule: + # UTC + - cron: '48 4 * * *' env: CARGO_TERM_COLOR: always From b071fef15c4dbd8c4b66b4f8b97cf1256592966a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Thu, 25 Feb 2021 16:38:57 +0100 Subject: [PATCH 09/15] iir_int: apply rounding bias summarily for speed --- dsp/src/lowpass.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dsp/src/lowpass.rs b/dsp/src/lowpass.rs index 5ab803d..10d5838 100644 --- a/dsp/src/lowpass.rs +++ b/dsp/src/lowpass.rs @@ -27,10 +27,10 @@ impl> Lowpass { // Note T-DF-I and the zeros at Nyquist. let mut x = x; for y in self.y.iter_mut() { - let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k; + let dy = x.saturating_sub(*y) >> k; *y += dy; x = *y - (dy >> 1); } - x + x.saturating_add((self.y.len() as i32) << (k - 1)) } } From 067ef79560623ff6ae461173ad313aa546763ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Fri, 26 Feb 2021 19:47:48 +0100 Subject: [PATCH 10/15] cossin: shave off a few more instructions --- dsp/build.rs | 8 ++++---- dsp/src/cossin.rs | 44 +++++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/dsp/build.rs b/dsp/build.rs index ca43736..5e5fe1d 100644 --- a/dsp/build.rs +++ b/dsp/build.rs @@ -15,7 +15,7 @@ fn write_cossin_table() { .unwrap(); write!( file, - "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = [" + "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = [" ) .unwrap(); @@ -29,12 +29,12 @@ fn write_cossin_table() { // use midpoint samples to save one entry in the LUT let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5); // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 - let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16; - let sin = (phase.sin() * AMPLITUDE).round() as u16; + let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1; + let sin = (phase.sin() * AMPLITUDE).round() as u32; if i % 4 == 0 { write!(file, "\n ").unwrap(); } - write!(file, " ({}, {}),", cos, sin).unwrap(); + write!(file, " {},", cos + (sin << 16)).unwrap(); } writeln!(file, "\n];").unwrap(); diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 4a5cccd..2f21cec 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { - // Phase bits excluding the three highes MSB + // Phase bits excluding the three highest MSB const OCTANT_BITS: usize = 32 - 3; // This is a slightly more compact way to compute the four flags for @@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) { let mut octant = (phase as u32) >> OCTANT_BITS; octant ^= octant << 1; - // Mask off octant bits. This leaves the angle in the range [0, pi/4). - let mut phase = phase & ((1 << OCTANT_BITS) - 1); - + let mut phase = phase; if octant & 1 != 0 { // phase = pi/4 - phase - phase = (1 << OCTANT_BITS) - 1 - phase; + phase = !phase; } - let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize]; - // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos - // values and scale the sine values as encoded in the LUT. - let mut cos = lookup.0 as i32 + u16::MAX as i32; - let mut sin = (lookup.1 as i32) << 1; + // Mask off octant bits. This leaves the angle in the range [0, pi/4). + phase &= (1 << OCTANT_BITS) - 1; // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range. const ALIGN_MSB: usize = 32 - 16 - 1; phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB; + + let lookup = COSSIN[(phase >> ALIGN_MSB) as usize]; phase &= (1 << ALIGN_MSB) - 1; + // The phase values used for the LUT are at midpoint for the truncated phase. // Interpolate relative to the LUT entry midpoint. + // Also cancel the -1 bias that was conditionally introduced above. phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32; - // Fixed point pi/4. - const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32; - // No rounding bias necessary here since we keep enough low bits. - let dphi = (phase * PI4) >> (32 - ALIGN_MSB); - // Make room for the sign bit. - let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1); + // Fixed point pi/4. + const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32; + // No rounding bias necessary here since we keep enough low bits. + let dphi = (phase * PI4) >> 16; + + // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos + // values and scale the sine values as encoded in the LUT. + let mut cos = (lookup & 0xffff) as i32 + (1 << 16); + let mut sin = (lookup >> 16) as i32; + + let dcos = (sin * dphi) >> COSSIN_DEPTH; let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1); cos = (cos << (ALIGN_MSB - 1)) - dcos; - sin = (sin << (ALIGN_MSB - 1)) + dsin; + sin = (sin << ALIGN_MSB) + dsin; // Unmap using octant bits. if octant & 2 != 0 { core::mem::swap(&mut sin, &mut cos); } - if octant & 4 != 0 { - cos *= -1; + cos = -cos; } - if octant & 8 != 0 { - sin *= -1; + sin = -sin; } (cos, sin) From f4c6e07a381352a35a12e0b42ace635100fde958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sat, 27 Feb 2021 14:54:46 +0100 Subject: [PATCH 11/15] dsp/bench: add lowpass --- dsp/benches/micro.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dsp/benches/micro.rs b/dsp/benches/micro.rs index 1f2cc1f..b3c1549 100644 --- a/dsp/benches/micro.rs +++ b/dsp/benches/micro.rs @@ -1,8 +1,9 @@ use core::f32::consts::PI; -use dsp::{atan2, cossin}; -use dsp::{iir, iir_int}; -use dsp::{PLL, RPLL}; + use easybench::bench_env; +use generic_array::typenum::U4; + +use dsp::{Lowpass, atan2, cossin, iir, iir_int, PLL, RPLL}; fn atan2_bench() { let xi = (10 << 16) as i32; @@ -70,6 +71,18 @@ fn iir_bench() { ); } +fn lowpass_bench() { + let mut dut = Lowpass::::default(); + println!( + "Lowpass::::update(x, k): {}", + bench_env((0x32421, 14), |(x, k)| dut.update(*x, *k)) + ); + println!( + "Lowpass::::update(x, 14): {}", + bench_env(0x32421, |x| dut.update(*x, 14)) + ); +} + fn main() { atan2_bench(); cossin_bench(); @@ -77,4 +90,5 @@ fn main() { pll_bench(); iir_int_bench(); iir_bench(); + lowpass_bench(); } From 1805961d5dade1ed90361a8de6bc49b7c2d4a928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Thu, 25 Feb 2021 18:01:53 +0100 Subject: [PATCH 12/15] macc_i32: move to tools --- dsp/src/iir_int.rs | 14 ++------------ dsp/src/tools.rs | 11 +++++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs index 02fdeb1..ef0c1dc 100644 --- a/dsp/src/iir_int.rs +++ b/dsp/src/iir_int.rs @@ -1,3 +1,4 @@ +use super::tools::macc_i32; use core::f64::consts::PI; use miniconf::StringSet; use serde::Deserialize; @@ -40,17 +41,6 @@ impl Coeff for Vec5 { } } -fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { - // Rounding bias, half up - let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); - let y = x - .iter() - .zip(a) - .map(|(x, a)| *x as i64 * *a as i64) - .fold(y0, |y, xa| y + xa); - (y >> shift) as i32 -} - /// Integer biquad IIR /// /// See `dsp::iir::IIR` for general implementation details. @@ -86,7 +76,7 @@ impl IIR { // Store x0 x0 x1 x2 y1 y2 xy[0] = x0; // Compute y0 by multiply-accumulate - let y0 = macc(0, xy, &self.ba, IIR::SHIFT); + let y0 = macc_i32(0, xy, &self.ba, IIR::SHIFT); // Limit y0 // let y0 = y0.max(self.y_min).min(self.y_max); // Store y0 x0 x1 y0 y1 y2 diff --git a/dsp/src/tools.rs b/dsp/src/tools.rs index 378734f..6fc2b12 100644 --- a/dsp/src/tools.rs +++ b/dsp/src/tools.rs @@ -77,6 +77,17 @@ where .fold(y0, |y, xa| y + xa) } +pub fn macc_i32(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 { + // Rounding bias, half up + let y0 = ((y0 as i64) << shift) + (1 << (shift - 1)); + let y = x + .iter() + .zip(a) + .map(|(x, a)| *x as i64 * *a as i64) + .fold(y0, |y, xa| y + xa); + (y >> shift) as i32 +} + /// Combine high and low i32 into a single downscaled i32, saturating the type. pub fn saturating_scale(lo: i32, hi: i32, shift: u32) -> i32 { debug_assert!(shift & 31 == shift); From b2d6b5c10c74f1ddad710588baa40892870ab0ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sat, 27 Feb 2021 15:02:16 +0100 Subject: [PATCH 13/15] dsp/bench: fmt --- dsp/benches/micro.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsp/benches/micro.rs b/dsp/benches/micro.rs index b3c1549..0902a4c 100644 --- a/dsp/benches/micro.rs +++ b/dsp/benches/micro.rs @@ -3,7 +3,7 @@ use core::f32::consts::PI; use easybench::bench_env; use generic_array::typenum::U4; -use dsp::{Lowpass, atan2, cossin, iir, iir_int, PLL, RPLL}; +use dsp::{atan2, cossin, iir, iir_int, Lowpass, PLL, RPLL}; fn atan2_bench() { let xi = (10 << 16) as i32; From cd289687eafd20f24c5d81ac9a0fe15a42152278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Sat, 27 Feb 2021 15:37:15 +0100 Subject: [PATCH 14/15] cossin: tighten test --- dsp/src/cossin.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 2f21cec..9240d0d 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -11,7 +11,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// /// # Returns /// The cos and sin values of the provided phase as a `(i32, i32)` -/// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error +/// tuple. With a 7-bit deep LUT there is 9e-6 max and 6e-8 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { // Phase bits excluding the three highest MSB @@ -81,7 +81,7 @@ mod tests { #[test] fn cossin_error_max_rms_all_phase() { // Constant amplitude error due to LUT data range. - const AMPLITUDE: f64 = ((1i64 << 31) - (1i64 << 15)) as _; + const AMPLITUDE: f64 = (1i64 << 31) as f64 - 0.85 * (1i64 << 15) as f64; const MAX_PHASE: f64 = (1i64 << 32) as _; let mut rms_err = (0f64, 0f64); let mut sum_err = (0f64, 0f64); @@ -144,7 +144,7 @@ mod tests { assert!(rms_err.0.sqrt() < 6e-8); assert!(rms_err.1.sqrt() < 6e-8); - assert!(max_err.0 < 1.1e-5); - assert!(max_err.1 < 1.1e-5); + assert!(max_err.0 < 9e-6); + assert!(max_err.1 < 9e-6); } } From 0d6b297d4e0e983d39b6118f8f9f69bd5958aea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Mon, 1 Mar 2021 13:29:59 +0100 Subject: [PATCH 15/15] cossin: don't cancel -1 phase offset --- dsp/build.rs | 12 +++++++----- dsp/src/cossin.rs | 27 +++++++++++++++------------ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/dsp/build.rs b/dsp/build.rs index 5e5fe1d..edd56b0 100644 --- a/dsp/build.rs +++ b/dsp/build.rs @@ -26,14 +26,16 @@ fn write_cossin_table() { const AMPLITUDE: f64 = u16::MAX as f64; for i in 0..(1 << DEPTH) { - // use midpoint samples to save one entry in the LUT - let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5); - // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 - let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1; - let sin = (phase.sin() * AMPLITUDE).round() as u32; if i % 4 == 0 { write!(file, "\n ").unwrap(); } + // Use midpoint samples to save one entry in the LUT + let (sin, cos) = + (PI / 4. * ((i as f64 + 0.5) / (1 << DEPTH) as f64)).sin_cos(); + // Add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4 + // The -1 LSB is cancelled when unscaling with the biased half amplitude + let cos = ((cos * 2. - 1.) * AMPLITUDE - 1.).round() as u32; + let sin = (sin * AMPLITUDE).round() as u32; write!(file, " {},", cos + (sin << 16)).unwrap(); } writeln!(file, "\n];").unwrap(); diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs index 9240d0d..5dae7c3 100644 --- a/dsp/src/cossin.rs +++ b/dsp/src/cossin.rs @@ -11,7 +11,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs")); /// /// # Returns /// The cos and sin values of the provided phase as a `(i32, i32)` -/// tuple. With a 7-bit deep LUT there is 9e-6 max and 6e-8 RMS error +/// tuple. With a 7-bit deep LUT there is 9e-6 max and 4e-6 RMS error /// in each quadrature over 20 bit phase. pub fn cossin(phase: i32) -> (i32, i32) { // Phase bits excluding the three highest MSB @@ -40,11 +40,14 @@ pub fn cossin(phase: i32) -> (i32, i32) { // The phase values used for the LUT are at midpoint for the truncated phase. // Interpolate relative to the LUT entry midpoint. - // Also cancel the -1 bias that was conditionally introduced above. - phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32; + phase -= 1 << (ALIGN_MSB - 1); + + // Cancel the -1 bias that was conditionally introduced above. + // This lowers the DC spur from 2e-8 to 2e-10 magnitude. + // phase += (octant & 1) as i32; // Fixed point pi/4. - const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32; + const PI4: i32 = (PI / 4. * (1 << 16) as f64) as _; // No rounding bias necessary here since we keep enough low bits. let dphi = (phase * PI4) >> 16; @@ -123,8 +126,8 @@ mod tests { max_err.0 = max_err.0.max(err.0.abs()); max_err.1 = max_err.1.max(err.1.abs()); } - rms_err.0 /= MAX_PHASE; - rms_err.1 /= MAX_PHASE; + rms_err.0 /= (1 << PHASE_DEPTH) as f64; + rms_err.1 /= (1 << PHASE_DEPTH) as f64; println!("sum: {:.2e} {:.2e}", sum.0, sum.1); println!("demod: {:.2e} {:.2e}", demod.0, demod.1); @@ -133,18 +136,18 @@ mod tests { println!("max: {:.2e} {:.2e}", max_err.0, max_err.1); assert!(sum.0.abs() < 4e-10); - assert!(sum.1.abs() < 4e-10); + assert!(sum.1.abs() < 3e-8); assert!(demod.0.abs() < 4e-10); - assert!(demod.1.abs() < 4e-10); + assert!(demod.1.abs() < 1e-8); assert!(sum_err.0.abs() < 4e-10); assert!(sum_err.1.abs() < 4e-10); - assert!(rms_err.0.sqrt() < 6e-8); - assert!(rms_err.1.sqrt() < 6e-8); + assert!(rms_err.0.sqrt() < 4e-6); + assert!(rms_err.1.sqrt() < 4e-6); - assert!(max_err.0 < 9e-6); - assert!(max_err.1 < 9e-6); + assert!(max_err.0 < 1e-5); + assert!(max_err.1 < 1e-5); } }