diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f4b66b0..cf68b3f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,7 +5,9 @@ on:
     branches: [master, staging, trying]
   pull_request:
     branches: [master]
-
+  schedule:
+    # UTC
+    - cron: '48 4 * * *'
 env:
   CARGO_TERM_COLOR: always
 
diff --git a/Cargo.lock b/Cargo.lock
index 7fa03f0..0999288 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -724,6 +724,7 @@ dependencies = [
  "dsp",
  "embedded-hal",
  "enum-iterator",
+ "generic-array 0.14.4",
  "heapless",
  "log",
  "mcp23017",
diff --git a/Cargo.toml b/Cargo.toml
index 599e595..f384fe9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ dsp = { path = "dsp" }
 ad9959 = { path = "ad9959" }
 smoltcp-nal = "0.1.0"
 miniconf = "0.1"
+generic-array = "0.14"
 
 [patch.crates-io.miniconf]
 git = "https://github.com/quartiq/miniconf.git"
diff --git a/dsp/benches/micro.rs b/dsp/benches/micro.rs
index 1f2cc1f..0902a4c 100644
--- a/dsp/benches/micro.rs
+++ b/dsp/benches/micro.rs
@@ -1,8 +1,9 @@
 use core::f32::consts::PI;
-use dsp::{atan2, cossin};
-use dsp::{iir, iir_int};
-use dsp::{PLL, RPLL};
+
 use easybench::bench_env;
+use generic_array::typenum::U4;
+
+use dsp::{atan2, cossin, iir, iir_int, Lowpass, PLL, RPLL};
 
 fn atan2_bench() {
     let xi = (10 << 16) as i32;
@@ -70,6 +71,18 @@ fn iir_bench() {
     );
 }
 
+fn lowpass_bench() {
+    let mut dut = Lowpass::<U4>::default();
+    println!(
+        "Lowpass::<U4>::update(x, k): {}",
+        bench_env((0x32421, 14), |(x, k)| dut.update(*x, *k))
+    );
+    println!(
+        "Lowpass::<U4>::update(x, 14): {}",
+        bench_env(0x32421, |x| dut.update(*x, 14))
+    );
+}
+
 fn main() {
     atan2_bench();
     cossin_bench();
@@ -77,4 +90,5 @@ fn main() {
     pll_bench();
     iir_int_bench();
     iir_bench();
+    lowpass_bench();
 }
diff --git a/dsp/build.rs b/dsp/build.rs
index ca43736..5e5fe1d 100644
--- a/dsp/build.rs
+++ b/dsp/build.rs
@@ -15,7 +15,7 @@ fn write_cossin_table() {
         .unwrap();
     write!(
         file,
-        "pub(crate) const COSSIN: [(u16, u16); 1 << COSSIN_DEPTH] = ["
+        "pub(crate) const COSSIN: [u32; 1 << COSSIN_DEPTH] = ["
     )
     .unwrap();
 
@@ -29,12 +29,12 @@ fn write_cossin_table() {
         // use midpoint samples to save one entry in the LUT
         let phase = (PI / 4. / (1 << DEPTH) as f64) * (i as f64 + 0.5);
         // add one bit accuracy to cos due to 0.5 < cos(z) <= 1 for |z| < pi/4
-        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u16;
-        let sin = (phase.sin() * AMPLITUDE).round() as u16;
+        let cos = ((phase.cos() - 0.5) * 2. * AMPLITUDE).round() as u32 - 1;
+        let sin = (phase.sin() * AMPLITUDE).round() as u32;
         if i % 4 == 0 {
             write!(file, "\n   ").unwrap();
         }
-        write!(file, " ({}, {}),", cos, sin).unwrap();
+        write!(file, " {},", cos + (sin << 16)).unwrap();
     }
     writeln!(file, "\n];").unwrap();
 
diff --git a/dsp/src/complex.rs b/dsp/src/complex.rs
index 3349460..1eb003f 100644
--- a/dsp/src/complex.rs
+++ b/dsp/src/complex.rs
@@ -8,6 +8,8 @@ pub trait ComplexExt<T, U> {
     fn abs_sqr(&self) -> U;
     fn log2(&self) -> T;
     fn arg(&self) -> T;
+    fn saturating_add(&self, other: Self) -> Self;
+    fn saturating_sub(&self, other: Self) -> Self;
 }
 
 impl ComplexExt<i32, u32> for Complex<i32> {
@@ -23,7 +25,7 @@ impl ComplexExt<i32, u32> for Complex<i32> {
     /// ```
     fn from_angle(angle: i32) -> Self {
         let (c, s) = cossin(angle);
-        Self { re: c, im: s }
+        Self::new(c, s)
     }
 
     /// Return the absolute square (the squared magnitude).
@@ -85,6 +87,20 @@ impl ComplexExt<i32, u32> for Complex<i32> {
     fn arg(&self) -> i32 {
         atan2(self.im, self.re)
     }
+
+    fn saturating_add(&self, other: Self) -> Self {
+        Self::new(
+            self.re.saturating_add(other.re),
+            self.im.saturating_add(other.im),
+        )
+    }
+
+    fn saturating_sub(&self, other: Self) -> Self {
+        Self::new(
+            self.re.saturating_sub(other.re),
+            self.im.saturating_sub(other.im),
+        )
+    }
 }
 
 /// Full scale fixed point multiplication.
diff --git a/dsp/src/cossin.rs b/dsp/src/cossin.rs
index 4a5cccd..2f21cec 100644
--- a/dsp/src/cossin.rs
+++ b/dsp/src/cossin.rs
@@ -14,7 +14,7 @@ include!(concat!(env!("OUT_DIR"), "/cossin_table.rs"));
 /// tuple. With a 7-bit deep LUT there is 1e-5 max and 6e-8 RMS error
 /// in each quadrature over 20 bit phase.
 pub fn cossin(phase: i32) -> (i32, i32) {
-    // Phase bits excluding the three highes MSB
+    // Phase bits excluding the three highest MSB
     const OCTANT_BITS: usize = 32 - 3;
 
     // This is a slightly more compact way to compute the four flags for
@@ -22,50 +22,52 @@ pub fn cossin(phase: i32) -> (i32, i32) {
     let mut octant = (phase as u32) >> OCTANT_BITS;
     octant ^= octant << 1;
 
-    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
-    let mut phase = phase & ((1 << OCTANT_BITS) - 1);
-
+    let mut phase = phase;
     if octant & 1 != 0 {
         // phase = pi/4 - phase
-        phase = (1 << OCTANT_BITS) - 1 - phase;
+        phase = !phase;
     }
 
-    let lookup = COSSIN[(phase >> (OCTANT_BITS - COSSIN_DEPTH)) as usize];
-    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
-    // values and scale the sine values as encoded in the LUT.
-    let mut cos = lookup.0 as i32 + u16::MAX as i32;
-    let mut sin = (lookup.1 as i32) << 1;
+    // Mask off octant bits. This leaves the angle in the range [0, pi/4).
+    phase &= (1 << OCTANT_BITS) - 1;
 
     // 16 + 1 bits for cos/sin and 15 for dphi to saturate the i32 range.
     const ALIGN_MSB: usize = 32 - 16 - 1;
     phase >>= OCTANT_BITS - COSSIN_DEPTH - ALIGN_MSB;
+
+    let lookup = COSSIN[(phase >> ALIGN_MSB) as usize];
     phase &= (1 << ALIGN_MSB) - 1;
+
     // The phase values used for the LUT are at midpoint for the truncated phase.
     // Interpolate relative to the LUT entry midpoint.
+    // Also cancel the -1 bias that was conditionally introduced above.
     phase -= (1 << (ALIGN_MSB - 1)) - (octant & 1) as i32;
-    // Fixed point pi/4.
-    const PI4: i32 = (PI / 4. * (1 << (32 - ALIGN_MSB)) as f64) as i32;
-    // No rounding bias necessary here since we keep enough low bits.
-    let dphi = (phase * PI4) >> (32 - ALIGN_MSB);
 
-    // Make room for the sign bit.
-    let dcos = (sin * dphi) >> (COSSIN_DEPTH + 1);
+    // Fixed point pi/4.
+    const PI4: i32 = (PI / 4. * (1 << 16) as f64) as i32;
+    // No rounding bias necessary here since we keep enough low bits.
+    let dphi = (phase * PI4) >> 16;
+
+    // 1/2 < cos(0 <= x <= pi/4) <= 1: Shift the cos
+    // values and scale the sine values as encoded in the LUT.
+    let mut cos = (lookup & 0xffff) as i32 + (1 << 16);
+    let mut sin = (lookup >> 16) as i32;
+
+    let dcos = (sin * dphi) >> COSSIN_DEPTH;
     let dsin = (cos * dphi) >> (COSSIN_DEPTH + 1);
 
     cos = (cos << (ALIGN_MSB - 1)) - dcos;
-    sin = (sin << (ALIGN_MSB - 1)) + dsin;
+    sin = (sin << ALIGN_MSB) + dsin;
 
     // Unmap using octant bits.
     if octant & 2 != 0 {
         core::mem::swap(&mut sin, &mut cos);
     }
-
     if octant & 4 != 0 {
-        cos *= -1;
+        cos = -cos;
     }
-
     if octant & 8 != 0 {
-        sin *= -1;
+        sin = -sin;
     }
 
     (cos, sin)
diff --git a/dsp/src/iir_int.rs b/dsp/src/iir_int.rs
index 4af50f1..02fdeb1 100644
--- a/dsp/src/iir_int.rs
+++ b/dsp/src/iir_int.rs
@@ -1,5 +1,6 @@
 use core::f64::consts::PI;
-use serde::{Deserialize, Serialize};
+use miniconf::StringSet;
+use serde::Deserialize;
 
 /// Generic vector for integer IIR filter.
 /// This struct is used to hold the x/y input/output data vector or the b/a coefficient
@@ -55,7 +56,7 @@ fn macc(y0: i32, x: &[i32], a: &[i32], shift: u32) -> i32 {
 /// See `dsp::iir::IIR` for general implementation details.
 /// Offset and limiting disabled to suit lowpass applications.
 /// Coefficient scaling fixed and optimized.
-#[derive(Copy, Clone, Default, Deserialize, Serialize)]
+#[derive(Copy, Clone, Default, Debug, StringSet, Deserialize)]
 pub struct IIR {
     pub ba: Vec5,
     // pub y_offset: i32,
diff --git a/dsp/src/lockin.rs b/dsp/src/lockin.rs
index 6a977b4..613a133 100644
--- a/dsp/src/lockin.rs
+++ b/dsp/src/lockin.rs
@@ -1,12 +1,12 @@
 use super::{Complex, ComplexExt, Lowpass, MulScaled};
-use generic_array::typenum::U2;
+use generic_array::ArrayLength;
 
 #[derive(Clone, Default)]
-pub struct Lockin {
-    state: [Lowpass<U2>; 2],
+pub struct Lockin<N: ArrayLength<i32>> {
+    state: [Lowpass<N>; 2],
 }
 
-impl Lockin {
+impl<N: ArrayLength<i32>> Lockin<N> {
     /// Update the lockin with a sample taken at a given phase.
     pub fn update(&mut self, sample: i32, phase: i32, k: u8) -> Complex<i32> {
         // Get the LO signal for demodulation and mix the sample;
diff --git a/dsp/src/lowpass.rs b/dsp/src/lowpass.rs
index 5ab803d..10d5838 100644
--- a/dsp/src/lowpass.rs
+++ b/dsp/src/lowpass.rs
@@ -27,10 +27,10 @@ impl<N: ArrayLength<i32>> Lowpass<N> {
         // Note T-DF-I and the zeros at Nyquist.
         let mut x = x;
         for y in self.y.iter_mut() {
-            let dy = x.saturating_sub(*y).saturating_add(1 << (k - 1)) >> k;
+            let dy = x.saturating_sub(*y) >> k;
             *y += dy;
             x = *y - (dy >> 1);
         }
-        x
+        x.saturating_add((self.y.len() as i32) << (k - 1))
     }
 }
diff --git a/src/bin/lockin-external.rs b/src/bin/lockin-external.rs
index 3b7c6a8..1b11c76 100644
--- a/src/bin/lockin-external.rs
+++ b/src/bin/lockin-external.rs
@@ -2,14 +2,12 @@
 #![no_std]
 #![no_main]
 
-use stm32h7xx_hal as hal;
-
-use stabilizer::{hardware, hardware::design_parameters};
-
 use dsp::{Accu, Complex, ComplexExt, Lockin, RPLL};
+use generic_array::typenum::U4;
 use hardware::{
     Adc0Input, Adc1Input, Dac0Output, Dac1Output, InputStamper, AFE0, AFE1,
 };
+use stabilizer::{hardware, hardware::design_parameters};
 
 #[rtic::app(device = stm32h7xx_hal::stm32, peripherals = true, monotonic = rtic::cyccnt::CYCCNT)]
 const APP: () = {
@@ -20,7 +18,7 @@ const APP: () = {
 
         timestamper: InputStamper,
         pll: RPLL,
-        lockin: Lockin,
+        lockin: Lockin<U4>,
     }
 
     #[init]
@@ -88,22 +86,20 @@ const APP: () = {
             .map(|t| t as i32);
         let (pll_phase, pll_frequency) = c.resources.pll.update(
             timestamp,
-            21, // frequency settling time (log2 counter cycles), TODO: expose
-            21, // phase settling time, TODO: expose
+            21, // frequency settling time (log2 counter cycles),
+            21, // phase settling time
         );
 
         // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate)
-        let harmonic: i32 = -1; // TODO: expose
+        let harmonic: i32 = -1;
 
         // Demodulation LO phase offset
-        let phase_offset: i32 = 0; // TODO: expose
+        let phase_offset: i32 = 0;
 
         // Log2 lowpass time constant
-        let time_constant: u8 = 6; // TODO: expose
+        let time_constant: u8 = 6;
 
         let sample_frequency = ((pll_frequency
-            // half-up rounding bias
-            // .wrapping_add(1 << design_parameters::SAMPLE_BUFFER_SIZE_LOG2 - 1)
             >> design_parameters::SAMPLE_BUFFER_SIZE_LOG2)
             as i32)
             .wrapping_mul(harmonic);
@@ -131,7 +127,7 @@ const APP: () = {
             Quadrature,
         }
 
-        let conf = Conf::FrequencyDiscriminator; // TODO: expose
+        let conf = Conf::FrequencyDiscriminator;
         let output = match conf {
             // Convert from IQ to power and phase.
             Conf::PowerPhase => [(output.log2() << 24) as _, output.arg()],
@@ -149,14 +145,13 @@ const APP: () = {
     #[idle(resources=[afes])]
     fn idle(_: idle::Context) -> ! {
         loop {
-            // TODO: Implement network interface.
             cortex_m::asm::wfi();
         }
     }
 
     #[task(binds = ETH, priority = 1)]
     fn eth(_: eth::Context) {
-        unsafe { hal::ethernet::interrupt_handler() }
+        unsafe { stm32h7xx_hal::ethernet::interrupt_handler() }
     }
 
     #[task(binds = SPI2, priority = 3)]
@@ -166,7 +161,7 @@ const APP: () = {
 
     #[task(binds = SPI3, priority = 3)]
     fn spi3(_: spi3::Context) {
-        panic!("ADC0 input overrun");
+        panic!("ADC1 input overrun");
     }
 
     #[task(binds = SPI4, priority = 3)]
diff --git a/src/bin/lockin-internal.rs b/src/bin/lockin-internal.rs
index 1f9df6e..ee9da2f 100644
--- a/src/bin/lockin-internal.rs
+++ b/src/bin/lockin-internal.rs
@@ -3,6 +3,7 @@
 #![no_main]
 
 use dsp::{Accu, Complex, ComplexExt, Lockin};
+use generic_array::typenum::U2;
 use hardware::{Adc1Input, Dac0Output, Dac1Output, AFE0, AFE1};
 use stabilizer::{hardware, hardware::design_parameters};
 
@@ -20,7 +21,7 @@ const APP: () = {
         adc: Adc1Input,
         dacs: (Dac0Output, Dac1Output),
 
-        lockin: Lockin,
+        lockin: Lockin<U2>,
     }
 
     #[init]
@@ -44,24 +45,13 @@ const APP: () = {
         }
     }
 
-    /// Main DSP processing routine for Stabilizer.
+    /// Main DSP processing routine.
     ///
-    /// # Note
-    /// Processing time for the DSP application code is bounded by the following constraints:
+    /// See `dual-iir` for general notes on processing time and timing.
     ///
-    /// DSP application code starts after the ADC has generated a batch of samples and must be
-    /// completed by the time the next batch of ADC samples has been acquired (plus the FIFO buffer
-    /// time). If this constraint is not met, firmware will panic due to an ADC input overrun.
-    ///
-    /// The DSP application code must also fill out the next DAC output buffer in time such that the
-    /// DAC can switch to it when it has completed the current buffer. If this constraint is not met
-    /// it's possible that old DAC codes will be generated on the output and the output samples will
-    /// be delayed by 1 batch.
-    ///
-    /// Because the ADC and DAC operate at the same rate, these two constraints actually implement
-    /// the same time bounds, meeting one also means the other is also met.
-    ///
-    /// TODO: Document
+    /// This is an implementation of an internal-reference lockin on the ADC1 signal.
+    /// The reference at f_sample/8 is output on DAC0 and the phase of the demodulated
+    /// signal on DAC1.
     #[task(binds=DMA1_STR4, resources=[adc, dacs, lockin], priority=2)]
     fn process(c: process::Context) {
         let lockin = c.resources.lockin;
@@ -71,29 +61,23 @@ const APP: () = {
             c.resources.dacs.1.acquire_buffer(),
         ];
 
-        // DAC0 always generates a fixed sinusoidal output.
-        dac_samples[0]
-            .iter_mut()
-            .zip(DAC_SEQUENCE.iter())
-            .for_each(|(d, s)| *d = *s as u16 ^ 0x8000);
-
         // Reference phase and frequency are known.
-        let pll_phase = 0;
+        let pll_phase = 0i32;
         let pll_frequency =
             1i32 << (32 - design_parameters::SAMPLE_BUFFER_SIZE_LOG2);
 
-        // Harmonic index of the LO: -1 to _de_modulate the fundamental
-        let harmonic: i32 = -1; // TODO: expose
+        // Harmonic index of the LO: -1 to _de_modulate the fundamental (complex conjugate)
+        let harmonic: i32 = -1;
 
         // Demodulation LO phase offset
-        let phase_offset: i32 = (0.25 * i32::MAX as f32) as i32; // TODO: expose
+        let phase_offset: i32 = 1 << 30;
 
         // Log2 lowpass time constant.
         let time_constant: u8 = 8;
 
         let sample_frequency = (pll_frequency as i32).wrapping_mul(harmonic);
-        let sample_phase = phase_offset
-            .wrapping_add((pll_phase as i32).wrapping_mul(harmonic));
+        let sample_phase =
+            phase_offset.wrapping_add(pll_phase.wrapping_mul(harmonic));
 
         let output: Complex<i32> = adc_samples
             .iter()
@@ -109,15 +93,17 @@ const APP: () = {
             .unwrap()
             * 2; // Full scale assuming the 2f component is gone.
 
-        for value in dac_samples[1].iter_mut() {
-            *value = (output.arg() >> 16) as u16 ^ 0x8000;
+        // Convert to DAC data.
+        for (i, data) in DAC_SEQUENCE.iter().enumerate() {
+            // DAC0 always generates a fixed sinusoidal output.
+            dac_samples[0][i] = *data as u16 ^ 0x8000;
+            dac_samples[1][i] = (output.arg() >> 16) as u16 ^ 0x8000;
         }
     }
 
     #[idle(resources=[afes])]
     fn idle(_: idle::Context) -> ! {
         loop {
-            // TODO: Implement network interface.
             cortex_m::asm::wfi();
         }
     }