fir: automatically use transposed topology

2024-12-28 20:53:35 +08:00 · 2016-12-14 19:15:50 +01:00 · 2016-12-14 19:15:50 +01:00 · 115ea67860
commit 115ea67860
parent a451b675c9
1 changed files with 54 additions and 32 deletions
--- a/artiq/gateware/dsp/fir.py
+++ b/artiq/gateware/dsp/fir.py
@ -1,5 +1,6 @@
 from operator import add
 from functools import reduce
+from collections import namedtuple
 import numpy as np
 from migen import *

@ -38,7 +39,10 @@ def halfgen4(width, n):
 class FIR(Module):
    """Full-rate finite impulse response filter.

-    :param coefficients: integer taps.
+    Tries to use transposed form (adder chain instead of adder tree)
+    as much as possible.
+
+    :param coefficients: integer taps, increasing delay.
    :param width: bit width of input and output.
    :param shift: scale factor (as power of two).
    """
@ -47,37 +51,46 @@ class FIR(Module):
        self.i = Signal((width, True))
        self.o = Signal((width, True))
        n = len(coefficients)
-        self.latency = (n + 1)//2 + 2
+        self.latency = n//2 + 3

        ###

+        if shift is None:
+            shift = bits_for(sum(abs(c) for c in coefficients)) - 1
+
        # Delay line: increasing delay
        x = [Signal((width, True)) for _ in range(n)]
        self.sync += [xi.eq(xj) for xi, xj in zip(x, [self.i] + x)]

-        if shift is None:
-            shift = width - 1
-
+        o = Signal((width + shift + 1, True))
+        self.comb += self.o.eq(o >> shift)
+        delay = -1
        # Make products
-        o = []
        for i, c in enumerate(coefficients):
            # simplify for halfband and symmetric filters
-            if c == 0 or c in coefficients[i + 1:]:
+            if not c or c in coefficients[:i]:
                continue
-            m = Signal((width + shift, True))
-            self.sync += m.eq(c*reduce(add, [
-                xj for xj, cj in zip(x[::-1], coefficients) if cj == c
-            ]))
-            o.append(m)
-
-        # Make sum
-        self.sync += self.o.eq(reduce(add, o) >> shift)
+            js = [j for j, cj in enumerate(coefficients) if cj == c]
+            m = Signal.like(o)
+            o0, o = o, Signal.like(o)
+            if delay < js[0]:
+                self.sync += o0.eq(o + m)
+                delay += 1
+            else:
+                self.comb += o0.eq(o + m)
+            assert js[0] - delay >= 0
+            self.sync += m.eq(c*reduce(add, [x[j - delay] for j in js]))
+        # symmetric rounding
+        if shift:
+            self.comb += o.eq((1 << shift - 1) - 1)


 class ParallelFIR(Module):
    """Full-rate parallelized finite impulse response filter.

-    :param coefficients: integer taps.
+    Tries to use transposed form as much as possible.
+
+    :param coefficients: integer taps, increasing delay.
    :param parallelism: number of samples per cycle.
    :param width: bit width of input and output.
    :param shift: scale factor (as power of two).
@ -86,34 +99,43 @@ class ParallelFIR(Module):
        self.width = width
        self.parallelism = p = parallelism
        n = len(coefficients)
-        # input and output: old to young, decreasing delay
+        # input and output: old to new, decreasing delay
        self.i = [Signal((width, True)) for i in range(p)]
        self.o = [Signal((width, True)) for i in range(p)]
-        self.latency = (n + 1)//2//parallelism + 3  # minus one sample
+        self.latency = (n + 1)//2//p + 2
+        # ... plus one sample

        ###

-        # Delay line: young to old, increasing delay
+        if shift is None:
+            shift = bits_for(sum(abs(c) for c in coefficients)) - 1
+
+        # Delay line: increasing delay
        x = [Signal((width, True)) for _ in range(n + p - 1)]
        self.sync += [xi.eq(xj) for xi, xj in zip(x, self.i[::-1] + x)]

-        if shift is None:
-            shift = width - 1
-
-        for j in range(p):
+        for delay in range(p):
+            o = Signal((width + shift + 1, True))
+            self.comb += self.o[delay].eq(o >> shift)
            # Make products
-            o = []
            for i, c in enumerate(coefficients):
                # simplify for halfband and symmetric filters
-                if c == 0 or c in coefficients[i + 1:]:
+                if not c or c in coefficients[:i]:
                    continue
-                m = Signal((width + shift, True))
-                self.sync += m.eq(c*reduce(add, [
-                    xj for xj, cj in zip(x[-1 - j::-1], coefficients) if cj == c
-                ]))
-                o.append(m)
-            # Make sum
-            self.sync += self.o[j].eq(reduce(add, o) >> shift)
+                js = [j + p - 1 for j, cj in enumerate(coefficients)
+                      if cj == c]
+                m = Signal.like(o)
+                o0, o = o, Signal.like(o)
+                if delay + p <= js[0]:
+                    self.sync += o0.eq(o + m)
+                    delay += p
+                else:
+                    self.comb += o0.eq(o + m)
+                assert js[0] - delay >= 0
+                self.sync += m.eq(c*reduce(add, [x[j - delay] for j in js]))
+            # symmetric rounding
+            if shift:
+                self.comb += o.eq((1 << shift - 1) - 1)


 def halfgen4_cascade(rate, width, order=None):