fir: add ParallelFIR and test

2016-12-08 13:05:13 +01:00 · 2016-12-08 13:05:13 +01:00 · d303225249
parent 7e0f3edca5
commit d303225249
2 changed files with 69 additions and 10 deletions
--- a/artiq/gateware/dsp/fir.py
+++ b/artiq/gateware/dsp/fir.py
@@ -45,24 +45,64 @@ class FIR(Module):
        self.width = width
        self.i = Signal((width, True))
        self.o = Signal((width, True))
-        self.latency = (len(coefficients) + 1)//2 + 1
+        n = len(coefficients)
        self.latency = (n + 1)//2 + 1
        ###
-        n = len(coefficients)
+        # Delay line: increasing delay
        x = [Signal((width, True)) for _ in range(n)]
-        self.comb += x[0].eq(self.i)
+        self.sync += [xi.eq(xj) for xi, xj in zip(x, [self.i] + x)]
        self.sync += [x[i + 1].eq(x[i]) for i in range(n - 1)]
        # Wire up output
        o = []
        for i, c in enumerate(coefficients):
            # simplify for halfband and symmetric filters
-            if c == 0 or c in coefficients[:i]:
+            if c == 0 or c in coefficients[i + 1:]:
                continue
            o.append(c*reduce(add, [
-                xj for xj, cj in zip(x, coefficients) if cj == c
+                xj for xj, cj in zip(x[::-1], coefficients) if cj == c
            ]))
        if shift is None:
            shift = width - 1
        self.sync += self.o.eq(reduce(add, o) >> shift)
 class ParallelFIR(Module):
    """Finite impulse response filter processing several samples per cycle.

    ``i`` and ``o`` are lists of ``parallelism`` signed signals each,
    ordered from the oldest to the youngest sample.

    :param coefficients: integer taps.
    :param parallelism: number of samples per cycle.
    :param width: bit width of input and output.
    :param shift: scale factor (as power of two); ``width - 1`` is used
        when ``None``.
    """
    def __init__(self, coefficients, parallelism, width=16, shift=None):
        p = parallelism
        n = len(coefficients)
        if shift is None:
            shift = width - 1

        self.width = width
        self.parallelism = p
        # input and output: old to young, decreasing delay
        self.i = [Signal((width, True)) for _ in range(p)]
        self.o = [Signal((width, True)) for _ in range(p)]
        self.latency = (n + 1)//2//p + 2  # minus .5

        ###

        # Delay line: young to old, increasing delay.  Every cycle the
        # reversed inputs enter at the front and the existing entries move
        # back by ``p`` positions.
        x = [Signal((width, True)) for _ in range(n + p - 1)]
        self.sync += [dst.eq(src) for dst, src in zip(x, self.i[::-1] + x)]

        # wire up each output from its own window into the delay line
        for j, out in enumerate(self.o):
            window = x[-1 - j::-1]
            terms = []
            for k, c in enumerate(coefficients):
                # simplify for halfband and symmetric filters: zero taps
                # are dropped and equal taps share a single multiplication,
                # emitted at the last position where the value occurs
                if c != 0 and c not in coefficients[k + 1:]:
                    same = [
                        xk for xk, ck in zip(window, coefficients)
                        if ck == c
                    ]
                    terms.append(c*reduce(add, same))
            self.sync += out.eq(reduce(add, terms) >> shift)
--- a/artiq/test/gateware/test_fir.py
+++ b/artiq/test/gateware/test_fir.py
@@ -19,8 +19,8 @@ class Transfer(Module):
        for i in range(self.dut.latency):
            yield
        for i in range(len(y)):
            y[i] = (yield self.dut.o)
            yield
            y[i] = (yield self.dut.o)
    def run(self, samples, amplitude=1.):
        w = 2**(self.dut.width - 1) - 1
@@ -63,12 +63,31 @@ class Transfer(Module):
        return fig
 class ParallelTransfer(Transfer):
    """``Transfer`` variant for a DUT whose ``i`` and ``o`` are lists of
    ``dut.parallelism`` signals handled together in one cycle.
    """
    def drive(self, x):
        """Generator feeding ``x`` to the DUT inputs.

        :param x: array of samples; reshaped into rows of
            ``dut.parallelism`` entries, one row per cycle.
        """
        for xi in x.reshape(-1, self.dut.parallelism):
            yield [ij.eq(int(xj)) for ij, xj in zip(self.dut.i, xi)]
            yield

    def record(self, y):
        """Generator storing the DUT outputs into ``y``.

        :param y: array to fill; written in rows of ``dut.parallelism``
            entries after waiting ``dut.latency`` cycles.
        """
        for _ in range(self.dut.latency):
            yield
        for yi in y.reshape(-1, self.dut.parallelism):
            yield
            # Read every output with an explicit loop: a ``yield`` inside a
            # list comprehension is a SyntaxError on Python 3.8 and later.
            values = []
            for o in self.dut.o:
                values.append((yield o))
            yi[:] = values
 def _main():
    """Build a half-band test filter, simulate it and plot the result.

    The ``use_parallel`` flag below selects between the ``ParallelFIR``
    and the plain ``FIR`` device under test.
    """
    use_parallel = True

    coeff = fir.halfgen4(.4/2, 8)
    # ``<<`` binds weaker than ``-``: the scale is 2**15
    scale = 1 << (16 - 1)
    coeff_int = [int(round(c * scale)) for c in coeff]

    if use_parallel:
        dut = fir.ParallelFIR(coeff_int, parallelism=4, width=16)
        # print(verilog.convert(dut, ios=set(dut.i + dut.o)))
        tb = ParallelTransfer(dut)
    else:
        dut = fir.FIR(coeff_int, width=16)
        # print(verilog.convert(dut, ios={dut.i, dut.o}))
        tb = Transfer(dut)

    x, y = tb.run(samples=1 << 10, amplitude=.8)
    tb.analyze(x, y)
    plt.show()