forked from M-Labs/artiq
fir: streamline, optimize DSP extraction, left-align inputs
This commit is contained in:
parent
cfb66117af
commit
f5f662200b
|
@ -1,6 +1,10 @@
|
||||||
|
from math import floor
|
||||||
from operator import add
|
from operator import add
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from migen import *
|
from migen import *
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,56 +44,11 @@ def halfgen4(width, n, df=1e-3):
|
||||||
return a
|
return a
|
||||||
|
|
||||||
|
|
||||||
class FIR(Module):
|
_Widths = namedtuple("_Widths", "A B P")
|
||||||
"""Full-rate finite impulse response filter.
|
|
||||||
|
|
||||||
Tries to use transposed form (adder chain instead of adder tree)
|
_widths = {
|
||||||
as much as possible.
|
"DSP48E1": _Widths(25, 18, 48),
|
||||||
|
}
|
||||||
:param coefficients: integer taps, increasing delay.
|
|
||||||
:param width: bit width of input and output.
|
|
||||||
:param shift: scale factor (as power of two).
|
|
||||||
"""
|
|
||||||
def __init__(self, coefficients, width=16, shift=None):
|
|
||||||
self.width = width
|
|
||||||
self.i = Signal((width, True))
|
|
||||||
self.o = Signal((width, True))
|
|
||||||
n = len(coefficients)
|
|
||||||
self.latency = n//2 + 3
|
|
||||||
|
|
||||||
###
|
|
||||||
|
|
||||||
if shift is None:
|
|
||||||
shift = bits_for(sum(abs(c) for c in coefficients)) - 1
|
|
||||||
|
|
||||||
# Delay line: increasing delay
|
|
||||||
x = [Signal((width, True)) for _ in range(n)]
|
|
||||||
self.sync += [xi.eq(xj) for xi, xj in zip(x, [self.i] + x)]
|
|
||||||
|
|
||||||
o = Signal((width + shift + 1, True))
|
|
||||||
self.comb += self.o.eq(o >> shift)
|
|
||||||
delay = -1
|
|
||||||
# Make products
|
|
||||||
for i, c in enumerate(coefficients):
|
|
||||||
# simplify for halfband and symmetric filters
|
|
||||||
if not c or c in coefficients[:i]:
|
|
||||||
continue
|
|
||||||
js = [j for j, cj in enumerate(coefficients) if cj == c]
|
|
||||||
m = Signal.like(o)
|
|
||||||
o0, o = o, Signal.like(o)
|
|
||||||
if delay < js[0]:
|
|
||||||
self.sync += o0.eq(o + m)
|
|
||||||
delay += 1
|
|
||||||
else:
|
|
||||||
self.comb += o0.eq(o + m)
|
|
||||||
assert js[0] - delay >= 0
|
|
||||||
xs = [x[j - delay] for j in js]
|
|
||||||
s = Signal((bits_for(len(xs)) - 1 + len(xs[0]), True))
|
|
||||||
self.comb += s.eq(sum(xs))
|
|
||||||
self.sync += m.eq(c*s)
|
|
||||||
# symmetric rounding
|
|
||||||
if shift:
|
|
||||||
self.comb += o.eq((1 << shift - 1) - 1)
|
|
||||||
|
|
||||||
|
|
||||||
class ParallelFIR(Module):
|
class ParallelFIR(Module):
|
||||||
|
@ -97,12 +56,14 @@ class ParallelFIR(Module):
|
||||||
|
|
||||||
Tries to use transposed form as much as possible.
|
Tries to use transposed form as much as possible.
|
||||||
|
|
||||||
:param coefficients: integer taps, increasing delay.
|
:param coefficients: tap coefficients (normalized to 1.),
|
||||||
|
increasing delay.
|
||||||
:param parallelism: number of samples per cycle.
|
:param parallelism: number of samples per cycle.
|
||||||
:param width: bit width of input and output.
|
:param width: bit width of input and output.
|
||||||
:param shift: scale factor (as power of two).
|
:param arch: architecture (default: "DSP48E1").
|
||||||
"""
|
"""
|
||||||
def __init__(self, coefficients, parallelism, width=16, shift=None):
|
def __init__(self, coefficients, parallelism, width=16,
|
||||||
|
arch="DSP48E1"):
|
||||||
self.width = width
|
self.width = width
|
||||||
self.parallelism = p = parallelism
|
self.parallelism = p = parallelism
|
||||||
n = len(coefficients)
|
n = len(coefficients)
|
||||||
|
@ -111,45 +72,60 @@ class ParallelFIR(Module):
|
||||||
self.o = [Signal((width, True)) for i in range(p)]
|
self.o = [Signal((width, True)) for i in range(p)]
|
||||||
self.latency = (n + 1)//2//p + 2
|
self.latency = (n + 1)//2//p + 2
|
||||||
# ... plus one sample
|
# ... plus one sample
|
||||||
|
w = _widths[arch]
|
||||||
|
|
||||||
|
c_max = max(abs(c) for c in coefficients)
|
||||||
|
c_shift = bits_for(floor((1 << w.B - 2) / c_max))
|
||||||
|
self.coefficients = cs = [int(round(c*(1 << c_shift)))
|
||||||
|
for c in coefficients]
|
||||||
|
|
||||||
###
|
###
|
||||||
|
|
||||||
if shift is None:
|
|
||||||
shift = bits_for(sum(abs(c) for c in coefficients)) - 1
|
|
||||||
|
|
||||||
# Delay line: increasing delay
|
# Delay line: increasing delay
|
||||||
x = [Signal((width, True)) for _ in range(n + p - 1)]
|
x = [Signal((w.A, True)) for _ in range(n + p - 1)]
|
||||||
self.sync += [xi.eq(xj) for xi, xj in zip(x, self.i[::-1] + x)]
|
x_shift = w.A - width - bits_for(
|
||||||
|
max(cs.count(c) for c in cs if c) - 1)
|
||||||
|
for xi, xj in zip(x, self.i[::-1]):
|
||||||
|
self.sync += xi.eq(xj << x_shift)
|
||||||
|
for xi, xj in zip(x[len(self.i):], x):
|
||||||
|
self.sync += xi.eq(xj)
|
||||||
|
|
||||||
for delay in range(p):
|
for delay in range(p):
|
||||||
o = Signal((width + shift + 1, True))
|
o = Signal((w.P, True))
|
||||||
self.comb += self.o[delay].eq(o >> shift)
|
self.comb += self.o[delay].eq(o >> c_shift + x_shift)
|
||||||
# Make products
|
# Make products
|
||||||
for i, c in enumerate(coefficients):
|
for i, c in enumerate(cs):
|
||||||
# simplify for halfband and symmetric filters
|
# simplify for halfband and symmetric filters
|
||||||
if not c or c in coefficients[:i]:
|
if not c or c in cs[:i]:
|
||||||
continue
|
continue
|
||||||
js = [j + p - 1 for j, cj in enumerate(coefficients)
|
js = [j + p - 1 for j, cj in enumerate(cs) if cj == c]
|
||||||
if cj == c]
|
|
||||||
m = Signal.like(o)
|
m = Signal.like(o)
|
||||||
o0, o = o, Signal.like(o)
|
o0, o = o, Signal.like(o)
|
||||||
|
q = Signal.like(x[0])
|
||||||
if delay + p <= js[0]:
|
if delay + p <= js[0]:
|
||||||
self.sync += o0.eq(o + m)
|
self.sync += o0.eq(o + m)
|
||||||
delay += p
|
delay += p
|
||||||
else:
|
else:
|
||||||
self.comb += o0.eq(o + m)
|
self.comb += o0.eq(o + m)
|
||||||
assert js[0] - delay >= 0
|
assert js[0] - delay >= 0
|
||||||
xs = [x[j - delay] for j in js]
|
self.comb += q.eq(reduce(add, [x[j - delay] for j in js]))
|
||||||
s = Signal((bits_for(len(xs)) - 1 + len(xs[0]), True))
|
self.sync += m.eq(c*q)
|
||||||
self.comb += s.eq(sum(xs))
|
|
||||||
self.sync += m.eq(c*s)
|
|
||||||
# symmetric rounding
|
# symmetric rounding
|
||||||
if shift:
|
if c_shift + x_shift > 1:
|
||||||
self.comb += o.eq((1 << shift - 1) - 1)
|
self.comb += o.eq((1 << c_shift + x_shift - 1) - 1)
|
||||||
|
|
||||||
|
|
||||||
|
class FIR(ParallelFIR):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(self, *args, parallelism=1, **kwargs)
|
||||||
|
self.i = self.i[0]
|
||||||
|
self.o = self.o[0]
|
||||||
|
|
||||||
|
|
||||||
def halfgen4_cascade(rate, width, order=None):
|
def halfgen4_cascade(rate, width, order=None):
|
||||||
"""Generate coefficients for cascaded half-band filters.
|
"""Generate coefficients for cascaded half-band filters.
|
||||||
|
Coefficients are normalized to a gain of two per stage to compensate for
|
||||||
|
the zero stuffing.
|
||||||
|
|
||||||
:param rate: upsampling rate. power of two
|
:param rate: upsampling rate. power of two
|
||||||
:param width: passband/stopband width in units of input sampling rate.
|
:param width: passband/stopband width in units of input sampling rate.
|
||||||
|
@ -160,7 +136,7 @@ def halfgen4_cascade(rate, width, order=None):
|
||||||
p = 1
|
p = 1
|
||||||
while p < rate:
|
while p < rate:
|
||||||
p *= 2
|
p *= 2
|
||||||
coeff.append(halfgen4(width*p/rate/2, order*p//rate))
|
coeff.append(2*halfgen4(width*p/rate/2, order*p//rate))
|
||||||
return coeff
|
return coeff
|
||||||
|
|
||||||
|
|
||||||
|
@ -170,8 +146,8 @@ class ParallelHBFUpsampler(Module):
|
||||||
Coefficients should be normalized to overall gain of 2
|
Coefficients should be normalized to overall gain of 2
|
||||||
(highest/center coefficient being 1)."""
|
(highest/center coefficient being 1)."""
|
||||||
def __init__(self, coefficients, width=16, **kwargs):
|
def __init__(self, coefficients, width=16, **kwargs):
|
||||||
self.parallelism = 1
|
self.parallelism = 1 # accumulate
|
||||||
self.latency = 0
|
self.latency = 0 # accumulate
|
||||||
self.width = width
|
self.width = width
|
||||||
self.i = Signal((width, True))
|
self.i = Signal((width, True))
|
||||||
|
|
||||||
|
@ -180,7 +156,6 @@ class ParallelHBFUpsampler(Module):
|
||||||
i = [self.i]
|
i = [self.i]
|
||||||
for coeff in coefficients:
|
for coeff in coefficients:
|
||||||
self.parallelism *= 2
|
self.parallelism *= 2
|
||||||
# assert coeff[len(coeff)//2 + 1] == 1
|
|
||||||
hbf = ParallelFIR(coeff, self.parallelism, width, **kwargs)
|
hbf = ParallelFIR(coeff, self.parallelism, width, **kwargs)
|
||||||
self.submodules += hbf
|
self.submodules += hbf
|
||||||
self.comb += [a.eq(b) for a, b in zip(hbf.i[::2], i)]
|
self.comb += [a.eq(b) for a, b in zip(hbf.i[::2], i)]
|
||||||
|
|
|
@ -128,10 +128,8 @@ class Channel(Module, SatAddMixin):
|
||||||
|
|
||||||
self.submodules.a1 = a1 = SplineParallelDDS(widths, orders)
|
self.submodules.a1 = a1 = SplineParallelDDS(widths, orders)
|
||||||
self.submodules.a2 = a2 = SplineParallelDDS(widths, orders)
|
self.submodules.a2 = a2 = SplineParallelDDS(widths, orders)
|
||||||
coeff = [[int(round((1 << 18)*ci)) for ci in c]
|
coeff = halfgen4_cascade(parallelism, width=.4, order=8)
|
||||||
for c in halfgen4_cascade(parallelism, width=.4, order=8)]
|
hbf = [ParallelHBFUpsampler(coeff, width=width) for i in range(2)]
|
||||||
hbf = [ParallelHBFUpsampler(coeff, width=width, shift=17)
|
|
||||||
for i in range(2)]
|
|
||||||
self.submodules.b = b = SplineParallelDUC(
|
self.submodules.b = b = SplineParallelDUC(
|
||||||
widths._replace(a=len(hbf[0].o[0]), f=widths.f - width), orders,
|
widths._replace(a=len(hbf[0].o[0]), f=widths.f - width), orders,
|
||||||
parallelism=parallelism)
|
parallelism=parallelism)
|
||||||
|
|
|
@ -11,16 +11,16 @@ class Transfer(Module):
|
||||||
self.submodules.dut = dut
|
self.submodules.dut = dut
|
||||||
|
|
||||||
def drive(self, x):
|
def drive(self, x):
|
||||||
for xi in x:
|
for xi in x.reshape(-1, self.dut.parallelism):
|
||||||
yield self.dut.i.eq(int(xi))
|
yield [ij.eq(int(xj)) for ij, xj in zip(self.dut.i, xi)]
|
||||||
yield
|
yield
|
||||||
|
|
||||||
def record(self, y):
|
def record(self, y):
|
||||||
for i in range(self.dut.latency):
|
for i in range(self.dut.latency):
|
||||||
yield
|
yield
|
||||||
for i in range(len(y)):
|
for yi in y.reshape(-1, self.dut.parallelism):
|
||||||
yield
|
yield
|
||||||
y[i] = (yield self.dut.o)
|
yi[:] = (yield from [(yield o) for o in self.dut.o])
|
||||||
|
|
||||||
def run(self, samples, amplitude=1.):
|
def run(self, samples, amplitude=1.):
|
||||||
w = 2**(self.dut.width - 1) - 1
|
w = 2**(self.dut.width - 1) - 1
|
||||||
|
@ -63,21 +63,7 @@ class Transfer(Module):
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
class ParallelTransfer(Transfer):
|
class UpTransfer(Transfer):
|
||||||
def drive(self, x):
|
|
||||||
for xi in x.reshape(-1, self.dut.parallelism):
|
|
||||||
yield [ij.eq(int(xj)) for ij, xj in zip(self.dut.i, xi)]
|
|
||||||
yield
|
|
||||||
|
|
||||||
def record(self, y):
|
|
||||||
for i in range(self.dut.latency):
|
|
||||||
yield
|
|
||||||
for yi in y.reshape(-1, self.dut.parallelism):
|
|
||||||
yield
|
|
||||||
yi[:] = (yield from [(yield o) for o in self.dut.o])
|
|
||||||
|
|
||||||
|
|
||||||
class UpTransfer(ParallelTransfer):
|
|
||||||
def drive(self, x):
|
def drive(self, x):
|
||||||
x = x.reshape(-1, len(self.dut.o))
|
x = x.reshape(-1, len(self.dut.o))
|
||||||
x[:, 1:] = 0
|
x[:, 1:] = 0
|
||||||
|
@ -94,21 +80,15 @@ class UpTransfer(ParallelTransfer):
|
||||||
|
|
||||||
|
|
||||||
def _main():
|
def _main():
|
||||||
coeff = fir.halfgen4(.4/2, 8)
|
if True:
|
||||||
coeff_int = [int(round(c * (1 << 16 - 1))) for c in coeff]
|
coeff = fir.halfgen4_cascade(8, width=.4, order=8)
|
||||||
if False:
|
dut = fir.ParallelHBFUpsampler(coeff, width=16)
|
||||||
coeff = [[int(round((1 << 19) * ci)) for ci in c]
|
|
||||||
for c in fir.halfgen4_cascade(8, width=.4, order=8)]
|
|
||||||
dut = fir.ParallelHBFUpsampler(coeff, width=16, shift=18)
|
|
||||||
# print(verilog.convert(dut, ios=set([dut.i] + dut.o)))
|
# print(verilog.convert(dut, ios=set([dut.i] + dut.o)))
|
||||||
tb = UpTransfer(dut)
|
tb = UpTransfer(dut)
|
||||||
elif True:
|
|
||||||
dut = fir.ParallelFIR(coeff_int, parallelism=4, width=16)
|
|
||||||
# print(verilog.convert(dut, ios=set(dut.i + dut.o)))
|
|
||||||
tb = ParallelTransfer(dut)
|
|
||||||
else:
|
else:
|
||||||
dut = fir.FIR(coeff_int, width=16)
|
coeff = fir.halfgen4(.4/2, 8)
|
||||||
# print(verilog.convert(dut, ios={dut.i, dut.o}))
|
dut = fir.ParallelFIR(coeff, parallelism=4, width=16)
|
||||||
|
# print(verilog.convert(dut, ios=set(dut.i + dut.o)))
|
||||||
tb = Transfer(dut)
|
tb = Transfer(dut)
|
||||||
|
|
||||||
x, y = tb.run(samples=1 << 10, amplitude=.5)
|
x, y = tb.run(samples=1 << 10, amplitude=.5)
|
||||||
|
|
Loading…
Reference in New Issue