dsp: implement sawg features

2024-12-19 00:16:29 +08:00 · 2016-11-17 02:36:49 +01:00 · 2016-11-17 02:36:49 +01:00 · 51f23feeac
commit 51f23feeac
parent 98193d6fa1
1 changed files with 121 additions and 299 deletions
--- a/artiq/gateware/dsp/sawg.py
+++ b/artiq/gateware/dsp/sawg.py
@ -1,354 +1,176 @@
 from collections import namedtuple
 from migen import *
 from misoc.interconnect.stream import Endpoint
 from misoc.cores.cordic import Cordic
 from .accu import PhasedAccu, Accu
-from .tools import eqh, Delay
+from .tools import eqh, Delay, SatAddMixin
 from .spline import Spline
-class DDSFast(Module):
+_Widths = namedtuple("_Widths", "t a p f")
-    def __init__(self, width, parallelism=4):
+_Orders = namedtuple("_Orders", "a p f")
        a_width = width
        p_width = width
        f_width = 2*width
        self.o = [Signal((width, True)) for i in range(parallelism)]
-        self.width = width
+class ParallelDDS(Module):
    def __init__(self, widths, parallelism=1, a_delay=0):
        self.i = Endpoint([("x", widths.a), ("y", widths.a),
                           ("f", widths.f), ("p", widths.f), ("clr", 1)])
        self.parallelism = parallelism
-        self.latency = 1  # will be accumulated
+        self.widths = widths
        q = PhasedAccu(f_width, parallelism)
        self.submodules += q
        self.latency += q.latency
        self.a = Endpoint([("a", a_width)])
        self.f = Endpoint([("f", f_width)])
        self.p = Endpoint([("p", p_width)])
        self.i = [self.a, self.f, self.p]
        ###
-        a = Signal.like(self.a.a)
+        accu = PhasedAccu(widths.f, parallelism)
-        self.sync += [
+        cordic = [Cordic(width=widths.a, widthz=widths.p, guard=None,
-            If(self.a.stb,
+                         eval_mode="pipelined") for i in range(parallelism)]
-                a.eq(self.a.a)
+        self.xo = [c.xo for c in cordic]
-            ),
+        self.yo = [c.yo for c in cordic]
-            If(self.f.stb,
+        a_delay += accu.latency
-                eqh(q.i.f, self.f.f)
+        xy_delay = Delay(2*widths.a, max(0, a_delay))
-            ),
+        z_delay = Delay(parallelism*widths.p, max(0, -a_delay))
-            q.i.clr.eq(0),
+        self.submodules += accu, xy_delay, z_delay, cordic
-            If(self.p.stb,
+        self.latency = max(0, a_delay) + cordic[0].latency
-                eqh(q.i.p, self.p.p),
+        self.gain = cordic[0].gain
-                q.i.clr.eq(1)
+
            ),
            q.i.stb.eq(self.f.stb | self.p.stb),
        ]
        self.comb += [
-            self.a.ack.eq(1),
+            xy_delay.i.eq(Cat(self.i.x, self.i.y)),
-            self.f.ack.eq(1),
+            z_delay.i.eq(Cat([zi[-widths.p:]
-            self.p.ack.eq(1),
+                              for zi in accu.o.payload.flatten()])),
-            q.o.ack.eq(1),
+            eqh(accu.i.p, self.i.p),
            accu.i.f.eq(self.i.f),
            accu.i.clr.eq(self.i.clr),
            accu.i.stb.eq(self.i.stb),
            self.i.ack.eq(accu.i.ack),
            accu.o.ack.eq(1),
            [Cat(c.xi, c.yi).eq(xy_delay.o) for c in cordic],
            Cat([c.zi for c in cordic]).eq(z_delay.o),
        ]
        c = []
        for i in range(parallelism):
            ci = Cordic(width=width, widthz=p_width,
                        guard=None, eval_mode="pipelined")
            self.submodules += ci
            c.append(ci)
            qoi = getattr(q.o, "z{}".format(i))
            self.comb += [
                eqh(ci.xi, a),
                ci.yi.eq(0),
                eqh(ci.zi, qoi),
                eqh(self.o[i], ci.xo),
            ]
        self.latency += c[0].latency
        self.gain = c[0].gain
-
+class SplineParallelDUC(ParallelDDS):
-class DDSFast(Module):
+    def __init__(self, widths, orders, **kwargs):
-    def __init__(self, width, t_width=None,
+        p = Spline(order=orders.p, width=widths.p)
-                 a_width=None, p_width=None, f_width=None,
+        f = Spline(order=orders.f, width=widths.f)
-                 a_order=4, p_order=1, f_order=2, parallelism=8):
+        self.f = f.tri(widths.t)
-        if t_width is None:
+        self.p = p.tri(widths.t)
-            t_width = width
+        self.submodules += p, f
-        if a_width is None:
+        self.ce = Signal(reset=1)
-            a_width = width + (a_order - 1)*t_width
+        self.clr = Signal()
-        if p_width is None:
+        super().__init__(widths._replace(p=len(self.f.a0), f=len(self.f.a0)),
-            p_width = width + (p_order - 1)*t_width
+                         **kwargs)
-        if f_width is None:
+        self.latency += f.latency
            f_width = width + (f_order + 1)*t_width
        a = Spline(order=a_order, width=a_width)
        p = Spline(order=p_order, width=p_width)
        f = Spline(order=f_order, width=f_width)
        self.submodules += a, p, f
        self.a = a.tri(t_width)
        self.f = f.tri(t_width)
        self.p = p.tri(t_width)
        self.i = [self.a, self.f, self.p]
        self.o = [[Signal((width, True)) for i in range(2)]
                  for i in range(parallelism)]
        self.parallelism = parallelism
        self.latency = 0  # will be accumulated
        ###
-        self.latency += p.latency
+        assert p.latency == f.latency
        q = PhasedAccu(f_width, parallelism)
        self.submodules += q
        self.latency += q.latency
        da = [Signal((width, True)) for i in range(q.latency)]
-        self.sync += [
+        self.comb += [
-            If(q.i.stb & q.i.ack,
+            p.o.ack.eq(self.ce),
-                eqh(da[0], a.o.a0),
+            f.o.ack.eq(self.ce),
-                [da[i + 1].eq(da[i]) for i in range(len(da) - 1)],
+            eqh(self.i.f, f.o.a0),
-            ),
+            eqh(self.i.p, p.o.a0),
-            If(p.o.stb & p.o.ack,
+            self.i.clr.eq(self.clr),
-                q.i.clr.eq(0),
+            self.i.stb.eq(p.o.stb & f.o.stb),
            ),
            If(p.i.stb & p.i.ack,
                q.i.clr.eq(self.clr),
            ),
        ]
 class SplineParallelDDS(SplineParallelDUC):
    def __init__(self, widths, orders, **kwargs):
        a = Spline(order=orders.a, width=widths.a)
        self.a = a.tri(widths.t)
        self.submodules += a
        super().__init__(widths._replace(a=len(self.a.a0)),
                         orders, **kwargs)
        ###
        self.comb += [
            a.o.ack.eq(self.ce),
-            p.o.ack.eq(self.ce),
+            eqh(self.i.x, a.o.a0),
-            f.o.ack.eq(self.ce),
+            self.i.y.eq(0),
            q.i.stb.eq(self.ce),
            eqh(q.i.p, p.o.a0),
            q.i.f.eq(f.o.a0),
            q.o.ack.eq(1),
        ]
        c = []
        for i in range(parallelism):
            ci = Cordic(width=width, widthz=p_width,
                        guard=None, eval_mode="pipelined")
            self.submodules += ci
            c.append(ci)
            qoi = getattr(q.o, "z{}".format(i))
            self.comb += [
                ci.xi.eq(da[-1]),
                ci.yi.eq(0),
                eqh(ci.zi, qoi),
                eqh(self.o[i][0], ci.xo),
                eqh(self.o[i][1], ci.yo),
            ]
        self.latency += c[0].latency
        self.gain = c[0].gain
 class DDSSlow(Module):
    """Spline-driven accumulator/CORDIC stage.

    Three ``Spline`` instances (``a``, ``p``, ``f`` -- presumably amplitude,
    phase and frequency; confirm against callers) are instantiated.  ``p`` and
    ``f`` feed an ``Accu``; ``a`` is passed through a clock-enabled ``Delay``
    of the accumulator's latency and drives the x input of a pipelined
    ``Cordic`` whose z input comes from the accumulator output and whose
    y input is tied to 0.

    Attributes set here:
      a, f, p  -- objects returned by ``Spline.tri(t_width)`` for each spline
      i        -- list ``[a, f, p]``
      i_names  -- ``["a", "f", "p"]``, in the same order as ``i``
      o        -- two ``Signal((width, True))`` driven from the CORDIC
                  ``xo`` and ``yo`` outputs
      ce       -- drives ``ack`` of all three spline outputs and the
                  accumulator input ``stb``
      clr      -- value copied into the accumulator ``clr`` input (see the
                  ``sync`` block below)
      latency  -- sum of the ``p`` spline, accumulator and CORDIC latencies
      gain     -- copied from the CORDIC's ``gain``
    """
    def __init__(self, width, t_width, a_width, p_width, f_width,
                 a_order=4, p_order=1, f_order=2):
        a = Spline(order=a_order, width=a_width)
        p = Spline(order=p_order, width=p_width)
        f = Spline(order=f_order, width=f_width)
        self.submodules += a, p, f
        # Public inputs: whatever Spline.tri() returns for a t_width-wide
        # time field (presumably an input endpoint; confirm in .spline).
        self.a = a.tri(t_width)
        self.f = f.tri(t_width)
        self.p = p.tri(t_width)
        self.i = [self.a, self.f, self.p]
        self.i_names = "a f p".split()
        self.o = [Signal((width, True)) for i in range(2)]
        self.ce = Signal()
        self.clr = Signal()
        self.latency = 0  # will be accumulated
        ###
        # Only p.latency is counted for the spline stage; a and f latencies
        # are not added here.
        self.latency += p.latency
        q = Accu(f_width)
        self.latency += q.latency
        # Delay line for the `a` path, as long as the accumulator latency,
        # wrapped with CEInserter so it only advances when da.ce is high.
        da = CEInserter()(Delay)(width, q.latency)
        c = Cordic(width=width, widthz=p_width,
                   guard=None, eval_mode="pipelined")
        self.latency += c.latency
        self.gain = c.gain
        self.submodules += q, da, c
        self.sync += [
            # q.i.clr is set to 0 when the p spline output handshake completes ...
            If(p.o.stb & p.o.ack,
                q.i.clr.eq(0),
            ),
            # ... and loaded from self.clr when the p spline input handshake
            # completes.  This statement comes last, so under migen's
            # last-assignment-wins rule it takes precedence if both
            # conditions hold in the same cycle.
            If(p.i.stb & p.i.ack,
                q.i.clr.eq(self.clr),
            ),
        ]
        self.comb += [
            # Advance the `a` delay line only when the accumulator accepts
            # an input.
            da.ce.eq(q.i.stb & q.i.ack),
            a.o.ack.eq(self.ce),
            p.o.ack.eq(self.ce),
            f.o.ack.eq(self.ce),
            q.i.stb.eq(self.ce),
            # eqh is imported from .tools; presumably it assigns the
            # high-order bits when widths differ -- confirm in .tools.
            eqh(da.i, a.o.a0),
            eqh(q.i.p, p.o.a0),
            q.i.f.eq(f.o.a0),
            # The accumulator output is always acknowledged.
            q.o.ack.eq(1),
            c.xi.eq(da.o),
            c.yi.eq(0),
            eqh(c.zi, q.o.z),
            eqh(self.o[0], c.xo),
            eqh(self.o[1], c.yo),
        ]
 class DDS(Module):
    """Two ``DDSSlow`` stages combined and fed into parallel CORDICs.

    Two ``DDSSlow`` instances (``self.b``) produce x/y pairs that are
    combined with ``self.sat_add`` and applied to the x/y inputs of
    ``parallelism`` pipelined ``Cordic`` instances.  The z input of CORDIC
    ``i`` comes from field ``z<i>`` of a ``PhasedAccu`` that is driven by two
    additional first-order splines (exposed as ``f0`` and ``p0``).

    Width parameters left as ``None`` are derived from ``width``, ``t_width``
    and the corresponding spline order (see the top of ``__init__``).

    Attributes set here:
      b            -- list of the two ``DDSSlow`` instances
      f0, p0       -- ``Spline.tri(t_width)`` of the two local splines
      a1, f1, p1, a2, f2, p2
                   -- the ``a``/``f``/``p`` attributes of ``b[0]`` and ``b[1]``
      i, i_names   -- ``[f0, p0]`` followed by each ``DDSSlow``'s ``i``, and
                      the matching names (``"f0 p0 a1 f1 p1 a2 f2 p2"``)
      o            -- ``parallelism`` pairs of ``Signal((width, True))``
                      driven from each CORDIC's ``xo`` and ``yo``
      ce, clr      -- forwarded to both ``DDSSlow`` instances; ``ce`` also
                      gates the local splines and the ``PhasedAccu`` input
      parallelism  -- copied from the argument
      latency      -- ``b[0].latency`` plus the first CORDIC's latency
      gain         -- product of ``b[0].gain`` and the first CORDIC's gain
    """
    def __init__(self, width, t_width=None,
                 a_width=None, p_width=None, f_width=None,
                 a_order=4, p_order=1, f_order=2, parallelism=8):
        # Fill in defaults for any width that was not given explicitly.
        if t_width is None:
            t_width = width
        if a_width is None:
            a_width = width + (a_order - 1)*t_width
        if p_width is None:
            p_width = width + (p_order - 1)*t_width
        if f_width is None:
            f_width = width + (f_order + 1)*t_width
        self.b = [DDSSlow(width, t_width, a_width, p_width, f_width, a_order,
                          p_order, f_order) for i in range(2)]
        # The local p/f splines are always order 1, independent of
        # p_order/f_order (which only apply to the DDSSlow instances).
        p = Spline(order=1, width=p_width)
        f = Spline(order=1, width=f_width)
        self.submodules += self.b, p, f
        self.f0 = f.tri(t_width)
        self.p0 = p.tri(t_width)
        self.i = [self.f0, self.p0]
        self.i_names = "f0 p0".split()
        for i, bi in enumerate(self.b):
            # Append each DDSSlow's inputs and 1-based suffixed names
            # (a1, f1, p1, a2, ...), and re-export them as attributes.
            self.i += bi.i
            for ii in bi.i_names:
                self.i_names.append("{}{}".format(ii, i + 1))
            for j in "afp":
                setattr(self, "{}{}".format(j, i + 1), getattr(bi, j))
        self.o = [[Signal((width, True)) for i in range(2)]
                  for i in range(parallelism)]
        self.ce = Signal()
        self.clr = Signal()
        self.parallelism = parallelism
        self.latency = 0  # will be accumulated
        ###
        self.latency += self.b[0].latency  # TODO: f0/p0, q.latency delta
        q = PhasedAccu(f_width, parallelism)
        self.submodules += q
        self.sync += [
            # Same clr handling as in DDSSlow: set to 0 on the p spline
            # output handshake, loaded from self.clr on the p spline input
            # handshake; the later statement wins if both fire together.
            If(p.o.stb & p.o.ack,
                q.i.clr.eq(0),
            ),
            If(p.i.stb & p.i.ack,
                q.i.clr.eq(self.clr),
            ),
        ]
        self.comb += [
            [bi.ce.eq(self.ce) for bi in self.b],
            [bi.clr.eq(self.clr) for bi in self.b],
            p.o.ack.eq(self.ce),
            f.o.ack.eq(self.ce),
            q.i.stb.eq(self.ce),
            # eqh is imported from .tools; presumably it assigns the
            # high-order bits when widths differ -- confirm in .tools.
            eqh(q.i.p, p.o.a0),
            eqh(q.i.f, f.o.a0),
            # The accumulator output is always acknowledged.
            q.o.ack.eq(1),
        ]
        # NOTE(review): sat_add is not defined in this class body and the
        # only visible base is Module -- presumably it is supplied by a mixin
        # or elsewhere; confirm.  It is called here with generator
        # expressions, not lists.
        x = self.sat_add(bi.o[0] for bi in self.b)
        y = self.sat_add(bi.o[1] for bi in self.b)
        c = []
        for i in range(parallelism):
            ci = Cordic(width=width, widthz=p_width,
                        guard=None, eval_mode="pipelined")
            self.submodules += ci
            c.append(ci)
            # Output field of the PhasedAccu for this lane: z0, z1, ...
            qoi = getattr(q.o, "z{}".format(i))
            self.comb += [
                # Every lane gets the same x/y input; only z differs.
                ci.xi.eq(x),
                ci.yi.eq(y),
                eqh(ci.zi, qoi),
                eqh(self.o[i][0], ci.xo),
                eqh(self.o[i][1], ci.yo),
            ]
        # All CORDICs are built with identical parameters, so the first one
        # is used for latency and gain.
        self.latency += c[0].latency
        self.gain = self.b[0].gain * c[0].gain
 class Config(Module):
    def __init__(self):
-        self.cfg = Record([("tap", 5), ("clr", 1), ("iq", 2)])
+        self.clr = Signal(4)
-        self.i = Endpoint(self.cfg.layout)
+        self.iq_en = Signal(2)
        limit = [Signal((16, True)) for i in range(2*2)]
        self.limit = [limit[i:i + 2] for i in range(0, len(limit), 2)]
        self.i = Endpoint([("addr", bits_for(len(limit) + 2)), ("data", 16)])
        self.ce = Signal()
        ###
-        n = Signal(1 << len(self.i.tap))
+        div = Signal(16)
-        tap = Signal.like(self.i.tap)
+        n = Signal.like(div)
-        clk = Signal()
+
-        clk0 = Signal()
+        reg = Array([Cat(self.clr, self.iq_en), Cat(div, n)] + self.limit)
        self.comb += [
            self.i.ack.eq(1),
-            clk.eq(Array(n)[tap]),
+            self.ce.eq(n == 0),
        ]
        self.sync += [
-            clk0.eq(clk),
+            n.eq(n - 1),
-            self.ce.eq(0),
+            If(self.ce,
-            If(clk0 ^ clk,
+                n.eq(div),
                self.ce.eq(1),
            ),
            n.eq(n + 1),
            If(self.i.stb,
-                n.eq(0),
+                reg[self.i.addr].eq(self.i.data),
                self.cfg.eq(self.i.payload),
            ),
        ]
-class Channel(Module):
+class Channel(Module, SatAddMixin):
-    def __init__(self, width=16, t_width=None, u_order=4, **kwargs):
+    def __init__(self, width=16, parallelism=4, widths=None, orders=None):
-        if t_width is None:
+        if orders is None:
-            t_width = width
+            orders = _Orders(a=4, f=2, p=1)
-        du = Spline(width=width + (u_order - 1)*t_width, order=u_order)
+        if widths is None:
-        da = DDS(width, t_width, **kwargs)
+            widths = _Widths(t=width, a=orders.a*width, p=orders.p*width,
                             f=3*width + (orders.f - 1)*width)
        cfg = Config()
-        self.submodules += du, da, cfg
+        a1 = SplineParallelDDS(widths, orders)
-        self.i = [cfg.i, du.tri(t_width)] + da.i
+        a2 = SplineParallelDDS(widths, orders)
-        self.i_names = "cfg u".split() + da.i_names
+        b = SplineParallelDUC(widths, orders, parallelism=parallelism,
-        self.q_i = [Signal((width, True)) for i in range(da.parallelism)]
+                              a_delay=-a1.latency)
-        self.q_o = [ai[1] for ai in da.o]
+        u = Spline(width=widths.a, order=orders.a)
-        self.o = [Signal((width, True)) for i in range(da.parallelism)]
+        du = Delay(widths.a, a1.latency + b.latency - u.latency)
-        self.width = width
+        self.submodules += cfg, a1, a2, b, u, du
-        self.parallelism = da.parallelism
+        self.cfg = cfg.i
-        self.latency = da.latency + 1
+        self.u = u.tri(widths.t)
-        self.cordic_gain = da.gain
+        self.i = [self.cfg, self.u, a1.a, a1.f, a1.p, a2.a, a2.f, a2.p, b.f, b.p]
        self.y_in = [Signal((width, True)) for i in range(b.parallelism)]
        self.y_out = b.yo
        self.o = [Signal((width, True)) for i in range(b.parallelism)]
        self.widths = widths
        self.orders = orders
        self.parallelism = parallelism
        self.latency = a1.latency + b.latency + 1
        self.cordic_gain = a1.gain*b.gain
        ###
        # delay du to match da
        ddu = Delay((width, True), da.latency - du.latency)
        self.submodules += ddu
        self.comb += [
-            ddu.i.eq(du.o.a0[-width:]),
+            a1.ce.eq(cfg.ce),
-            da.clr.eq(cfg.cfg.clr),
+            a2.ce.eq(cfg.ce),
-            da.ce.eq(cfg.ce),
+            b.ce.eq(cfg.ce),
-            du.o.ack.eq(cfg.ce),
+            u.o.ack.eq(cfg.ce),
            Cat(a1.clr, a2.clr, b.clr).eq(cfg.clr),
            b.i.x.eq(self.sat_add([a1.xo[0], a2.xo[0]])),
            b.i.y.eq(self.sat_add([a1.yo[0], a2.yo[0]])),
            eqh(du.i, u.o.a0),
        ]
        # wire up outputs and q_{i,o} exchange
-        for oi, ai, qi in zip(self.o, da.o, self.q_i):
+        for o, x, y in zip(self.o, b.xo, self.y_in):
            self.sync += [
-                oi.eq(self.sat_add([
+                o.eq(self.sat_add([du.o,
-                    ddu.o +
+                    Mux(cfg.iq_en[0], x, 0),
-                    # du.o.a0[-width:],
+                    Mux(cfg.iq_en[1], y, 0)])),
                    Mux(cfg.cfg.iq[0], ai[0], 0),
                    Mux(cfg.cfg.iq[1], qi, 0)])),
            ]
-    def connect_q(self, buddy):
+    def connect_q_from(self, buddy):
-        for i, qi in enumerate(self.q_i):
+        self.comb += Cat(self.y_in).eq(Cat(buddy.y_out))
            self.comb += qi.eq(buddy.q_o[i])