dsp: implement sawg features

2024-12-19 00:16:29 +08:00 · 2016-11-17 02:36:49 +01:00 · 2016-11-17 02:36:49 +01:00 · 51f23feeac
commit 51f23feeac
parent 98193d6fa1
1 changed file with 121 additions and 299 deletions
--- a/artiq/gateware/dsp/sawg.py
+++ b/artiq/gateware/dsp/sawg.py
@@ -1,354 +1,176 @@
+from collections import namedtuple
+
 from migen import *
 from misoc.interconnect.stream import Endpoint
 from misoc.cores.cordic import Cordic

 from .accu import PhasedAccu, Accu
-from .tools import eqh, Delay
+from .tools import eqh, Delay, SatAddMixin
 from .spline import Spline


-class DDSFast(Module):
-    def __init__(self, width, parallelism=4):
-        a_width = width
-        p_width = width
-        f_width = 2*width
+_Widths = namedtuple("_Widths", "t a p f")
+_Orders = namedtuple("_Orders", "a p f")

-        self.o = [Signal((width, True)) for i in range(parallelism)]

-        self.width = width
+class ParallelDDS(Module):
+    def __init__(self, widths, parallelism=1, a_delay=0):
+        self.i = Endpoint([("x", widths.a), ("y", widths.a),
+                           ("f", widths.f), ("p", widths.f), ("clr", 1)])
        self.parallelism = parallelism
-        self.latency = 1  # will be accumulated
-
-        q = PhasedAccu(f_width, parallelism)
-        self.submodules += q
-        self.latency += q.latency
-
-        self.a = Endpoint([("a", a_width)])
-        self.f = Endpoint([("f", f_width)])
-        self.p = Endpoint([("p", p_width)])
-        self.i = [self.a, self.f, self.p]
+        self.widths = widths

        ###

-        a = Signal.like(self.a.a)
-        self.sync += [
-            If(self.a.stb,
-                a.eq(self.a.a)
-            ),
-            If(self.f.stb,
-                eqh(q.i.f, self.f.f)
-            ),
-            q.i.clr.eq(0),
-            If(self.p.stb,
-                eqh(q.i.p, self.p.p),
-                q.i.clr.eq(1)
-            ),
-            q.i.stb.eq(self.f.stb | self.p.stb),
-        ]
+        accu = PhasedAccu(widths.f, parallelism)
+        cordic = [Cordic(width=widths.a, widthz=widths.p, guard=None,
+                         eval_mode="pipelined") for i in range(parallelism)]
+        self.xo = [c.xo for c in cordic]
+        self.yo = [c.yo for c in cordic]
+        a_delay += accu.latency
+        xy_delay = Delay(2*widths.a, max(0, a_delay))
+        z_delay = Delay(parallelism*widths.p, max(0, -a_delay))
+        self.submodules += accu, xy_delay, z_delay, cordic
+        self.latency = max(0, a_delay) + cordic[0].latency
+        self.gain = cordic[0].gain
+
        self.comb += [
-            self.a.ack.eq(1),
-            self.f.ack.eq(1),
-            self.p.ack.eq(1),
-            q.o.ack.eq(1),
+            xy_delay.i.eq(Cat(self.i.x, self.i.y)),
+            z_delay.i.eq(Cat([zi[-widths.p:]
+                              for zi in accu.o.payload.flatten()])),
+            eqh(accu.i.p, self.i.p),
+            accu.i.f.eq(self.i.f),
+            accu.i.clr.eq(self.i.clr),
+            accu.i.stb.eq(self.i.stb),
+            self.i.ack.eq(accu.i.ack),
+            accu.o.ack.eq(1),
+            [Cat(c.xi, c.yi).eq(xy_delay.o) for c in cordic],
+            Cat([c.zi for c in cordic]).eq(z_delay.o),
        ]

-        c = []
-        for i in range(parallelism):
-            ci = Cordic(width=width, widthz=p_width,
-                        guard=None, eval_mode="pipelined")
-            self.submodules += ci
-            c.append(ci)
-            qoi = getattr(q.o, "z{}".format(i))
-            self.comb += [
-                eqh(ci.xi, a),
-                ci.yi.eq(0),
-                eqh(ci.zi, qoi),
-                eqh(self.o[i], ci.xo),
-            ]
-        self.latency += c[0].latency
-        self.gain = c[0].gain

-
-class DDSFast(Module):
-    def __init__(self, width, t_width=None,
-                 a_width=None, p_width=None, f_width=None,
-                 a_order=4, p_order=1, f_order=2, parallelism=8):
-        if t_width is None:
-            t_width = width
-        if a_width is None:
-            a_width = width + (a_order - 1)*t_width
-        if p_width is None:
-            p_width = width + (p_order - 1)*t_width
-        if f_width is None:
-            f_width = width + (f_order + 1)*t_width
-        a = Spline(order=a_order, width=a_width)
-        p = Spline(order=p_order, width=p_width)
-        f = Spline(order=f_order, width=f_width)
-        self.submodules += a, p, f
-
-        self.a = a.tri(t_width)
-        self.f = f.tri(t_width)
-        self.p = p.tri(t_width)
-        self.i = [self.a, self.f, self.p]
-        self.o = [[Signal((width, True)) for i in range(2)]
-                  for i in range(parallelism)]
-        self.parallelism = parallelism
-        self.latency = 0  # will be accumulated
+class SplineParallelDUC(ParallelDDS):
+    def __init__(self, widths, orders, **kwargs):
+        p = Spline(order=orders.p, width=widths.p)
+        f = Spline(order=orders.f, width=widths.f)
+        self.f = f.tri(widths.t)
+        self.p = p.tri(widths.t)
+        self.submodules += p, f
+        self.ce = Signal(reset=1)
+        self.clr = Signal()
+        super().__init__(widths._replace(p=len(self.f.a0), f=len(self.f.a0)),
+                         **kwargs)
+        self.latency += f.latency

        ###

-        self.latency += p.latency
-        q = PhasedAccu(f_width, parallelism)
-        self.submodules += q
-        self.latency += q.latency
-        da = [Signal((width, True)) for i in range(q.latency)]
+        assert p.latency == f.latency

-        self.sync += [
-            If(q.i.stb & q.i.ack,
-                eqh(da[0], a.o.a0),
-                [da[i + 1].eq(da[i]) for i in range(len(da) - 1)],
-            ),
-            If(p.o.stb & p.o.ack,
-                q.i.clr.eq(0),
-            ),
-            If(p.i.stb & p.i.ack,
-                q.i.clr.eq(self.clr),
-            ),
+        self.comb += [
+            p.o.ack.eq(self.ce),
+            f.o.ack.eq(self.ce),
+            eqh(self.i.f, f.o.a0),
+            eqh(self.i.p, p.o.a0),
+            self.i.clr.eq(self.clr),
+            self.i.stb.eq(p.o.stb & f.o.stb),
        ]
+
+
+class SplineParallelDDS(SplineParallelDUC):
+    def __init__(self, widths, orders, **kwargs):
+        a = Spline(order=orders.a, width=widths.a)
+        self.a = a.tri(widths.t)
+        self.submodules += a
+        super().__init__(widths._replace(a=len(self.a.a0)),
+                         orders, **kwargs)
+
+        ###
+
        self.comb += [
            a.o.ack.eq(self.ce),
-            p.o.ack.eq(self.ce),
-            f.o.ack.eq(self.ce),
-            q.i.stb.eq(self.ce),
-            eqh(q.i.p, p.o.a0),
-            q.i.f.eq(f.o.a0),
-            q.o.ack.eq(1),
+            eqh(self.i.x, a.o.a0),
+            self.i.y.eq(0),
        ]

-        c = []
-        for i in range(parallelism):
-            ci = Cordic(width=width, widthz=p_width,
-                        guard=None, eval_mode="pipelined")
-            self.submodules += ci
-            c.append(ci)
-            qoi = getattr(q.o, "z{}".format(i))
-            self.comb += [
-                ci.xi.eq(da[-1]),
-                ci.yi.eq(0),
-                eqh(ci.zi, qoi),
-                eqh(self.o[i][0], ci.xo),
-                eqh(self.o[i][1], ci.yo),
-            ]
-        self.latency += c[0].latency
-        self.gain = c[0].gain
-
-
-class DDSSlow(Module):
-    def __init__(self, width, t_width, a_width, p_width, f_width,
-                 a_order=4, p_order=1, f_order=2):
-        a = Spline(order=a_order, width=a_width)
-        p = Spline(order=p_order, width=p_width)
-        f = Spline(order=f_order, width=f_width)
-        self.submodules += a, p, f
-
-        self.a = a.tri(t_width)
-        self.f = f.tri(t_width)
-        self.p = p.tri(t_width)
-        self.i = [self.a, self.f, self.p]
-        self.i_names = "a f p".split()
-        self.o = [Signal((width, True)) for i in range(2)]
-        self.ce = Signal()
-        self.clr = Signal()
-        self.latency = 0  # will be accumulated
-
-        ###
-
-        self.latency += p.latency
-        q = Accu(f_width)
-        self.latency += q.latency
-        da = CEInserter()(Delay)(width, q.latency)
-        c = Cordic(width=width, widthz=p_width,
-                   guard=None, eval_mode="pipelined")
-        self.latency += c.latency
-        self.gain = c.gain
-        self.submodules += q, da, c
-
-        self.sync += [
-            If(p.o.stb & p.o.ack,
-                q.i.clr.eq(0),
-            ),
-            If(p.i.stb & p.i.ack,
-                q.i.clr.eq(self.clr),
-            ),
-        ]
-        self.comb += [
-            da.ce.eq(q.i.stb & q.i.ack),
-            a.o.ack.eq(self.ce),
-            p.o.ack.eq(self.ce),
-            f.o.ack.eq(self.ce),
-            q.i.stb.eq(self.ce),
-            eqh(da.i, a.o.a0),
-            eqh(q.i.p, p.o.a0),
-            q.i.f.eq(f.o.a0),
-            q.o.ack.eq(1),
-            c.xi.eq(da.o),
-            c.yi.eq(0),
-            eqh(c.zi, q.o.z),
-            eqh(self.o[0], c.xo),
-            eqh(self.o[1], c.yo),
-        ]
-
-
-class DDS(Module):
-    def __init__(self, width, t_width=None,
-                 a_width=None, p_width=None, f_width=None,
-                 a_order=4, p_order=1, f_order=2, parallelism=8):
-        if t_width is None:
-            t_width = width
-        if a_width is None:
-            a_width = width + (a_order - 1)*t_width
-        if p_width is None:
-            p_width = width + (p_order - 1)*t_width
-        if f_width is None:
-            f_width = width + (f_order + 1)*t_width
-        self.b = [DDSSlow(width, t_width, a_width, p_width, f_width, a_order,
-                          p_order, f_order) for i in range(2)]
-        p = Spline(order=1, width=p_width)
-        f = Spline(order=1, width=f_width)
-        self.submodules += self.b, p, f
-
-        self.f0 = f.tri(t_width)
-        self.p0 = p.tri(t_width)
-        self.i = [self.f0, self.p0]
-        self.i_names = "f0 p0".split()
-        for i, bi in enumerate(self.b):
-            self.i += bi.i
-            for ii in bi.i_names:
-                self.i_names.append("{}{}".format(ii, i + 1))
-            for j in "afp":
-                setattr(self, "{}{}".format(j, i + 1), getattr(bi, j))
-        self.o = [[Signal((width, True)) for i in range(2)]
-                  for i in range(parallelism)]
-        self.ce = Signal()
-        self.clr = Signal()
-        self.parallelism = parallelism
-        self.latency = 0  # will be accumulated
-
-        ###
-
-        self.latency += self.b[0].latency  # TODO: f0/p0, q.latency delta
-        q = PhasedAccu(f_width, parallelism)
-        self.submodules += q
-
-        self.sync += [
-            If(p.o.stb & p.o.ack,
-                q.i.clr.eq(0),
-            ),
-            If(p.i.stb & p.i.ack,
-                q.i.clr.eq(self.clr),
-            ),
-        ]
-        self.comb += [
-            [bi.ce.eq(self.ce) for bi in self.b],
-            [bi.clr.eq(self.clr) for bi in self.b],
-            p.o.ack.eq(self.ce),
-            f.o.ack.eq(self.ce),
-            q.i.stb.eq(self.ce),
-            eqh(q.i.p, p.o.a0),
-            eqh(q.i.f, f.o.a0),
-            q.o.ack.eq(1),
-        ]
-        x = self.sat_add(bi.o[0] for bi in self.b)
-        y = self.sat_add(bi.o[1] for bi in self.b)
-
-        c = []
-        for i in range(parallelism):
-            ci = Cordic(width=width, widthz=p_width,
-                        guard=None, eval_mode="pipelined")
-            self.submodules += ci
-            c.append(ci)
-            qoi = getattr(q.o, "z{}".format(i))
-            self.comb += [
-                ci.xi.eq(x),
-                ci.yi.eq(y),
-                eqh(ci.zi, qoi),
-                eqh(self.o[i][0], ci.xo),
-                eqh(self.o[i][1], ci.yo),
-            ]
-        self.latency += c[0].latency
-        self.gain = self.b[0].gain * c[0].gain
-

 class Config(Module):
    def __init__(self):
-        self.cfg = Record([("tap", 5), ("clr", 1), ("iq", 2)])
-        self.i = Endpoint(self.cfg.layout)
+        self.clr = Signal(4)
+        self.iq_en = Signal(2)
+        limit = [Signal((16, True)) for i in range(2*2)]
+        self.limit = [limit[i:i + 2] for i in range(0, len(limit), 2)]
+        self.i = Endpoint([("addr", bits_for(len(limit) + 2)), ("data", 16)])
        self.ce = Signal()

        ###

-        n = Signal(1 << len(self.i.tap))
-        tap = Signal.like(self.i.tap)
-        clk = Signal()
-        clk0 = Signal()
+        div = Signal(16)
+        n = Signal.like(div)
+
+        reg = Array([Cat(self.clr, self.iq_en), Cat(div, n)] + self.limit)

        self.comb += [
            self.i.ack.eq(1),
-            clk.eq(Array(n)[tap]),
+            self.ce.eq(n == 0),
        ]
        self.sync += [
-            clk0.eq(clk),
-            self.ce.eq(0),
-            If(clk0 ^ clk,
-                self.ce.eq(1),
+            n.eq(n - 1),
+            If(self.ce,
+                n.eq(div),
            ),
-            n.eq(n + 1),
            If(self.i.stb,
-                n.eq(0),
-                self.cfg.eq(self.i.payload),
+                reg[self.i.addr].eq(self.i.data),
            ),
        ]


-class Channel(Module):
-    def __init__(self, width=16, t_width=None, u_order=4, **kwargs):
-        if t_width is None:
-            t_width = width
-        du = Spline(width=width + (u_order - 1)*t_width, order=u_order)
-        da = DDS(width, t_width, **kwargs)
+class Channel(Module, SatAddMixin):
+    def __init__(self, width=16, parallelism=4, widths=None, orders=None):
+        if orders is None:
+            orders = _Orders(a=4, f=2, p=1)
+        if widths is None:
+            widths = _Widths(t=width, a=orders.a*width, p=orders.p*width,
+                             f=3*width + (orders.f - 1)*width)
+
        cfg = Config()
-        self.submodules += du, da, cfg
-        self.i = [cfg.i, du.tri(t_width)] + da.i
-        self.i_names = "cfg u".split() + da.i_names
-        self.q_i = [Signal((width, True)) for i in range(da.parallelism)]
-        self.q_o = [ai[1] for ai in da.o]
-        self.o = [Signal((width, True)) for i in range(da.parallelism)]
-        self.width = width
-        self.parallelism = da.parallelism
-        self.latency = da.latency + 1
-        self.cordic_gain = da.gain
+        a1 = SplineParallelDDS(widths, orders)
+        a2 = SplineParallelDDS(widths, orders)
+        b = SplineParallelDUC(widths, orders, parallelism=parallelism,
+                              a_delay=-a1.latency)
+        u = Spline(width=widths.a, order=orders.a)
+        du = Delay(widths.a, a1.latency + b.latency - u.latency)
+        self.submodules += cfg, a1, a2, b, u, du
+        self.cfg = cfg.i
+        self.u = u.tri(widths.t)
+        self.i = [self.cfg, self.u, a1.a, a1.f, a1.p, a2.a, a2.f, a2.p, b.f, b.p]
+        self.y_in = [Signal((width, True)) for i in range(b.parallelism)]
+        self.y_out = b.yo
+        self.o = [Signal((width, True)) for i in range(b.parallelism)]
+        self.widths = widths
+        self.orders = orders
+        self.parallelism = parallelism
+        self.latency = a1.latency + b.latency + 1
+        self.cordic_gain = a1.gain*b.gain

        ###

-        # delay du to match da
-        ddu = Delay((width, True), da.latency - du.latency)
-        self.submodules += ddu
        self.comb += [
-            ddu.i.eq(du.o.a0[-width:]),
-            da.clr.eq(cfg.cfg.clr),
-            da.ce.eq(cfg.ce),
-            du.o.ack.eq(cfg.ce),
+            a1.ce.eq(cfg.ce),
+            a2.ce.eq(cfg.ce),
+            b.ce.eq(cfg.ce),
+            u.o.ack.eq(cfg.ce),
+            Cat(a1.clr, a2.clr, b.clr).eq(cfg.clr),
+            b.i.x.eq(self.sat_add([a1.xo[0], a2.xo[0]])),
+            b.i.y.eq(self.sat_add([a1.yo[0], a2.yo[0]])),
+            eqh(du.i, u.o.a0),
        ]
        # wire up outputs and q_{i,o} exchange
-        for oi, ai, qi in zip(self.o, da.o, self.q_i):
+        for o, x, y in zip(self.o, b.xo, self.y_in):
            self.sync += [
-                oi.eq(self.sat_add([
-                    ddu.o +
-                    # du.o.a0[-width:],
-                    Mux(cfg.cfg.iq[0], ai[0], 0),
-                    Mux(cfg.cfg.iq[1], qi, 0)])),
+                o.eq(self.sat_add([du.o,
+                    Mux(cfg.iq_en[0], x, 0),
+                    Mux(cfg.iq_en[1], y, 0)])),
            ]

-    def connect_q(self, buddy):
-        for i, qi in enumerate(self.q_i):
-            self.comb += qi.eq(buddy.q_o[i])
+    def connect_q_from(self, buddy):
+        self.comb += Cat(self.y_in).eq(Cat(buddy.y_out))