From 51f23feeacc25586616f277967b5935ddb2ea40e Mon Sep 17 00:00:00 2001 From: Robert Jordens Date: Thu, 17 Nov 2016 02:36:49 +0100 Subject: [PATCH] dsp: implement sawg features --- artiq/gateware/dsp/sawg.py | 420 +++++++++++-------------------------- 1 file changed, 121 insertions(+), 299 deletions(-) diff --git a/artiq/gateware/dsp/sawg.py b/artiq/gateware/dsp/sawg.py index 75ac80b39..14c16d41f 100644 --- a/artiq/gateware/dsp/sawg.py +++ b/artiq/gateware/dsp/sawg.py @@ -1,354 +1,176 @@ +from collections import namedtuple + from migen import * from misoc.interconnect.stream import Endpoint from misoc.cores.cordic import Cordic from .accu import PhasedAccu, Accu -from .tools import eqh, Delay +from .tools import eqh, Delay, SatAddMixin from .spline import Spline -class DDSFast(Module): - def __init__(self, width, parallelism=4): - a_width = width - p_width = width - f_width = 2*width +_Widths = namedtuple("_Widths", "t a p f") +_Orders = namedtuple("_Orders", "a p f") - self.o = [Signal((width, True)) for i in range(parallelism)] - self.width = width +class ParallelDDS(Module): + def __init__(self, widths, parallelism=1, a_delay=0): + self.i = Endpoint([("x", widths.a), ("y", widths.a), + ("f", widths.f), ("p", widths.f), ("clr", 1)]) self.parallelism = parallelism - self.latency = 1 # will be accumulated - - q = PhasedAccu(f_width, parallelism) - self.submodules += q - self.latency += q.latency - - self.a = Endpoint([("a", a_width)]) - self.f = Endpoint([("f", f_width)]) - self.p = Endpoint([("p", p_width)]) - self.i = [self.a, self.f, self.p] + self.widths = widths ### - a = Signal.like(self.a.a) - self.sync += [ - If(self.a.stb, - a.eq(self.a.a) - ), - If(self.f.stb, - eqh(q.i.f, self.f.f) - ), - q.i.clr.eq(0), - If(self.p.stb, - eqh(q.i.p, self.p.p), - q.i.clr.eq(1) - ), - q.i.stb.eq(self.f.stb | self.p.stb), - ] + accu = PhasedAccu(widths.f, parallelism) + cordic = [Cordic(width=widths.a, widthz=widths.p, guard=None, + eval_mode="pipelined") for i in range(parallelism)] + self.xo = [c.xo for c in cordic] + self.yo = [c.yo for c in cordic] + a_delay += accu.latency + xy_delay = Delay(2*widths.a, max(0, a_delay)) + z_delay = Delay(parallelism*widths.p, max(0, -a_delay)) + self.submodules += accu, xy_delay, z_delay, cordic + self.latency = max(0, a_delay) + cordic[0].latency + self.gain = cordic[0].gain + self.comb += [ - self.a.ack.eq(1), - self.f.ack.eq(1), - self.p.ack.eq(1), - q.o.ack.eq(1), + xy_delay.i.eq(Cat(self.i.x, self.i.y)), + z_delay.i.eq(Cat([zi[-widths.p:] + for zi in accu.o.payload.flatten()])), + eqh(accu.i.p, self.i.p), + accu.i.f.eq(self.i.f), + accu.i.clr.eq(self.i.clr), + accu.i.stb.eq(self.i.stb), + self.i.ack.eq(accu.i.ack), + accu.o.ack.eq(1), + [Cat(c.xi, c.yi).eq(xy_delay.o) for c in cordic], + Cat([c.zi for c in cordic]).eq(z_delay.o), ] - c = [] - for i in range(parallelism): - ci = Cordic(width=width, widthz=p_width, - guard=None, eval_mode="pipelined") - self.submodules += ci - c.append(ci) - qoi = getattr(q.o, "z{}".format(i)) - self.comb += [ - eqh(ci.xi, a), - ci.yi.eq(0), - eqh(ci.zi, qoi), - eqh(self.o[i], ci.xo), - ] - self.latency += c[0].latency - self.gain = c[0].gain - -class DDSFast(Module): - def __init__(self, width, t_width=None, - a_width=None, p_width=None, f_width=None, - a_order=4, p_order=1, f_order=2, parallelism=8): - if t_width is None: - t_width = width - if a_width is None: - a_width = width + (a_order - 1)*t_width - if p_width is None: - p_width = width + (p_order - 1)*t_width - if f_width is None: - f_width = width + (f_order + 1)*t_width - a = Spline(order=a_order, width=a_width) - p = Spline(order=p_order, width=p_width) - f = Spline(order=f_order, width=f_width) - self.submodules += a, p, f - - self.a = a.tri(t_width) - self.f = f.tri(t_width) - self.p = p.tri(t_width) - self.i = [self.a, self.f, self.p] - self.o = [[Signal((width, True)) for i in range(2)] - for i in range(parallelism)] - self.parallelism = parallelism - self.latency = 0 # will be accumulated +class SplineParallelDUC(ParallelDDS): + def __init__(self, widths, orders, **kwargs): + p = Spline(order=orders.p, width=widths.p) + f = Spline(order=orders.f, width=widths.f) + self.f = f.tri(widths.t) + self.p = p.tri(widths.t) + self.submodules += p, f + self.ce = Signal(reset=1) + self.clr = Signal() + super().__init__(widths._replace(p=len(self.f.a0), f=len(self.f.a0)), + **kwargs) + self.latency += f.latency ### - self.latency += p.latency - q = PhasedAccu(f_width, parallelism) - self.submodules += q - self.latency += q.latency - da = [Signal((width, True)) for i in range(q.latency)] + assert p.latency == f.latency - self.sync += [ - If(q.i.stb & q.i.ack, - eqh(da[0], a.o.a0), - [da[i + 1].eq(da[i]) for i in range(len(da) - 1)], - ), - If(p.o.stb & p.o.ack, - q.i.clr.eq(0), - ), - If(p.i.stb & p.i.ack, - q.i.clr.eq(self.clr), - ), + self.comb += [ + p.o.ack.eq(self.ce), + f.o.ack.eq(self.ce), + eqh(self.i.f, f.o.a0), + eqh(self.i.p, p.o.a0), + self.i.clr.eq(self.clr), + self.i.stb.eq(p.o.stb & f.o.stb), ] + + +class SplineParallelDDS(SplineParallelDUC): + def __init__(self, widths, orders, **kwargs): + a = Spline(order=orders.a, width=widths.a) + self.a = a.tri(widths.t) + self.submodules += a + super().__init__(widths._replace(a=len(self.a.a0)), + orders, **kwargs) + + ### + self.comb += [ a.o.ack.eq(self.ce), - p.o.ack.eq(self.ce), - f.o.ack.eq(self.ce), - q.i.stb.eq(self.ce), - eqh(q.i.p, p.o.a0), - q.i.f.eq(f.o.a0), - q.o.ack.eq(1), + eqh(self.i.x, a.o.a0), + self.i.y.eq(0), ] - c = [] - for i in range(parallelism): - ci = Cordic(width=width, widthz=p_width, - guard=None, eval_mode="pipelined") - self.submodules += ci - c.append(ci) - qoi = getattr(q.o, "z{}".format(i)) - self.comb += [ - ci.xi.eq(da[-1]), - ci.yi.eq(0), - eqh(ci.zi, qoi), - eqh(self.o[i][0], ci.xo), - eqh(self.o[i][1], ci.yo), - ] - self.latency += c[0].latency - self.gain = c[0].gain - - -class DDSSlow(Module): - def __init__(self, width, t_width, a_width, p_width, f_width, - a_order=4, p_order=1, f_order=2): - a = Spline(order=a_order, width=a_width) - p = Spline(order=p_order, width=p_width) - f = Spline(order=f_order, width=f_width) - self.submodules += a, p, f - - self.a = a.tri(t_width) - self.f = f.tri(t_width) - self.p = p.tri(t_width) - self.i = [self.a, self.f, self.p] - self.i_names = "a f p".split() - self.o = [Signal((width, True)) for i in range(2)] - self.ce = Signal() - self.clr = Signal() - self.latency = 0 # will be accumulated - - ### - - self.latency += p.latency - q = Accu(f_width) - self.latency += q.latency - da = CEInserter()(Delay)(width, q.latency) - c = Cordic(width=width, widthz=p_width, - guard=None, eval_mode="pipelined") - self.latency += c.latency - self.gain = c.gain - self.submodules += q, da, c - - self.sync += [ - If(p.o.stb & p.o.ack, - q.i.clr.eq(0), - ), - If(p.i.stb & p.i.ack, - q.i.clr.eq(self.clr), - ), - ] - self.comb += [ - da.ce.eq(q.i.stb & q.i.ack), - a.o.ack.eq(self.ce), - p.o.ack.eq(self.ce), - f.o.ack.eq(self.ce), - q.i.stb.eq(self.ce), - eqh(da.i, a.o.a0), - eqh(q.i.p, p.o.a0), - q.i.f.eq(f.o.a0), - q.o.ack.eq(1), - c.xi.eq(da.o), - c.yi.eq(0), - eqh(c.zi, q.o.z), - eqh(self.o[0], c.xo), - eqh(self.o[1], c.yo), - ] - - -class DDS(Module): - def __init__(self, width, t_width=None, - a_width=None, p_width=None, f_width=None, - a_order=4, p_order=1, f_order=2, parallelism=8): - if t_width is None: - t_width = width - if a_width is None: - a_width = width + (a_order - 1)*t_width - if p_width is None: - p_width = width + (p_order - 1)*t_width - if f_width is None: - f_width = width + (f_order + 1)*t_width - self.b = [DDSSlow(width, t_width, a_width, p_width, f_width, a_order, - p_order, f_order) for i in range(2)] - p = Spline(order=1, width=p_width) - f = Spline(order=1, width=f_width) - self.submodules += self.b, p, f - - self.f0 = f.tri(t_width) - self.p0 = p.tri(t_width) - self.i = [self.f0, self.p0] - self.i_names = "f0 p0".split() - for i, bi in enumerate(self.b): - self.i += bi.i - for ii in bi.i_names: - self.i_names.append("{}{}".format(ii, i + 1)) - for j in "afp": - setattr(self, "{}{}".format(j, i + 1), getattr(bi, j)) - self.o = [[Signal((width, True)) for i in range(2)] - for i in range(parallelism)] - self.ce = Signal() - self.clr = Signal() - self.parallelism = parallelism - self.latency = 0 # will be accumulated - - ### - - self.latency += self.b[0].latency # TODO: f0/p0, q.latency delta - q = PhasedAccu(f_width, parallelism) - self.submodules += q - - self.sync += [ - If(p.o.stb & p.o.ack, - q.i.clr.eq(0), - ), - If(p.i.stb & p.i.ack, - q.i.clr.eq(self.clr), - ), - ] - self.comb += [ - [bi.ce.eq(self.ce) for bi in self.b], - [bi.clr.eq(self.clr) for bi in self.b], - p.o.ack.eq(self.ce), - f.o.ack.eq(self.ce), - q.i.stb.eq(self.ce), - eqh(q.i.p, p.o.a0), - eqh(q.i.f, f.o.a0), - q.o.ack.eq(1), - ] - x = self.sat_add(bi.o[0] for bi in self.b) - y = self.sat_add(bi.o[1] for bi in self.b) - - c = [] - for i in range(parallelism): - ci = Cordic(width=width, widthz=p_width, - guard=None, eval_mode="pipelined") - self.submodules += ci - c.append(ci) - qoi = getattr(q.o, "z{}".format(i)) - self.comb += [ - ci.xi.eq(x), - ci.yi.eq(y), - eqh(ci.zi, qoi), - eqh(self.o[i][0], ci.xo), - eqh(self.o[i][1], ci.yo), - ] - self.latency += c[0].latency - self.gain = self.b[0].gain * c[0].gain - class Config(Module): def __init__(self): - self.cfg = Record([("tap", 5), ("clr", 1), ("iq", 2)]) - self.i = Endpoint(self.cfg.layout) + self.clr = Signal(4) + self.iq_en = Signal(2) + limit = [Signal((16, True)) for i in range(2*2)] + self.limit = [limit[i:i + 2] for i in range(0, len(limit), 2)] + self.i = Endpoint([("addr", bits_for(len(limit) + 2)), ("data", 16)]) self.ce = Signal() ### - n = Signal(1 << len(self.i.tap)) - tap = Signal.like(self.i.tap) - clk = Signal() - clk0 = Signal() + div = Signal(16) + n = Signal.like(div) + + reg = Array([Cat(self.clr, self.iq_en), Cat(div, n)] + self.limit) self.comb += [ self.i.ack.eq(1), - clk.eq(Array(n)[tap]), + self.ce.eq(n == 0), ] self.sync += [ - clk0.eq(clk), - self.ce.eq(0), - If(clk0 ^ clk, - self.ce.eq(1), + n.eq(n - 1), + If(self.ce, + n.eq(div), ), - n.eq(n + 1), If(self.i.stb, - n.eq(0), - self.cfg.eq(self.i.payload), + reg[self.i.addr].eq(self.i.data), ), ] -class Channel(Module): - def __init__(self, width=16, t_width=None, u_order=4, **kwargs): - if t_width is None: - t_width = width - du = Spline(width=width + (u_order - 1)*t_width, order=u_order) - da = DDS(width, t_width, **kwargs) +class Channel(Module, SatAddMixin): + def __init__(self, width=16, parallelism=4, widths=None, orders=None): + if orders is None: + orders = _Orders(a=4, f=2, p=1) + if widths is None: + widths = _Widths(t=width, a=orders.a*width, p=orders.p*width, + f=3*width + (orders.f - 1)*width) + cfg = Config() - self.submodules += du, da, cfg - self.i = [cfg.i, du.tri(t_width)] + da.i - self.i_names = "cfg u".split() + da.i_names - self.q_i = [Signal((width, True)) for i in range(da.parallelism)] - self.q_o = [ai[1] for ai in da.o] - self.o = [Signal((width, True)) for i in range(da.parallelism)] - self.width = width - self.parallelism = da.parallelism - self.latency = da.latency + 1 - self.cordic_gain = da.gain + a1 = SplineParallelDDS(widths, orders) + a2 = SplineParallelDDS(widths, orders) + b = SplineParallelDUC(widths, orders, parallelism=parallelism, + a_delay=-a1.latency) + u = Spline(width=widths.a, order=orders.a) + du = Delay(widths.a, a1.latency + b.latency - u.latency) + self.submodules += cfg, a1, a2, b, u, du + self.cfg = cfg.i + self.u = u.tri(widths.t) + self.i = [self.cfg, self.u, a1.a, a1.f, a1.p, a2.a, a2.f, a2.p, b.f, b.p] + self.y_in = [Signal((width, True)) for i in range(b.parallelism)] + self.y_out = b.yo + self.o = [Signal((width, True)) for i in range(b.parallelism)] + self.widths = widths + self.orders = orders + self.parallelism = parallelism + self.latency = a1.latency + b.latency + 1 + self.cordic_gain = a1.gain*b.gain ### - # delay du to match da - ddu = Delay((width, True), da.latency - du.latency) - self.submodules += ddu self.comb += [ - ddu.i.eq(du.o.a0[-width:]), - da.clr.eq(cfg.cfg.clr), - da.ce.eq(cfg.ce), - du.o.ack.eq(cfg.ce), + a1.ce.eq(cfg.ce), + a2.ce.eq(cfg.ce), + b.ce.eq(cfg.ce), + u.o.ack.eq(cfg.ce), + Cat(a1.clr, a2.clr, b.clr).eq(cfg.clr), + b.i.x.eq(self.sat_add([a1.xo[0], a2.xo[0]])), + b.i.y.eq(self.sat_add([a1.yo[0], a2.yo[0]])), + eqh(du.i, u.o.a0), ] # wire up outputs and q_{i,o} exchange - for oi, ai, qi in zip(self.o, da.o, self.q_i): + for o, x, y in zip(self.o, b.xo, self.y_in): self.sync += [ - oi.eq(self.sat_add([ - ddu.o + - # du.o.a0[-width:], - Mux(cfg.cfg.iq[0], ai[0], 0), - Mux(cfg.cfg.iq[1], qi, 0)])), + o.eq(self.sat_add([du.o, + Mux(cfg.iq_en[0], x, 0), + Mux(cfg.iq_en[1], y, 0)])), ] - def connect_q(self, buddy): - for i, qi in enumerate(self.q_i): - self.comb += qi.eq(buddy.q_o[i]) + def connect_q_from(self, buddy): + self.comb += Cat(self.y_in).eq(Cat(buddy.y_out))