dsp: implement sawg features

This commit is contained in:
Robert Jördens 2016-11-17 02:36:49 +01:00
parent 98193d6fa1
commit 51f23feeac
1 changed files with 121 additions and 299 deletions

View File

@ -1,354 +1,176 @@
from collections import namedtuple
from migen import * from migen import *
from misoc.interconnect.stream import Endpoint from misoc.interconnect.stream import Endpoint
from misoc.cores.cordic import Cordic from misoc.cores.cordic import Cordic
from .accu import PhasedAccu, Accu from .accu import PhasedAccu, Accu
from .tools import eqh, Delay from .tools import eqh, Delay, SatAddMixin
from .spline import Spline from .spline import Spline
class DDSFast(Module): _Widths = namedtuple("_Widths", "t a p f")
def __init__(self, width, parallelism=4): _Orders = namedtuple("_Orders", "a p f")
a_width = width
p_width = width
f_width = 2*width
self.o = [Signal((width, True)) for i in range(parallelism)]
self.width = width class ParallelDDS(Module):
def __init__(self, widths, parallelism=1, a_delay=0):
self.i = Endpoint([("x", widths.a), ("y", widths.a),
("f", widths.f), ("p", widths.f), ("clr", 1)])
self.parallelism = parallelism self.parallelism = parallelism
self.latency = 1 # will be accumulated self.widths = widths
q = PhasedAccu(f_width, parallelism)
self.submodules += q
self.latency += q.latency
self.a = Endpoint([("a", a_width)])
self.f = Endpoint([("f", f_width)])
self.p = Endpoint([("p", p_width)])
self.i = [self.a, self.f, self.p]
### ###
a = Signal.like(self.a.a) accu = PhasedAccu(widths.f, parallelism)
self.sync += [ cordic = [Cordic(width=widths.a, widthz=widths.p, guard=None,
If(self.a.stb, eval_mode="pipelined") for i in range(parallelism)]
a.eq(self.a.a) self.xo = [c.xo for c in cordic]
), self.yo = [c.yo for c in cordic]
If(self.f.stb, a_delay += accu.latency
eqh(q.i.f, self.f.f) xy_delay = Delay(2*widths.a, max(0, a_delay))
), z_delay = Delay(parallelism*widths.p, max(0, -a_delay))
q.i.clr.eq(0), self.submodules += accu, xy_delay, z_delay, cordic
If(self.p.stb, self.latency = max(0, a_delay) + cordic[0].latency
eqh(q.i.p, self.p.p), self.gain = cordic[0].gain
q.i.clr.eq(1)
),
q.i.stb.eq(self.f.stb | self.p.stb),
]
self.comb += [ self.comb += [
self.a.ack.eq(1), xy_delay.i.eq(Cat(self.i.x, self.i.y)),
self.f.ack.eq(1), z_delay.i.eq(Cat([zi[-widths.p:]
self.p.ack.eq(1), for zi in accu.o.payload.flatten()])),
q.o.ack.eq(1), eqh(accu.i.p, self.i.p),
accu.i.f.eq(self.i.f),
accu.i.clr.eq(self.i.clr),
accu.i.stb.eq(self.i.stb),
self.i.ack.eq(accu.i.ack),
accu.o.ack.eq(1),
[Cat(c.xi, c.yi).eq(xy_delay.o) for c in cordic],
Cat([c.zi for c in cordic]).eq(z_delay.o),
] ]
c = []
for i in range(parallelism):
ci = Cordic(width=width, widthz=p_width,
guard=None, eval_mode="pipelined")
self.submodules += ci
c.append(ci)
qoi = getattr(q.o, "z{}".format(i))
self.comb += [
eqh(ci.xi, a),
ci.yi.eq(0),
eqh(ci.zi, qoi),
eqh(self.o[i], ci.xo),
]
self.latency += c[0].latency
self.gain = c[0].gain
class SplineParallelDUC(ParallelDDS):
class DDSFast(Module): def __init__(self, widths, orders, **kwargs):
def __init__(self, width, t_width=None, p = Spline(order=orders.p, width=widths.p)
a_width=None, p_width=None, f_width=None, f = Spline(order=orders.f, width=widths.f)
a_order=4, p_order=1, f_order=2, parallelism=8): self.f = f.tri(widths.t)
if t_width is None: self.p = p.tri(widths.t)
t_width = width self.submodules += p, f
if a_width is None: self.ce = Signal(reset=1)
a_width = width + (a_order - 1)*t_width self.clr = Signal()
if p_width is None: super().__init__(widths._replace(p=len(self.f.a0), f=len(self.f.a0)),
p_width = width + (p_order - 1)*t_width **kwargs)
if f_width is None: self.latency += f.latency
f_width = width + (f_order + 1)*t_width
a = Spline(order=a_order, width=a_width)
p = Spline(order=p_order, width=p_width)
f = Spline(order=f_order, width=f_width)
self.submodules += a, p, f
self.a = a.tri(t_width)
self.f = f.tri(t_width)
self.p = p.tri(t_width)
self.i = [self.a, self.f, self.p]
self.o = [[Signal((width, True)) for i in range(2)]
for i in range(parallelism)]
self.parallelism = parallelism
self.latency = 0 # will be accumulated
### ###
self.latency += p.latency assert p.latency == f.latency
q = PhasedAccu(f_width, parallelism)
self.submodules += q
self.latency += q.latency
da = [Signal((width, True)) for i in range(q.latency)]
self.sync += [ self.comb += [
If(q.i.stb & q.i.ack, p.o.ack.eq(self.ce),
eqh(da[0], a.o.a0), f.o.ack.eq(self.ce),
[da[i + 1].eq(da[i]) for i in range(len(da) - 1)], eqh(self.i.f, f.o.a0),
), eqh(self.i.p, p.o.a0),
If(p.o.stb & p.o.ack, self.i.clr.eq(self.clr),
q.i.clr.eq(0), self.i.stb.eq(p.o.stb & f.o.stb),
),
If(p.i.stb & p.i.ack,
q.i.clr.eq(self.clr),
),
] ]
class SplineParallelDDS(SplineParallelDUC):
def __init__(self, widths, orders, **kwargs):
a = Spline(order=orders.a, width=widths.a)
self.a = a.tri(widths.t)
self.submodules += a
super().__init__(widths._replace(a=len(self.a.a0)),
orders, **kwargs)
###
self.comb += [ self.comb += [
a.o.ack.eq(self.ce), a.o.ack.eq(self.ce),
p.o.ack.eq(self.ce), eqh(self.i.x, a.o.a0),
f.o.ack.eq(self.ce), self.i.y.eq(0),
q.i.stb.eq(self.ce),
eqh(q.i.p, p.o.a0),
q.i.f.eq(f.o.a0),
q.o.ack.eq(1),
] ]
c = []
for i in range(parallelism):
ci = Cordic(width=width, widthz=p_width,
guard=None, eval_mode="pipelined")
self.submodules += ci
c.append(ci)
qoi = getattr(q.o, "z{}".format(i))
self.comb += [
ci.xi.eq(da[-1]),
ci.yi.eq(0),
eqh(ci.zi, qoi),
eqh(self.o[i][0], ci.xo),
eqh(self.o[i][1], ci.yo),
]
self.latency += c[0].latency
self.gain = c[0].gain
class DDSSlow(Module):
def __init__(self, width, t_width, a_width, p_width, f_width,
a_order=4, p_order=1, f_order=2):
a = Spline(order=a_order, width=a_width)
p = Spline(order=p_order, width=p_width)
f = Spline(order=f_order, width=f_width)
self.submodules += a, p, f
self.a = a.tri(t_width)
self.f = f.tri(t_width)
self.p = p.tri(t_width)
self.i = [self.a, self.f, self.p]
self.i_names = "a f p".split()
self.o = [Signal((width, True)) for i in range(2)]
self.ce = Signal()
self.clr = Signal()
self.latency = 0 # will be accumulated
###
self.latency += p.latency
q = Accu(f_width)
self.latency += q.latency
da = CEInserter()(Delay)(width, q.latency)
c = Cordic(width=width, widthz=p_width,
guard=None, eval_mode="pipelined")
self.latency += c.latency
self.gain = c.gain
self.submodules += q, da, c
self.sync += [
If(p.o.stb & p.o.ack,
q.i.clr.eq(0),
),
If(p.i.stb & p.i.ack,
q.i.clr.eq(self.clr),
),
]
self.comb += [
da.ce.eq(q.i.stb & q.i.ack),
a.o.ack.eq(self.ce),
p.o.ack.eq(self.ce),
f.o.ack.eq(self.ce),
q.i.stb.eq(self.ce),
eqh(da.i, a.o.a0),
eqh(q.i.p, p.o.a0),
q.i.f.eq(f.o.a0),
q.o.ack.eq(1),
c.xi.eq(da.o),
c.yi.eq(0),
eqh(c.zi, q.o.z),
eqh(self.o[0], c.xo),
eqh(self.o[1], c.yo),
]
class DDS(Module):
def __init__(self, width, t_width=None,
a_width=None, p_width=None, f_width=None,
a_order=4, p_order=1, f_order=2, parallelism=8):
if t_width is None:
t_width = width
if a_width is None:
a_width = width + (a_order - 1)*t_width
if p_width is None:
p_width = width + (p_order - 1)*t_width
if f_width is None:
f_width = width + (f_order + 1)*t_width
self.b = [DDSSlow(width, t_width, a_width, p_width, f_width, a_order,
p_order, f_order) for i in range(2)]
p = Spline(order=1, width=p_width)
f = Spline(order=1, width=f_width)
self.submodules += self.b, p, f
self.f0 = f.tri(t_width)
self.p0 = p.tri(t_width)
self.i = [self.f0, self.p0]
self.i_names = "f0 p0".split()
for i, bi in enumerate(self.b):
self.i += bi.i
for ii in bi.i_names:
self.i_names.append("{}{}".format(ii, i + 1))
for j in "afp":
setattr(self, "{}{}".format(j, i + 1), getattr(bi, j))
self.o = [[Signal((width, True)) for i in range(2)]
for i in range(parallelism)]
self.ce = Signal()
self.clr = Signal()
self.parallelism = parallelism
self.latency = 0 # will be accumulated
###
self.latency += self.b[0].latency # TODO: f0/p0, q.latency delta
q = PhasedAccu(f_width, parallelism)
self.submodules += q
self.sync += [
If(p.o.stb & p.o.ack,
q.i.clr.eq(0),
),
If(p.i.stb & p.i.ack,
q.i.clr.eq(self.clr),
),
]
self.comb += [
[bi.ce.eq(self.ce) for bi in self.b],
[bi.clr.eq(self.clr) for bi in self.b],
p.o.ack.eq(self.ce),
f.o.ack.eq(self.ce),
q.i.stb.eq(self.ce),
eqh(q.i.p, p.o.a0),
eqh(q.i.f, f.o.a0),
q.o.ack.eq(1),
]
x = self.sat_add(bi.o[0] for bi in self.b)
y = self.sat_add(bi.o[1] for bi in self.b)
c = []
for i in range(parallelism):
ci = Cordic(width=width, widthz=p_width,
guard=None, eval_mode="pipelined")
self.submodules += ci
c.append(ci)
qoi = getattr(q.o, "z{}".format(i))
self.comb += [
ci.xi.eq(x),
ci.yi.eq(y),
eqh(ci.zi, qoi),
eqh(self.o[i][0], ci.xo),
eqh(self.o[i][1], ci.yo),
]
self.latency += c[0].latency
self.gain = self.b[0].gain * c[0].gain
class Config(Module): class Config(Module):
def __init__(self): def __init__(self):
self.cfg = Record([("tap", 5), ("clr", 1), ("iq", 2)]) self.clr = Signal(4)
self.i = Endpoint(self.cfg.layout) self.iq_en = Signal(2)
limit = [Signal((16, True)) for i in range(2*2)]
self.limit = [limit[i:i + 2] for i in range(0, len(limit), 2)]
self.i = Endpoint([("addr", bits_for(len(limit) + 2)), ("data", 16)])
self.ce = Signal() self.ce = Signal()
### ###
n = Signal(1 << len(self.i.tap)) div = Signal(16)
tap = Signal.like(self.i.tap) n = Signal.like(div)
clk = Signal()
clk0 = Signal() reg = Array([Cat(self.clr, self.iq_en), Cat(div, n)] + self.limit)
self.comb += [ self.comb += [
self.i.ack.eq(1), self.i.ack.eq(1),
clk.eq(Array(n)[tap]), self.ce.eq(n == 0),
] ]
self.sync += [ self.sync += [
clk0.eq(clk), n.eq(n - 1),
self.ce.eq(0), If(self.ce,
If(clk0 ^ clk, n.eq(div),
self.ce.eq(1),
), ),
n.eq(n + 1),
If(self.i.stb, If(self.i.stb,
n.eq(0), reg[self.i.addr].eq(self.i.data),
self.cfg.eq(self.i.payload),
), ),
] ]
class Channel(Module): class Channel(Module, SatAddMixin):
def __init__(self, width=16, t_width=None, u_order=4, **kwargs): def __init__(self, width=16, parallelism=4, widths=None, orders=None):
if t_width is None: if orders is None:
t_width = width orders = _Orders(a=4, f=2, p=1)
du = Spline(width=width + (u_order - 1)*t_width, order=u_order) if widths is None:
da = DDS(width, t_width, **kwargs) widths = _Widths(t=width, a=orders.a*width, p=orders.p*width,
f=3*width + (orders.f - 1)*width)
cfg = Config() cfg = Config()
self.submodules += du, da, cfg a1 = SplineParallelDDS(widths, orders)
self.i = [cfg.i, du.tri(t_width)] + da.i a2 = SplineParallelDDS(widths, orders)
self.i_names = "cfg u".split() + da.i_names b = SplineParallelDUC(widths, orders, parallelism=parallelism,
self.q_i = [Signal((width, True)) for i in range(da.parallelism)] a_delay=-a1.latency)
self.q_o = [ai[1] for ai in da.o] u = Spline(width=widths.a, order=orders.a)
self.o = [Signal((width, True)) for i in range(da.parallelism)] du = Delay(widths.a, a1.latency + b.latency - u.latency)
self.width = width self.submodules += cfg, a1, a2, b, u, du
self.parallelism = da.parallelism self.cfg = cfg.i
self.latency = da.latency + 1 self.u = u.tri(widths.t)
self.cordic_gain = da.gain self.i = [self.cfg, self.u, a1.a, a1.f, a1.p, a2.a, a2.f, a2.p, b.f, b.p]
self.y_in = [Signal((width, True)) for i in range(b.parallelism)]
self.y_out = b.yo
self.o = [Signal((width, True)) for i in range(b.parallelism)]
self.widths = widths
self.orders = orders
self.parallelism = parallelism
self.latency = a1.latency + b.latency + 1
self.cordic_gain = a1.gain*b.gain
### ###
# delay du to match da
ddu = Delay((width, True), da.latency - du.latency)
self.submodules += ddu
self.comb += [ self.comb += [
ddu.i.eq(du.o.a0[-width:]), a1.ce.eq(cfg.ce),
da.clr.eq(cfg.cfg.clr), a2.ce.eq(cfg.ce),
da.ce.eq(cfg.ce), b.ce.eq(cfg.ce),
du.o.ack.eq(cfg.ce), u.o.ack.eq(cfg.ce),
Cat(a1.clr, a2.clr, b.clr).eq(cfg.clr),
b.i.x.eq(self.sat_add([a1.xo[0], a2.xo[0]])),
b.i.y.eq(self.sat_add([a1.yo[0], a2.yo[0]])),
eqh(du.i, u.o.a0),
] ]
# wire up outputs and q_{i,o} exchange # wire up outputs and q_{i,o} exchange
for oi, ai, qi in zip(self.o, da.o, self.q_i): for o, x, y in zip(self.o, b.xo, self.y_in):
self.sync += [ self.sync += [
oi.eq(self.sat_add([ o.eq(self.sat_add([du.o,
ddu.o + Mux(cfg.iq_en[0], x, 0),
# du.o.a0[-width:], Mux(cfg.iq_en[1], y, 0)])),
Mux(cfg.cfg.iq[0], ai[0], 0),
Mux(cfg.cfg.iq[1], qi, 0)])),
] ]
def connect_q(self, buddy): def connect_q_from(self, buddy):
for i, qi in enumerate(self.q_i): self.comb += Cat(self.y_in).eq(Cat(buddy.y_out))
self.comb += qi.eq(buddy.q_o[i])