From a3bb6c167c6d7f2b337962b436df4970f38b6a8b Mon Sep 17 00:00:00 2001
From: Sebastien Bourdeauducq
Date: Sat, 16 Sep 2017 14:13:42 +0800
Subject: [PATCH] rtio: use SED

---
 artiq/gateware/drtio/rt_controller_master.py |  19 +-
 artiq/gateware/rtio/__init__.py              |   3 +-
 artiq/gateware/rtio/cdc.py                   |  20 +-
 artiq/gateware/rtio/channel.py               |  36 ++
 artiq/gateware/rtio/core.py                  | 433 +++++--------
 5 files changed, 164 insertions(+), 347 deletions(-)
 create mode 100644 artiq/gateware/rtio/channel.py

diff --git a/artiq/gateware/drtio/rt_controller_master.py b/artiq/gateware/drtio/rt_controller_master.py
index d97f418c6..0522e68e2 100644
--- a/artiq/gateware/drtio/rt_controller_master.py
+++ b/artiq/gateware/drtio/rt_controller_master.py
@@ -7,7 +7,7 @@ from migen.genlib.resetsync import AsyncResetSynchronizer
 
 from misoc.interconnect.csr import *
 
-from artiq.gateware.rtio.cdc import RTIOCounter
+from artiq.gateware.rtio.cdc import GrayCodeTransfer
 from artiq.gateware.rtio import cri
 
 
@@ -33,6 +33,24 @@ class _CSRs(AutoCSR):
         self.o_wait = CSRStatus()
 
 
+class RTIOCounter(Module):
+    def __init__(self, width):
+        self.width = width
+        # Timestamp counter in RTIO domain
+        self.value_rtio = Signal(width)
+        # Timestamp counter resynchronized to sys domain
+        # Lags behind value_rtio, monotonic and glitch-free
+        self.value_sys = Signal(width)
+
+        # # #
+
+        # note: counter is in rtio domain and never affected by the reset CSRs
+        self.sync.rtio += self.value_rtio.eq(self.value_rtio + 1)
+        gt = GrayCodeTransfer(width)
+        self.submodules += gt
+        self.comb += gt.i.eq(self.value_rtio), self.value_sys.eq(gt.o)
+
+
 class RTController(Module):
     def __init__(self, rt_packet, channel_count, fine_ts_width):
         self.csrs = _CSRs()
diff --git a/artiq/gateware/rtio/__init__.py b/artiq/gateware/rtio/__init__.py
index 18feec299..718f2bc7f 100644
--- a/artiq/gateware/rtio/__init__.py
+++ b/artiq/gateware/rtio/__init__.py
@@ -1,5 +1,6 @@
 from artiq.gateware.rtio.cri import KernelInitiator, CRIInterconnectShared
-from artiq.gateware.rtio.core import Channel, LogChannel, Core
+from artiq.gateware.rtio.channel import Channel, LogChannel
+from artiq.gateware.rtio.core import Core
 from artiq.gateware.rtio.analyzer import Analyzer
 from artiq.gateware.rtio.moninj import MonInj
 from artiq.gateware.rtio.dma import DMA
diff --git a/artiq/gateware/rtio/cdc.py b/artiq/gateware/rtio/cdc.py
index af93b4105..493b1e2e8 100644
--- a/artiq/gateware/rtio/cdc.py
+++ b/artiq/gateware/rtio/cdc.py
@@ -2,7 +2,7 @@ from migen import *
 from migen.genlib.cdc import *
 
 
-__all__ = ["GrayCodeTransfer", "RTIOCounter", "BlindTransfer"]
+__all__ = ["GrayCodeTransfer", "BlindTransfer"]
 
 
 # note: transfer is in rtio/sys domains and not affected by the reset CSRs
@@ -28,24 +28,6 @@ class GrayCodeTransfer(Module):
         self.sync += self.o.eq(value_sys)
 
 
-class RTIOCounter(Module):
-    def __init__(self, width):
-        self.width = width
-        # Timestamp counter in RTIO domain
-        self.value_rtio = Signal(width)
-        # Timestamp counter resynchronized to sys domain
-        # Lags behind value_rtio, monotonic and glitch-free
-        self.value_sys = Signal(width)
-
-        # # #
-
-        # note: counter is in rtio domain and never affected by the reset CSRs
-        self.sync.rtio += self.value_rtio.eq(self.value_rtio + 1)
-        gt = GrayCodeTransfer(width)
-        self.submodules += gt
-        self.comb += gt.i.eq(self.value_rtio), self.value_sys.eq(gt.o)
-
-
 class BlindTransfer(Module):
     def __init__(self, idomain="rio", odomain="rsys"):
         self.i = Signal()
diff --git a/artiq/gateware/rtio/channel.py b/artiq/gateware/rtio/channel.py
new file mode 100644
index 000000000..5e85a1add
--- /dev/null
+++ b/artiq/gateware/rtio/channel.py
@@ -0,0 +1,36 @@
+import warnings
+
+from artiq.gateware.rtio import rtlink
+
+
+class Channel:
+    def __init__(self, interface, probes=None, overrides=None,
+                 ofifo_depth=None, ififo_depth=64):
+        if probes is None:
+            probes = []
+        if overrides is None:
+            overrides = []
+
+        self.interface = interface
+        self.probes = probes
+        self.overrides = overrides
+        if ofifo_depth is None:
+            ofifo_depth = 64
+        else:
+            warnings.warn("ofifo_depth is deprecated", DeprecationWarning)
+        self.ofifo_depth = ofifo_depth
+        self.ififo_depth = ififo_depth
+
+    @classmethod
+    def from_phy(cls, phy, **kwargs):
+        probes = getattr(phy, "probes", [])
+        overrides = getattr(phy, "overrides", [])
+        return cls(phy.rtlink, probes, overrides, **kwargs)
+
+
+class LogChannel:
+    """A degenerate channel used to log messages into the analyzer."""
+    def __init__(self):
+        self.interface = rtlink.Interface(rtlink.OInterface(32))
+        self.probes = []
+        self.overrides = []
diff --git a/artiq/gateware/rtio/core.py b/artiq/gateware/rtio/core.py
index 76bcd09ee..87c5975ee 100644
--- a/artiq/gateware/rtio/core.py
+++ b/artiq/gateware/rtio/core.py
@@ -5,195 +5,18 @@ from migen import *
 from migen.genlib.record import Record
 from migen.genlib.fifo import AsyncFIFO
 from migen.genlib.resetsync import AsyncResetSynchronizer
+from migen.genlib.cdc import PulseSynchronizer
 
 from misoc.interconnect.csr import *
 
-from artiq.gateware.rtio import cri, rtlink
+from artiq.gateware.rtio import cri
+from artiq.gateware.rtio import rtlink
+from artiq.gateware.rtio.channel import *
 from artiq.gateware.rtio.cdc import *
-
-
-# CHOOSING A GUARD TIME
-#
-# The buffer must be transferred to the FIFO soon enough to account for:
-#     * transfer of counter to sys domain: Tio + 2*Tsys + Tsys
-#     * FIFO latency: Tsys + 2*Tio
-#     * FIFO buffer latency: Tio
-# Therefore we must choose:
-#     guard_io_cycles > (4*Tio + 4*Tsys)/Tio
-#
-# We are writing to the FIFO from the buffer when the guard time has been
-# reached. This can fill the FIFO and deassert the writable flag. A race
-# condition occurs that causes problems if the deassertion happens between
-# the CPU checking the writable flag (and reading 1) and writing a new event.
-#
-# When the FIFO is about to be full, it contains fifo_depth-1 events of
-# strictly increasing timestamps.
-#
-# Thus the FIFO-filling event's timestamp must satisfy:
-#     timestamp*Tio > (fifo_depth-1)*Tio + time
-# We also have (guard time reached):
-#     timestamp*Tio < time + guard_io_cycles*Tio
-# [NB: time > counter.value_sys*Tio]
-# Thus we must have:
-#     guard_io_cycles > fifo_depth-1
-#
-# We can prevent overflows by choosing instead:
-#     guard_io_cycles < fifo_depth-1
-
-class _OutputManager(Module):
-    def __init__(self, interface, counter, fifo_depth, guard_io_cycles):
-        data_width = rtlink.get_data_width(interface)
-        address_width = rtlink.get_address_width(interface)
-        fine_ts_width = rtlink.get_fine_ts_width(interface)
-
-        ev_layout = []
-        if data_width:
-            ev_layout.append(("data", data_width))
-        if address_width:
-            ev_layout.append(("address", address_width))
-        ev_layout.append(("timestamp", counter.width + fine_ts_width))
-        # ev must be valid 1 cycle before we to account for the latency in
-        # generating replace, sequence_error and collision
-        self.ev = Record(ev_layout)
-
-        self.writable = Signal()
-        self.we = Signal()  # maximum throughput 1/2
-
-        self.underflow = Signal()  # valid 1 cycle after we, pulsed
-        self.sequence_error = Signal()
-        self.collision = Signal()
-        self.busy = Signal()  # pulsed
-
-        # # #
-
-        # FIFO
-        fifo = ClockDomainsRenamer({"write": "rsys", "read": "rio"})(
-            AsyncFIFO(layout_len(ev_layout), fifo_depth))
-        self.submodules += fifo
-        fifo_in = Record(ev_layout)
-        fifo_out = Record(ev_layout)
-        self.comb += [
-            fifo.din.eq(fifo_in.raw_bits()),
-            fifo_out.raw_bits().eq(fifo.dout)
-        ]
-
-        # Buffer
-        buf_pending = Signal()
-        buf = Record(ev_layout)
-        buf_just_written = Signal()
-
-        # Special cases
-        replace = Signal(reset_less=True)
-        sequence_error = Signal(reset_less=True)
-        collision = Signal(reset_less=True)
-        any_error = Signal()
-        if interface.enable_replace:
-            # Note: replace may be asserted at the same time as collision
-            # when addresses are different. In that case, it is a collision.
-            self.sync.rsys += replace.eq(self.ev.timestamp == buf.timestamp)
-        # Detect sequence errors on coarse timestamps only
-        # so that they are mutually exclusive with collision errors.
-        self.sync.rsys += sequence_error.eq(self.ev.timestamp[fine_ts_width:] <
-                                            buf.timestamp[fine_ts_width:])
-        if interface.enable_replace:
-            if address_width:
-                different_addresses = self.ev.address != buf.address
-            else:
-                different_addresses = 0
-            if fine_ts_width:
-                self.sync.rsys += collision.eq(
-                    (self.ev.timestamp[fine_ts_width:] == buf.timestamp[fine_ts_width:])
-                    & ((self.ev.timestamp[:fine_ts_width] != buf.timestamp[:fine_ts_width])
-                       |different_addresses))
-            else:
-                self.sync.rsys += collision.eq(
-                    (self.ev.timestamp == buf.timestamp) & different_addresses)
-        else:
-            self.sync.rsys += collision.eq(
-                self.ev.timestamp[fine_ts_width:] == buf.timestamp[fine_ts_width:])
-        self.comb += [
-            any_error.eq(sequence_error | collision),
-            self.sequence_error.eq(self.we & sequence_error),
-            self.collision.eq(self.we & collision)
-        ]
-
-        # Buffer read and FIFO write
-        self.comb += fifo_in.eq(buf)
-        in_guard_time = Signal()
-        self.comb += in_guard_time.eq(
-            buf.timestamp[fine_ts_width:]
-                < counter.value_sys + guard_io_cycles)
-        self.sync.rsys += If(in_guard_time, buf_pending.eq(0))
-        self.comb += \
-            If(buf_pending,
-                If(in_guard_time,
-                    If(buf_just_written,
-                        self.underflow.eq(1)
-                    ).Else(
-                        fifo.we.eq(1)
-                    )
-                ),
-                If(self.we & ~replace & ~any_error,
-                    fifo.we.eq(1)
-                )
-            )
-
-        # Buffer write
-        # Must come after read to handle concurrent read+write properly
-        self.sync.rsys += [
-            buf_just_written.eq(0),
-            If(self.we & ~any_error,
-                buf_just_written.eq(1),
-                buf_pending.eq(1),
-                buf.eq(self.ev)
-            )
-        ]
-        self.comb += self.writable.eq(fifo.writable)
-
-        # Buffer output of FIFO to improve timing
-        dout_stb = Signal()
-        dout_ack = Signal()
-        dout = Record(ev_layout)
-        self.sync.rio += \
-            If(fifo.re,
-                dout_stb.eq(1),
-                dout.eq(fifo_out)
-            ).Elif(dout_ack,
-                dout_stb.eq(0)
-            )
-        self.comb += fifo.re.eq(fifo.readable & (~dout_stb | dout_ack))
-
-        # latency compensation
-        if interface.delay:
-            counter_rtio = Signal.like(counter.value_rtio, reset_less=True)
-            self.sync.rtio += counter_rtio.eq(counter.value_rtio -
-                                              (interface.delay + 1))
-        else:
-            counter_rtio = counter.value_rtio
-
-        # FIFO read through buffer
-        self.comb += [
-            dout_ack.eq(
-                dout.timestamp[fine_ts_width:] == counter_rtio),
-            interface.stb.eq(dout_stb & dout_ack)
-        ]
-
-        busy_transfer = BlindTransfer()
-        self.submodules += busy_transfer
-        self.comb += [
-            busy_transfer.i.eq(interface.stb & interface.busy),
-            self.busy.eq(busy_transfer.o),
-        ]
-
-        if data_width:
-            self.comb += interface.data.eq(dout.data)
-        if address_width:
-            self.comb += interface.address.eq(dout.address)
-        if fine_ts_width:
-            self.comb += interface.fine_ts.eq(dout.timestamp[:fine_ts_width])
+from artiq.gateware.rtio.sed.core import *
 
 
 class _InputManager(Module):
-    def __init__(self, interface, counter, fifo_depth):
+    def __init__(self, interface, coarse_ts, fifo_depth):
         data_width = rtlink.get_data_width(interface)
         fine_ts_width = rtlink.get_fine_ts_width(interface)
@@ -201,7 +24,7 @@ class _InputManager(Module):
         if data_width:
             ev_layout.append(("data", data_width))
         if interface.timestamped:
-            ev_layout.append(("timestamp", counter.width + fine_ts_width))
+            ev_layout.append(("timestamp", len(coarse_ts) + fine_ts_width))
         self.ev = Record(ev_layout)
 
         self.readable = Signal()
@@ -223,11 +46,11 @@ class _InputManager(Module):
 
         # latency compensation
         if interface.delay:
-            counter_rtio = Signal.like(counter.value_rtio, reset_less=True)
-            self.sync.rtio += counter_rtio.eq(counter.value_rtio -
+            counter_rtio = Signal.like(coarse_ts, reset_less=True)
+            self.sync.rtio += counter_rtio.eq(coarse_ts -
                                               (interface.delay + 1))
         else:
-            counter_rtio = counter.value_rtio
+            counter_rtio = coarse_ts
 
         # FIFO write
         if data_width:
@@ -255,41 +78,80 @@ class _InputManager(Module):
         ]
 
 
-class Channel:
-    def __init__(self, interface, probes=None, overrides=None,
-                 ofifo_depth=64, ififo_depth=64):
-        if probes is None:
-            probes = []
-        if overrides is None:
-            overrides = []
+class _Inputs(Module):
+    def __init__(self, interface, coarse_ts, channels):
+        self.cri = interface
 
-        self.interface = interface
-        self.probes = probes
-        self.overrides = overrides
-        self.ofifo_depth = ofifo_depth
-        self.ififo_depth = ififo_depth
+        # Inputs
+        i_statuses = []
+        i_datas, i_timestamps = [], []
+        i_ack = Signal()
+        sel = self.cri.chan_sel[:16]
+        for n, channel in enumerate(channels):
+            if isinstance(channel, LogChannel):
+                i_datas.append(0)
+                i_timestamps.append(0)
+                i_statuses.append(0)
+                continue
 
-    @classmethod
-    def from_phy(cls, phy, **kwargs):
-        probes = getattr(phy, "probes", [])
-        overrides = getattr(phy, "overrides", [])
-        return cls(phy.rtlink, probes, overrides, **kwargs)
+            if channel.interface.i is not None:
+                selected = Signal()
+                self.comb += selected.eq(sel == n)
+
+                i_manager = _InputManager(channel.interface.i, coarse_ts,
+                                          channel.ififo_depth)
+                self.submodules += i_manager
 
-class LogChannel:
-    """A degenerate channel used to log messages into the analyzer."""
-    def __init__(self):
-        self.interface = rtlink.Interface(rtlink.OInterface(32))
-        self.probes = []
-        self.overrides = []
+                if hasattr(i_manager.ev, "data"):
+                    i_datas.append(i_manager.ev.data)
+                else:
+                    i_datas.append(0)
+                if channel.interface.i.timestamped:
+                    ts_shift = (len(self.cri.i_timestamp) - len(i_manager.ev.timestamp))
+                    i_timestamps.append(i_manager.ev.timestamp << ts_shift)
+                else:
+                    i_timestamps.append(0)
+
+                overflow = Signal()
+                self.sync.rsys += [
+                    If(selected & i_ack,
+                        overflow.eq(0)),
+                    If(i_manager.overflow,
+                        overflow.eq(1))
+                ]
+                self.comb += i_manager.re.eq(selected & i_ack & ~overflow)
+                i_statuses.append(Cat(i_manager.readable & ~overflow, overflow))
+
+            else:
+                i_datas.append(0)
+                i_timestamps.append(0)
+                i_statuses.append(0)
+
+        i_status_raw = Signal(2)
+        self.comb += i_status_raw.eq(Array(i_statuses)[sel])
+        input_timeout = Signal.like(self.cri.timestamp)
+        input_pending = Signal()
+        self.sync.rsys += [
+            i_ack.eq(0),
+            If(i_ack,
+                self.cri.i_status.eq(Cat(~i_status_raw[0], i_status_raw[1], 0)),
+                self.cri.i_data.eq(Array(i_datas)[sel]),
+                self.cri.i_timestamp.eq(Array(i_timestamps)[sel]),
+            ),
+            If((self.cri.counter >= input_timeout) | (i_status_raw != 0),
+                If(input_pending, i_ack.eq(1)),
+                input_pending.eq(0)
+            ),
+            If(self.cri.cmd == cri.commands["read"],
+                input_timeout.eq(self.cri.timestamp),
+                input_pending.eq(1),
+                self.cri.i_status.eq(0b100)
+            )
+        ]
 
 
 class Core(Module, AutoCSR):
-    def __init__(self, channels, fine_ts_width=None, guard_io_cycles=20):
-        if fine_ts_width is None:
-            fine_ts_width = max(rtlink.get_fine_ts_width(c.interface)
-                                for c in channels)
-
+    def __init__(self, channels, lane_count=8, fifo_depth=128):
         self.cri = cri.Interface()
         self.reset = CSR()
         self.reset_phy = CSR()
@@ -297,7 +159,7 @@ class Core(Module, AutoCSR):
 
         # Clocking/Reset
         # Create rsys, rio and rio_phy domains based on sys and rtio
-        # with reset controlled by CRI.
+        # with reset controlled by CSR.
         #
         # The `rio` CD contains logic that is reset with `core.reset()`.
         # That's state that could unduly affect subsequent experiments,
@@ -327,125 +189,44 @@ class Core(Module, AutoCSR):
         self.specials += AsyncResetSynchronizer(self.cd_rio, cmd_reset)
         self.specials += AsyncResetSynchronizer(self.cd_rio_phy, cmd_reset_phy)
 
-        # Managers
-        self.submodules.counter = RTIOCounter(len(self.cri.timestamp) - fine_ts_width)
+        # TSC
+        fine_ts_width = max(rtlink.get_fine_ts_width(channel.interface)
+                            for channel in channels)
+        coarse_ts = Signal(64-fine_ts_width)
+        self.sync.rtio += coarse_ts.eq(coarse_ts + 1)
+        coarse_ts_cdc = GrayCodeTransfer(len(coarse_ts))
+        self.submodules += coarse_ts_cdc
+        self.comb += [
+            coarse_ts_cdc.i.eq(coarse_ts),
+            self.cri.counter.eq(coarse_ts_cdc.o << fine_ts_width)
+        ]
 
-        # Collision is not an asynchronous error with local RTIO, but
-        # we treat it as such for consistency with DRTIO, where collisions
-        # are reported by the satellites.
-        o_underflow = Signal()
-        o_sequence_error = Signal()
+        # Asynchronous output errors
+        o_collision_sync = PulseSynchronizer("rtio", "rsys")
+        o_busy_sync = PulseSynchronizer("rtio", "rsys")
+        self.submodules += o_collision_sync, o_busy_sync
         o_collision = Signal()
         o_busy = Signal()
-        self.sync.rsys += [
-            If(self.cri.cmd == cri.commands["write"],
-                o_underflow.eq(0),
-                o_sequence_error.eq(0),
-            )
-        ]
         self.sync += [
             If(self.async_error.re,
                 If(self.async_error.r[0], o_collision.eq(0)),
                 If(self.async_error.r[1], o_busy.eq(0)),
-            )
+            ),
+            If(o_collision_sync.o, o_collision.eq(1)),
+            If(o_busy_sync.o, o_busy.eq(1))
         ]
+        self.comb += self.async_error.w.eq(Cat(o_collision, o_busy))
 
-        o_statuses, i_statuses = [], []
-        i_datas, i_timestamps = [], []
-        i_ack = Signal()
-        sel = self.cri.chan_sel[:16]
-        for n, channel in enumerate(channels):
-            if isinstance(channel, LogChannel):
-                o_statuses.append(1)
-                i_datas.append(0)
-                i_timestamps.append(0)
-                i_statuses.append(0)
-                continue
+        # Inputs
+        inputs = _Inputs(self.cri, coarse_ts, channels)
+        self.submodules += inputs
 
-            selected = Signal()
-            self.comb += selected.eq(sel == n)
-
-            o_manager = _OutputManager(channel.interface.o, self.counter,
-                                       channel.ofifo_depth, guard_io_cycles)
-            self.submodules += o_manager
-
-            if hasattr(o_manager.ev, "data"):
-                self.comb += o_manager.ev.data.eq(self.cri.o_data)
-            if hasattr(o_manager.ev, "address"):
-                self.comb += o_manager.ev.address.eq(self.cri.o_address)
-            ts_shift = len(self.cri.timestamp) - len(o_manager.ev.timestamp)
-            self.comb += o_manager.ev.timestamp.eq(self.cri.timestamp[ts_shift:])
-
-            self.comb += o_manager.we.eq(selected & (self.cri.cmd == cri.commands["write"]))
-
-            self.sync.rsys += [
-                If(o_manager.underflow, o_underflow.eq(1)),
-                If(o_manager.sequence_error, o_sequence_error.eq(1))
-            ]
-            self.sync += [
-                If(o_manager.collision, o_collision.eq(1)),
-                If(o_manager.busy, o_busy.eq(1))
-            ]
-            o_statuses.append(o_manager.writable)
-
-            if channel.interface.i is not None:
-                i_manager = _InputManager(channel.interface.i, self.counter,
-                                          channel.ififo_depth)
-                self.submodules += i_manager
-
-                if hasattr(i_manager.ev, "data"):
-                    i_datas.append(i_manager.ev.data)
-                else:
-                    i_datas.append(0)
-                if channel.interface.i.timestamped:
-                    ts_shift = (len(self.cri.i_timestamp) - len(i_manager.ev.timestamp))
-                    i_timestamps.append(i_manager.ev.timestamp << ts_shift)
-                else:
-                    i_timestamps.append(0)
-
-                overflow = Signal()
-                self.sync.rsys += [
-                    If(selected & i_ack,
-                        overflow.eq(0)),
-                    If(i_manager.overflow,
-                        overflow.eq(1))
-                ]
-                self.comb += i_manager.re.eq(selected & i_ack & ~overflow)
-                i_statuses.append(Cat(i_manager.readable & ~overflow, overflow))
-
-            else:
-                i_datas.append(0)
-                i_timestamps.append(0)
-                i_statuses.append(0)
-
-        o_status_raw = Signal()
+        # Outputs
+        outputs = SED(channels, "async", interface=self.cri)
+        self.submodules += outputs
+        self.comb += outputs.coarse_timestamp.eq(coarse_ts)
+        self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16)
         self.comb += [
-            o_status_raw.eq(Array(o_statuses)[sel]),
-            self.cri.o_status.eq(Cat(
-                ~o_status_raw, o_underflow, o_sequence_error)),
-            self.async_error.w.eq(Cat(o_collision, o_busy))
+            o_collision_sync.i.eq(outputs.collision),
+            o_busy_sync.i.eq(outputs.busy)
         ]
-
-        i_status_raw = Signal(2)
-        self.comb += i_status_raw.eq(Array(i_statuses)[sel])
-        input_timeout = Signal.like(self.cri.timestamp)
-        input_pending = Signal()
-        self.sync.rsys += [
-            i_ack.eq(0),
-            If(i_ack,
-                self.cri.i_status.eq(Cat(~i_status_raw[0], i_status_raw[1], 0)),
-                self.cri.i_data.eq(Array(i_datas)[sel]),
-                self.cri.i_timestamp.eq(Array(i_timestamps)[sel]),
-            ),
-            If((self.cri.counter >= input_timeout) | (i_status_raw != 0),
-                If(input_pending, i_ack.eq(1)),
-                input_pending.eq(0)
-            ),
-            If(self.cri.cmd == cri.commands["read"],
-                input_timeout.eq(self.cri.timestamp),
-                input_pending.eq(1),
-                self.cri.i_status.eq(0b100)
-            )
-        ]
-
-        self.comb += self.cri.counter.eq(self.counter.value_sys << fine_ts_width)