From c9857bb831fbfc8988bc6d37c647dcd3fd300d4a Mon Sep 17 00:00:00 2001 From: Donald Sebastian Leung Date: Wed, 30 Sep 2020 10:55:08 +0800 Subject: [PATCH] Reset translation progress --- README.md | 11 +-- rtio/__init__.py | 1 + rtio/channel.py | 35 -------- rtio/core.py | 1 + rtio/cri.py | 125 -------------------------- rtio/input_collector.py | 1 + rtio/rtlink.py | 96 -------------------- rtio/sed/__init__.py | 1 + rtio/sed/core.py | 1 + rtio/sed/fifos.py | 1 + rtio/sed/gates.py | 1 + rtio/sed/lane_distributor.py | 169 ----------------------------------- rtio/sed/layouts.py | 78 ---------------- rtio/sed/output_driver.py | 104 --------------------- rtio/sed/output_network.py | 104 --------------------- 15 files changed, 13 insertions(+), 716 deletions(-) diff --git a/README.md b/README.md index b0bdfe5..c89463b 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,19 @@ Formally verified implementation of the ARTIQ RTIO core in nMigen ## Progress +- Devise a suitable migration strategy for `artiq.gateware.rtio` from Migen to nMigen - [ ] Implement the core in nMigen - - [ ] `rtio.core` - - [ ] `rtio.cri` -- - [x] `rtio.rtlink` -- - [x] `rtio.channel` +- - [ ] `rtio.rtlink` +- - [ ] `rtio.channel` - - [ ] `rtio.sed.core` -- - [x] `rtio.sed.layouts` -- - [x] `rtio.sed.lane_distributor` +- - [ ] `rtio.sed.layouts` +- - [ ] `rtio.sed.lane_distributor` - - [ ] `rtio.sed.fifos` - - [ ] `rtio.sed.gates` - - [ ] `rtio.sed.output_driver` -- - [x] `rtio.sed.output_network` +- - [ ] `rtio.sed.output_network` - - [ ] `rtio.input_collector` - [ ] Add suitable assertions for verification (BMC / unbounded proof?) diff --git a/rtio/__init__.py b/rtio/__init__.py index e69de29..8b13789 100644 --- a/rtio/__init__.py +++ b/rtio/__init__.py @@ -0,0 +1 @@ + diff --git a/rtio/channel.py b/rtio/channel.py index 6c0c440..8b13789 100644 --- a/rtio/channel.py +++ b/rtio/channel.py @@ -1,36 +1 @@ -import warnings -from rtio import rtlink - - -class Channel: - def __init__(self, interface, probes=None, overrides=None, - ofifo_depth=None, ififo_depth=64): - if probes is None: - probes = [] - if overrides is None: - overrides = [] - - self.interface = interface - self.probes = probes - self.overrides = overrides - if ofifo_depth is None: - ofifo_depth = 64 - else: - warnings.warn("ofifo_depth is deprecated", FutureWarning) - self.ofifo_depth = ofifo_depth - self.ififo_depth = ififo_depth - - @classmethod - def from_phy(cls, phy, **kwargs): - probes = getattr(phy, "probes", []) - overrides = getattr(phy, "overrides", []) - return cls(phy.rtlink, probes, overrides, **kwargs) - - -class LogChannel: - """A degenerate channel used to log messages into the analyzer.""" - def __init__(self): - self.interface = rtlink.Interface(rtlink.OInterface(32)) - self.probes = [] - self.overrides = [] diff --git a/rtio/core.py b/rtio/core.py index e69de29..8b13789 100644 --- a/rtio/core.py +++ b/rtio/core.py @@ -0,0 +1 @@ + diff --git a/rtio/cri.py b/rtio/cri.py index edd8701..8b13789 100644 --- a/rtio/cri.py +++ b/rtio/cri.py @@ -1,126 +1 @@ -from nmigen import * -from nmigen.utils import * -from nmigen.hdl.rec import * -"""Common RTIO Interface""" - -# CRI write happens in 3 cycles: -# 1. set timestamp and channel -# 2. set other payload elements and issue write command -# 3. check status - -commands = { - "nop": 0, - "write": 1, - # i_status should have the "wait for status" bit set until - # an event is available, or timestamp is reached. - "read": 2, - # targets must assert o_buffer_space_valid in response - # to this command - "get_buffer_space": 3 -} - - -layout = [ - ("cmd", 2, DIR_FANOUT), - # 8 MSBs of chan_sel = routing destination - # 16 LSBs of chan_sel = channel within the destination - ("chan_sel", 24, DIR_FANOUT), - - ("o_timestamp", 64, DIR_FANOUT), - ("o_data", 512, DIR_FANOUT), - ("o_address", 8, DIR_FANOUT), - # o_status bits: - # <0:wait> <1:underflow> <2:destination unreachable> - ("o_status", 3, DIR_FANIN), - - # pessimistic estimate of the number of outputs events that can be - # written without waiting. - # this feature may be omitted on systems without DRTIO. - ("o_buffer_space_valid", 1, DIR_FANIN), - ("o_buffer_space", 16, DIR_FANIN), - - ("i_timeout", 64, DIR_FANOUT), - ("i_data", 32, DIR_FANIN), - ("i_timestamp", 64, DIR_FANIN), - # i_status bits: - # <0:wait for event (command timeout)> <1:overflow> <2:wait for status> - # <3:destination unreachable> - # <0> and <1> are mutually exclusive. <1> has higher priority. - ("i_status", 4, DIR_FANIN), -] - -class Interface(Record): - def __init__(self, **kwargs): - super().__init__(layout, **kwargs) - -# Skip KernelInitiator for now - -class CRIDecoder(Elaboratable): - def __init__(self, slaves=2, master=None, mode="async", enable_routing=False): - if isinstance(slaves, int): - slaves = [Interface() for _ in range(slaves)] - if master is None: - master = Interface() - self.slaves = slaves - self.master = master - self.mode = mode - self.enable_routing = enable_routing - - def elaborate(self, platform): - m = Module() - - # routing - if self.enable_routing: - destination_unreachable = Interface() - m.d.comb += destination_unreachable.o_status.eq(4) - m.d.comb += destination_unreachable.i_status.eq(8) - self.slaves = self.slaves[:] - self.slaves.append(destination_unreachable) - target_len = 2 ** (len(slaves) - 1).bit_length() - self.slaves += [destination_unreachable] * (target_len - len(slaves)) - - slave_bits = bits_for(len(self.slaves) - 1) - selected = Signal(slave_bits) - - if self.enable_routing: - routing_table = Memory(slave_bits, 256) - - if self.mode == "async": - rtp_decoder_rdport = routing_table.read_port() - rtp_decoder_wrport = routing_table.write_port() - elif self.mode == "sync": - rtp_decoder_rdport = routing_table.read_port(clock_domain="rtio") - rtp_decoder_wrport = routing_tables.write_port(clock_domain="rtio") - else: - raise ValueError - m.submodules.rtp_decoder_rdport = rtp_decoder_rdport - m.submodules.rtp_decoder_wrport = rtp_decoder_wrport - m.d.comb += rtp_decoder_rdport.addr.eq(self.master.chan_sel[16:]) - m.d.comb += selected.eq(rtp_decoder_rdport.data) - else: - m.d.sync += selected.eq(self.master.chan_sel[16:]) - - # master -> slave - for n, slave in enumerate(self.slaves): - for name, size, direction in layout: - if direction == DIR_FANOUT and name != "cmd": - m.d.comb += getattr(slave, name).eq(getattr(self.master, name)) - with m.If(selected == n): - m.d.comb += slave.cmd.eq(self.master.cmd) - - # slave -> master - with m.Switch(selected): - for n, slave in enumerate(self.slaves): - with m.Case(n): - for name, size, direction in layout: - if direction == DIR_FANIN: - m.d.comb += getattr(self.master, name).eq(getattr(slave, name)) - - return m - -# TODO: CRISwitch - -# TODO: CRIInterconnectShared - -# TODO: RoutingTableAccess diff --git a/rtio/input_collector.py b/rtio/input_collector.py index e69de29..8b13789 100644 --- a/rtio/input_collector.py +++ b/rtio/input_collector.py @@ -0,0 +1 @@ + diff --git a/rtio/rtlink.py b/rtio/rtlink.py index 585e22c..8b13789 100644 --- a/rtio/rtlink.py +++ b/rtio/rtlink.py @@ -1,97 +1 @@ -from nmigen import * - -class OInterface: - def __init__(self, data_width, address_width=0, - fine_ts_width=0, enable_replace=True, - delay=0): - self.stb = Signal() - self.busy = Signal() - - assert 0 <= data_width <= 512 - assert 0 <= address_width <= 8 - assert 0 <= fine_ts_width <= 4 - - if data_width: - self.data = Signal(data_width, reset_less=True) - if address_width: - self.address = Signal(address_width, reset_less=True) - if fine_ts_width: - self.fine_ts = Signal(fine_ts_width, reset_less=True) - - self.enable_replace = enable_replace - - if delay < 0: - raise ValueError("only positive delays allowed", delay) - self.delay = delay - - @classmethod - def like(cls, other): - return cls(get_data_width(other), - get_address_width(other), - get_fine_ts_width(other), - other.enable_replace, - other.delay) - - -class IInterface: - def __init__(self, data_width, - timestamped=True, fine_ts_width=0, delay=0): - self.stb = Signal() - - assert 0 <= data_width <= 32 - assert 0 <= fine_ts_width <= 4 - - if data_width: - self.data = Signal(data_width, reset_less=True) - if fine_ts_width: - self.fine_ts = Signal(fine_ts_width, reset_less=True) - - assert(not fine_ts_width or timestamped) - self.timestamped = timestamped - if delay < 0: - raise ValueError("only positive delays") - self.delay = delay - - @classmethod - def like(cls, other): - return cls(get_data_width(other), - other.timestamped, - get_fine_ts_width(other), - other.delay) - - -class Interface: - def __init__(self, o, i=None): - self.o = o - self.i = i - - @classmethod - def like(cls, other): - if other.i is None: - return cls(OInterface.like(other.o)) - else: - return cls(OInterface.like(other.o), - IInterface.like(other.i)) - - -def _get_or_zero(interface, attr): - if interface is None: - return 0 - assert isinstance(interface, (OInterface, IInterface)) - if hasattr(interface, attr): - return len(getattr(interface, attr)) - else: - return 0 - - -def get_data_width(interface): - return _get_or_zero(interface, "data") - - -def get_address_width(interface): - return _get_or_zero(interface, "address") - - -def get_fine_ts_width(interface): - return _get_or_zero(interface, "fine_ts") diff --git a/rtio/sed/__init__.py b/rtio/sed/__init__.py index e69de29..8b13789 100644 --- a/rtio/sed/__init__.py +++ b/rtio/sed/__init__.py @@ -0,0 +1 @@ + diff --git a/rtio/sed/core.py b/rtio/sed/core.py index e69de29..8b13789 100644 --- a/rtio/sed/core.py +++ b/rtio/sed/core.py @@ -0,0 +1 @@ + diff --git a/rtio/sed/fifos.py b/rtio/sed/fifos.py index e69de29..8b13789 100644 --- a/rtio/sed/fifos.py +++ b/rtio/sed/fifos.py @@ -0,0 +1 @@ + diff --git a/rtio/sed/gates.py b/rtio/sed/gates.py index e69de29..8b13789 100644 --- a/rtio/sed/gates.py +++ b/rtio/sed/gates.py @@ -0,0 +1 @@ + diff --git a/rtio/sed/lane_distributor.py b/rtio/sed/lane_distributor.py index a770a43..8b13789 100644 --- a/rtio/sed/lane_distributor.py +++ b/rtio/sed/lane_distributor.py @@ -1,170 +1 @@ -from nmigen import * -from rtio import cri -from rtio.sed import layouts - -__all__ = ["LaneDistributor"] - -class LaneDistributor(Elaboratable): - def __init__(self, lane_count, seqn_width, layout_payload, - compensation, glbl_fine_ts_width, - enable_spread=True, quash_channels=[], interface=None): - if lane_count & (lane_count - 1): - raise NotImplementedError("lane count must be a power of 2") - - if interface is None: - interface = cri.Interface() - self.cri = interface - self.sequence_error = Signal() - self.sequence_error_channel = Signal(16, reset_less=True) - # The minimum timestamp that an event must have to avoid triggering - # an underflow, at the time when the CRI write happens, and to a channel - # with zero latency compensation. This is synchronous to the system clock - # domain. - us_timestamp_width = 64 - glbl_fine_ts_width - self.minimum_coarse_timestamp = Signal(us_timestamp_width) - self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload)) - for _ in range(lane_count)] - self.lane_count = lane_count - self.us_timestamp_width = us_timestamp_width - self.seqn_width = seqn_width - self.glbl_fine_ts_width = glbl_fine_ts_width - self.quash_channels = quash_channels - self.compensation = compensation - self.enable_spread = enable_spread - - def elaborate(self, platform): - m = Module() - - o_status_wait = Signal() - o_status_underflow = Signal() - m.d.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow)) - - # The core keeps writing events into the current lane as long as timestamps - # (after compensation) are strictly increasing, otherwise it switches to - # the next lane. - # If spread is enabled, it also switches to the next lane after the current - # lane has been full, in order to maximize lane utilization. - # The current lane is called lane "A". The next lane (which may be chosen - # at a later stage by the core) is called lane "B". - # Computations for both lanes are prepared in advance to increase performance. - - current_lane = Signal(range(self.lane_count)) - # The last coarse timestamp received from the CRI, after compensation. - # Used to determine when to switch lanes. - last_coarse_timestamp = Signal(self.us_timestamp_width) - # The last coarse timestamp written to each lane. Used to detect - # sequence errors. - last_lane_coarse_timestamps = Array(Signal(self.us_timestamp_width) - for _ in range(self.lane_count)) - # Sequence number counter. The sequence number is used to determine which - # event wins during a replace. - seqn = Signal(self.seqn_width) - - # distribute data to lanes - for lio in self.output: - m.d.comb += lio.seqn.eq(seqn) - m.d.comb += lio.payload.channel.eq(self.cri.chan_sel[:16]) - m.d.comb += lio.payload.timestamp.eq(self.cri.o_timestamp) - if hasattr(lio.payload, "address"): - m.d.comb += lio.payload.address.eq(self.cri.o_address) - if hasattr(lio.payload, "data"): - m.d.comb += lio.payload.data.eq(self.cri.o_data) - - # when timestamp and channel arrive in cycle #1, prepare computations - coarse_timestamp = Signal(self.us_timestamp_width) - m.d.comb += coarse_timestamp.eq(self.cri.o_timestamp[self.glbl_fine_ts_width:]) - min_minus_timestamp = Signal(Shape(self.us_timestamp_width + 1, True), - reset_less=True) - laneAmin_minus_timestamp = Signal.like(min_minus_timestamp) - laneBmin_minus_timestamp = Signal.like(min_minus_timestamp) - last_minus_timestamp = Signal.like(min_minus_timestamp) - current_lane_plus_one = Signal(range(self.lane_count)) - m.d.comb += current_lane_plus_one.eq(current_lane + 1) - m.d.sync += min_minus_timestamp.eq(self.minimum_coarse_timestamp - coarse_timestamp) - m.d.sync += laneAmin_minus_timestamp.eq(last_lane_coarse_timestamps[current_lane] - coarse_timestamp) - m.d.sync += laneBmin_minus_timestamp.eq(last_lane_coarse_timestamps[current_lane_plus_one] - coarse_timestamp) - m.d.sync += last_minus_timestamp.eq(last_coarse_timestamp - coarse_timestamp) - - # Quash channels are "dummy" channels to which writes are completely ignored. - # This is used by the RTIO log channel, which is taken into account - # by the analyzer but does not enter the lanes. - quash = Signal() - m.d.sync += quash.eq(0) - for channel in self.quash_channels: - with m.If(self.cri.chan_sel[:16] == channel): - m.d.sync += quash.eq(1) - - assert all(abs(c) < 1 << 14 - 1 for c in self.compensation) - latency_compensation = Memory(width=14, depth=len(self.compensation), init=self.compensation) - latency_compensation_rdport = latency_compensation.read_port() - m.submodules.latency_compensation_rdport = latency_compensation_rdport - m.d.comb += latency_compensation_rdport.addr.eq(self.cri.chan_sel[:16]) - - # cycle #2, write - compensation = Signal(Shape(14, True)) - m.d.comb += compensation.eq(latency_compensation_rdport.data) - timestamp_above_min = Signal() - timestamp_above_last = Signal() - timestamp_above_laneA_min = Signal() - timestamp_above_laneB_min = Signal() - timestamp_above_lane_min = Signal() - force_laneB = Signal() - use_laneB = Signal() - use_lanen = Signal(range(self.lane_count)) - - do_write = Signal() - do_underflow = Signal() - do_sequence_error = Signal() - m.d.comb += timestamp_above_min.eq(min_minus_timestamp - compensation < 0) - m.d.comb += timestamp_above_laneA_min.eq(laneAmin_minus_timestamp - compensation < 0) - m.d.comb += timestamp_above_laneB_min.eq(laneBmin_minus_timestamp - compensation < 0) - m.d.comb += timestamp_above_last.eq(last_minus_timestamp - compensation < 0) - with m.If(force_laneB | ~timestamp_above_last): - m.d.comb += use_lanen.eq(current_lane_plus_one) - m.d.comb += use_laneB.eq(1) - with m.Else(): - m.d.comb += use_lanen.eq(current_lane) - m.d.comb += use_laneB.eq(0) - - m.d.comb += timestamp_above_lane_min.eq(Mux(use_laneB, timestamp_above_laneB_min, timestamp_above_laneA_min)) - with m.If(~quash & (self.cri.cmd == cri.commands["write"])): - with m.If(timestamp_above_min): - with m.If(timestamp_above_lane_min): - m.d.comb += do_write.eq(1) - with m.Else(): - m.d.comb += do_sequence_error.eq(1) - with m.Else(): - m.d.comb += do_underflow.eq(1) - m.d.comb += Array(lio.we for lio in self.output)[use_lanen].eq(do_write) - compensated_timestamp = Signal(64) - m.d.comb += compensated_timestamp.eq(self.cri.o_timestamp + (compensation << self.glbl_fine_ts_width)) - with m.If(do_write): - m.d.sync += current_lane.eq(use_lanen) - m.d.sync += last_coarse_timestamp.eq(compensated_timestamp[self.glbl_fine_ts_width:]) - m.d.sync += last_lane_coarse_timestamps[use_lanen].eq(compensated_timestamp[self.glbl_fine_ts_width:]) - m.d.sync += seqn.eq(seqn + 1) - for lio in self.output: - m.d.comb += lio.payload.timestamp.eq(compensated_timestamp) - - # cycle #3, read status - current_lane_writable = Signal() - m.d.comb += current_lane_writable.eq(Array(lio.writable for lio in self.output)[current_lane]) - m.d.comb += o_status_wait.eq(~current_lane_writable) - with m.If(self.cri.cmd == cri.commands["write"]): - m.d.sync += o_status_underflow.eq(0) - with m.If(do_underflow): - m.d.sync += o_status_underflow.eq(1) - m.d.sync += self.sequence_error.eq(do_sequence_error) - m.d.sync += self.sequence_error_channel.eq(self.cri.chan_sel[:16]) - - # current lane has been full, spread events by switching to the next. - if self.enable_spread: - current_lane_writable_r = Signal(reset=1) - m.d.sync += current_lane_writable_r.eq(current_lane_writable) - with m.If(~current_lane_writable_r & current_lane_writable): - m.d.sync += force_laneB.eq(1) - with m.If(do_write): - m.d.sync += force_laneB.eq(0) - - return m diff --git a/rtio/sed/layouts.py b/rtio/sed/layouts.py index c731e4c..8b13789 100644 --- a/rtio/sed/layouts.py +++ b/rtio/sed/layouts.py @@ -1,79 +1 @@ -from nmigen import * -from nmigen.utils import * -from nmigen.hdl.rec import * -from rtio import rtlink - - -def fifo_payload(channels): - address_width = max(rtlink.get_address_width(channel.interface.o) - for channel in channels) - data_width = max(rtlink.get_data_width(channel.interface.o) - for channel in channels) - - layout = [ - ("channel", bits_for(len(channels)-1)), - ("timestamp", 64) - ] - if address_width: - layout.append(("address", address_width)) - if data_width: - layout.append(("data", data_width)) - - return layout - - -def seqn_width(lane_count, fifo_depth): - # There must be a unique sequence number for every possible event in every FIFO. - # Plus 2 bits to detect and handle wraparounds. - return bits_for(lane_count*fifo_depth-1) + 2 - - -def fifo_ingress(seqn_width, layout_payload): - return [ - ("we", 1, DIR_FANOUT), - ("writable", 1, DIR_FANIN), - ("seqn", seqn_width, DIR_FANOUT), - ("payload", [(a, b, DIR_FANOUT) for a, b in layout_payload]) - ] - - -def fifo_egress(seqn_width, layout_payload): - return [ - ("re", 1, DIR_FANIN), - ("readable", 1, DIR_FANOUT), - ("seqn", seqn_width, DIR_FANOUT), - ("payload", [(a, b, DIR_FANOUT) for a, b in layout_payload]) - ] - - -# We use glbl_fine_ts_width in the output network so that collisions due -# to insufficiently increasing timestamps are always reliably detected. -# We can still have undetected collisions on the address by making it wrap -# around, but those are more rare and easier to debug, and addresses are -# not normally exposed directly to the ARTIQ user. -def output_network_payload(channels, glbl_fine_ts_width): - address_width = max(rtlink.get_address_width(channel.interface.o) - for channel in channels) - data_width = max(rtlink.get_data_width(channel.interface.o) - for channel in channels) - - layout = [("channel", bits_for(len(channels)-1))] - if glbl_fine_ts_width: - layout.append(("fine_ts", glbl_fine_ts_width)) - if address_width: - layout.append(("address", address_width)) - if data_width: - layout.append(("data", data_width)) - - return layout - - -def output_network_node(seqn_width, layout_payload): - return [ - ("valid", 1), - ("seqn", seqn_width), - ("replace_occured", 1), - ("nondata_replace_occured", 1), - ("payload", layout_payload) - ] diff --git a/rtio/sed/output_driver.py b/rtio/sed/output_driver.py index a234626..8b13789 100644 --- a/rtio/sed/output_driver.py +++ b/rtio/sed/output_driver.py @@ -1,105 +1 @@ -from functools import reduce -from operator import or_ -from nmigen import * - -from rtio.sed import layouts -from rtio.sed.output_network import OutputNetwork - -__all__ = ["OutputDriver"] - -class OutputDriver(Elaboratable): - def __init__(self, channels, glbl_fine_ts_width, lane_count, seqn_width): - self.channels = channels - self.glbl_fine_ts_width = glbl_fine_ts_width - self.lane_count = lane_count - self.seqn_width = seqn_width - self.collision = Signal() - self.collision_channel = Signal(range(len(channels)), reset_less=True) - self.busy = Signal() - self.busy_channel = Signal(range(len(channels)), reset_less=True) - self.input = None - - def elaborate(self, platform): - m = Module() - - # output network - layout_on_payload = layouts.output_network_payload(self.channels, self.glbl_fine_ts_width) - output_network = OutputNetwork(self.lane_count, self.seqn_width, layout_on_payload) - m.submodules.output_network = output_network - self.input = output_network.input - - # detect collisions (adds one pipeline stage) - layout_lane_data = [ - ("valid", 1), - ("collision", 1), - ("payload", layout_on_payload) - ] - lane_datas = [Record(layout_lane_data, reset_less=True) for _ in range(lane_count)] - en_replaces = [channel.interface.o.enable_replace for channel in channels] - for lane_data, on_output in zip(lane_datas, output_network.output): - lane_data.valid.reset_less = False - lane_data.collision.reset_less = False - replace_occurred_r = Signal() - nondata_replace_occurred_r = Signal() - m.d.sync += lane_data.valid.eq(on_output.valid) - m.d.sync += lane_data.payload.eq(on_output.payload) - m.d.sync += replace_occurred_r.eq(on_output.replace_occurred) - m.d.sync += nondata_replace_occurred_r.eq(on_output.nondata_replace_occurred) - - en_replaces_rom = Memory(1, len(en_replaces), init=en_replaces) - en_replaces_rom_rdport = en_replaces_rom.read_port() - m.submodules.en_replaces_rom_rdport = en_replaces_rom_rdport - m.d.comb += en_replaces_rom_rdport.addr.eq(on_output.payload.channel) - m.d.comb += lane_data.collision.eq(replace_occurred_r & (~en_replaces_rom_rdport.data | nondata_replace_occurred_r)) - - m.d.sync += self.collision.eq(0) - m.d.sync += self.collision_channel.eq(0) - for lane_data in lane_datas: - with m.If(lane_data.valid & lane_data.collision): - m.d.sync += self.collision.eq(1) - m.d.sync += self.collision_channel.eq(lane_data.payload.channel) - - # demultiplex channels (adds one pipeline stage) - for n, channel in enumerate(channels): - oif = channel.interface.o - - onehot_stb = [] - onehot_fine_ts = [] - onehot_address = [] - onehot_data = [] - for lane_data in lane_datas: - selected = Signal() - m.d.comb += selected.eq(lane_data.valid & ~lane_data.collision & (lane_data.payload.channel == n)) - onehot_stb.append(selected) - if hasattr(lane_data.payload, "fine_ts") and hasattr(oif, "fine_ts"): - ts_shift = len(lane_data.payload.fine_ts) - len(oif.fine_ts) - onehot_fine_ts.append(Mux(selected, lane_data.payload.fine_ts[ts_shift:], 0)) - if hasattr(lane_data.payload, "address"): - onehot_address.append(Mux(selected, lane_data.payload.address, 0)) - if hasattr(lane_data.payload, "data"): - onehot_data.append(Mux(selected, lane_data.payload.data, 0)) - - m.d.sync += oif.stb.eq(reduce(or_, onehot_stb)) - if hasattr(oif, "fine_ts"): - m.d.sync += oif.fine_ts.eq(reduce(or_, onehot_fine_ts)) - if hasattr(oif, "address"): - m.d.sync += oif.address.eq(reduce(or_, onehot_address)) - if hasattr(oif, "data"): - m.d.sync += oif.data.eq(reduce(or_, onehot_data)) - - # detect busy errors, at lane level to reduce muxing - m.d.sync += self.busy.eq(0) - m.d.sync += self.busy_channel.eq(0) - for lane_data in lane_datas: - stb_r = Signal() - channel_r = Signal(range(len(channels)), reset_less=True) - m.d.sync += stb_r.eq(lane_data.valid & ~lane_data.collision) - m.d.sync += channel_r.eq(lane_data.payload.channel) - with m.If(stb_r & Array(channel.interface.o.busy for channel in channels)[channel_r]): - m.d.sync += self.busy.eq(1) - m.d.sync += self.busy_channel.eq(channel_r) - - return m - -OutputDriver([], 1, 1, 1).elaborate(None) diff --git a/rtio/sed/output_network.py b/rtio/sed/output_network.py index 8c60624..8b13789 100644 --- a/rtio/sed/output_network.py +++ b/rtio/sed/output_network.py @@ -1,105 +1 @@ -from nmigen import * -from nmigen.utils import * -from rtio.sed import layouts - -__all__ = ["latency", "OutputNetwork"] - -# Based on: https://github.com/Bekbolatov/SortingNetworks/blob/master/src/main/js/gr.js -def boms_get_partner(n, l, p): - if p == 1: - return n ^ (1 << (l - 1)) - scale = 1 << (l - p) - box = 1 << p - sn = n//scale - n//scale//box*box - if sn == 0 or sn == (box - 1): - return n - if (sn % 2) == 0: - return n - scale - return n + scale - -def boms_steps_pairs(lane_count): - d = log2_int(lane_count) - steps = [] - for l in range(1, d+1): - for p in range(1, l+1): - pairs = [] - for n in range(2**d): - partner = boms_get_partner(n, l, p) - if partner != n: - if partner > n: - pair = (n, partner) - else: - pair = (partner, n) - if pair not in pairs: - pairs.append(pair) - steps.append(pairs) - return steps - -def latency(lane_count): - d = log2_int(lane_count) - return sum(l for l in range(1, d+1)) - -def cmp_wrap(a, b): - return Mux((a[-2] == a[-1]) & (b[-2] == b[-1]) & (a[-1] != b[-1]), a[-1], a < b) - -class OutputNetwork(Elaboratable): - def __init__(self, lane_count, seqn_width, layout_payload): - self.lane_count = lane_count - self.seqn_width = seqn_width - self.layout_payload = layout_payload - self.input = [Record(layouts.output_network_node(seqn_width, layout_payload)) - for _ in range(lane_count)] - self.output = None - - def elaborate(self, platform): - m = Module() - - step_input = self.input - for step in boms_steps_pairs(self.lane_count): - step_output = [] - for i in range(lane_count): - rec = Record(layouts.output_network_node(seqn_width, layout_payload), - reset_less=True) - rec.valid.reset_less = False - step_output.append(rec) - - for node1, node2 in step: - nondata_difference = Signal() - for field, _ in layout_payload: - if field != "data": - f1 = getattr(step_input[node1].payload, field) - f2 = getattr(step_input[node2].payload, field) - with m.If(f1 != f2): - m.d.comb += nondata_difference.eq(1) - - k1 = Cat(step_input[node1].payload.channel, ~step_input[node1].valid) - k2 = Cat(step_input[node2].payload.channel, ~step_input[node2].valid) - with m.If(k1 == k2): - with m.If(cmp_wrap(step_input[node1].seqn, step_input[node2].seqn)): - m.d.sync += step_output[node1].eq(step_input[node2]) - m.d.sync += step_output[node2].eq(step_input[node1]) - with m.Else(): - m.d.sync += step_output[node1].eq(step_input[node1]) - m.d.sync += step_output[node2].eq(step_input[node2]) - m.d.sync += step_output[node1].replace_occurred.eq(1) - m.d.sync += step_output[node1].nondata_replace_occurred.eq(nondata_difference), - m.d.sync += step_output[node2].valid.eq(0) - with m.Elif(k1 < k2): - m.d.sync += step_output[node1].eq(step_input[node1]) - m.d.sync += step_output[node2].eq(step_input[node2]) - with m.Else(): - m.d.sync += step_output[node1].eq(step_input[node2]) - m.d.sync += step_output[node2].eq(step_input[node1]) - - unchanged = list(range(lane_count)) - for node1, node2 in step: - unchanged.remove(node1) - unchanged.remove(node2) - for node in unchanged: - m.d.sync += step_output[node].eq(step_input[node]) - - self.output = step_output - step_input = step_output - - return m