From 0a687b7902c4fb0f27ce2c7fa63bc0b7bde9cf92 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sat, 1 Apr 2017 12:18:00 +0800 Subject: [PATCH] drtio: report satellite errors through firmware --- artiq/firmware/runtime/rtio_mgt.rs | 46 +++++++------------- artiq/firmware/satman/lib.rs | 26 ++++++++++- artiq/gateware/drtio/core.py | 8 +++- artiq/gateware/drtio/rt_controller_master.py | 35 ++++++++------- artiq/gateware/drtio/rt_errors_satellite.py | 36 +++++++++++++++ artiq/gateware/drtio/rt_ios_satellite.py | 17 +++++--- artiq/gateware/drtio/rt_packet_master.py | 35 ++++++--------- artiq/gateware/drtio/rt_packet_satellite.py | 42 +++--------------- artiq/gateware/drtio/rt_serializer.py | 17 +------- artiq/gateware/test/drtio/test_full_stack.py | 19 ++++---- 10 files changed, 143 insertions(+), 138 deletions(-) create mode 100644 artiq/gateware/drtio/rt_errors_satellite.py diff --git a/artiq/firmware/runtime/rtio_mgt.rs b/artiq/firmware/runtime/rtio_mgt.rs index 607a03a8a..74a7c52d6 100644 --- a/artiq/firmware/runtime/rtio_mgt.rs +++ b/artiq/firmware/runtime/rtio_mgt.rs @@ -144,37 +144,23 @@ pub mod drtio { } } - // keep this in sync with error_codes in rt_packets.py - fn str_packet_error(err_code: u8) -> &'static str { - match err_code { - 0 => "Received packet of an unknown type", - 1 => "Satellite reported reception of a packet of an unknown type", - 2 => "Received truncated packet", - 3 => "Satellite reported reception of a truncated packet", - 4 => "Satellite reported write overflow", - 5 => "Satellite reported write underflow", - _ => "Unknown error code" - } - } - - fn poll_errors() -> bool { - unsafe { - if csr::drtio::packet_err_present_read() != 0 { - let err_code = csr::drtio::packet_err_code_read(); - error!("packet error {} ({})", err_code, str_packet_error(err_code)); - csr::drtio::packet_err_present_write(1) - } - if csr::drtio::o_fifo_space_timeout_read() != 0 { - error!("timeout attempting to get remote FIFO space"); - csr::drtio::o_fifo_space_timeout_write(1) - } - } - false - } - pub fn error_thread(io: Io) { - // HACK - io.until(poll_errors).unwrap(); + loop { + unsafe { + io.until(|| csr::drtio::protocol_error_read() != 0).unwrap(); + let errors = csr::drtio::protocol_error_read(); + if errors & 1 != 0 { + error!("received packet of an unknown type"); + } + if errors & 2 != 0 { + error!("received truncated packet"); + } + if errors & 4 != 0 { + error!("timeout attempting to get remote FIFO space"); + } + csr::drtio::protocol_error_write(errors); + } + } } } diff --git a/artiq/firmware/satman/lib.rs b/artiq/firmware/satman/lib.rs index 80c8b7607..fe6633067 100644 --- a/artiq/firmware/satman/lib.rs +++ b/artiq/firmware/satman/lib.rs @@ -69,6 +69,27 @@ fn process_aux_packets() { } +fn process_errors() { + let errors; + unsafe { + errors = board::csr::drtio::protocol_error_read(); + board::csr::drtio::protocol_error_write(errors); + } + if errors & 1 != 0 { + error!("received packet of an unknown type"); + } + if errors & 2 != 0 { + error!("received truncated packet"); + } + if errors & 4 != 0 { + error!("write underflow"); + } + if errors & 8 != 0 { + error!("write overflow"); + } +} + + #[cfg(rtio_frequency = "62.5")] const SI5324_SETTINGS: board::si5324::FrequencySettings = board::si5324::FrequencySettings { @@ -111,10 +132,13 @@ fn startup() { board::si5324::setup(&SI5324_SETTINGS).expect("cannot initialize si5324"); loop { - while !drtio_link_is_up() {} + while !drtio_link_is_up() { + process_errors(); + } info!("link is up, switching to recovered clock"); board::si5324::select_ext_input(true).expect("failed to switch clocks"); while drtio_link_is_up() { + process_errors(); process_aux_packets(); } info!("link is down, switching to local crystal clock"); diff --git a/artiq/gateware/drtio/core.py b/artiq/gateware/drtio/core.py index 23e226247..2c063a79d 100644 --- a/artiq/gateware/drtio/core.py +++ b/artiq/gateware/drtio/core.py @@ -4,7 +4,8 @@ from migen import * from migen.genlib.cdc import ElasticBuffer from artiq.gateware.drtio import (link_layer, aux_controller, - rt_packet_satellite, rt_ios_satellite, + rt_packet_satellite, rt_ios_satellite, + rt_errors_satellite, rt_packet_master, rt_controller_master) @@ -63,6 +64,9 @@ class DRTIOSatellite(Module): self.submodules.ios = rt_ios_satellite.IOS( self.rt_packet, channels, fine_ts_width, full_ts_width) + self.submodules.rt_errors = rt_errors_satellite.RTErrorsSatellite( + self.rt_packet, self.ios) + self.clock_domains.cd_rio = ClockDomain() self.clock_domains.cd_rio_phy = ClockDomain() self.comb += [ @@ -77,7 +81,7 @@ class DRTIOSatellite(Module): def get_csrs(self): return (self.link_layer.get_csrs() + self.link_stats.get_csrs() + - self.aux_controller.get_csrs()) + self.rt_errors.get_csrs() + self.aux_controller.get_csrs()) class DRTIOMaster(Module): diff --git a/artiq/gateware/drtio/rt_controller_master.py b/artiq/gateware/drtio/rt_controller_master.py index 596ec749e..bd533cef6 100644 --- a/artiq/gateware/drtio/rt_controller_master.py +++ b/artiq/gateware/drtio/rt_controller_master.py @@ -13,6 +13,8 @@ from artiq.gateware.rtio import cri class _CSRs(AutoCSR): def __init__(self): + self.protocol_error = CSR(3) + self.chan_sel_override = CSRStorage(16) self.chan_sel_override_en = CSRStorage() @@ -29,7 +31,6 @@ class _CSRs(AutoCSR): self.o_dbg_fifo_space_req_cnt = CSRStatus(32) self.o_reset_channel_status = CSR() self.o_wait = CSRStatus() - self.o_fifo_space_timeout = CSR() class RTController(Module): @@ -38,6 +39,24 @@ class RTController(Module): self.cri = cri.Interface() self.comb += self.cri.arb_gnt.eq(1) + # protocol errors + err_unknown_packet_type = Signal() + err_packet_truncated = Signal() + signal_fifo_space_timeout = Signal() + err_fifo_space_timeout = Signal() + self.sync.sys_with_rst += [ + If(self.csrs.protocol_error.re, + If(self.csrs.protocol_error.r[0], err_unknown_packet_type.eq(0)), + If(self.csrs.protocol_error.r[1], err_packet_truncated.eq(0)), + If(self.csrs.protocol_error.r[2], err_fifo_space_timeout.eq(0)) + ), + If(rt_packet.err_unknown_packet_type, err_unknown_packet_type.eq(1)), + If(rt_packet.err_packet_truncated, err_packet_truncated.eq(1)), + If(signal_fifo_space_timeout, err_fifo_space_timeout.eq(1)) + ] + self.comb += self.csrs.protocol_error.w.eq( + Cat(err_unknown_packet_type, err_packet_truncated, err_fifo_space_timeout)) + # channel selection chan_sel = Signal(16) self.comb += chan_sel.eq( @@ -135,11 +154,6 @@ class RTController(Module): If(o_sequence_error_set, o_status_sequence_error.eq(1)) ] - signal_fifo_space_timeout = Signal() - self.sync.sys_with_rst += [ - If(self.csrs.o_fifo_space_timeout.re, self.csrs.o_fifo_space_timeout.w.eq(0)), - If(signal_fifo_space_timeout, self.csrs.o_fifo_space_timeout.w.eq(1)) - ] timeout_counter = WaitTimer(8191) self.submodules += timeout_counter @@ -273,9 +287,6 @@ class RTManager(Module, AutoCSR): def __init__(self, rt_packet): self.request_echo = CSR() - self.packet_err_present = CSR() - self.packet_err_code = CSRStatus(8) - self.update_packet_cnt = CSR() self.packet_cnt_tx = CSRStatus(32) self.packet_cnt_rx = CSRStatus(32) @@ -288,12 +299,6 @@ class RTManager(Module, AutoCSR): If(self.request_echo.re, rt_packet.echo_stb.eq(1)) ] - self.comb += [ - self.packet_err_present.w.eq(rt_packet.error_not), - rt_packet.error_not_ack.eq(self.packet_err_present.re), - self.packet_err_code.status.eq(rt_packet.error_code) - ] - self.sync += \ If(self.update_packet_cnt.re, self.packet_cnt_tx.status.eq(rt_packet.packet_cnt_tx), diff --git a/artiq/gateware/drtio/rt_errors_satellite.py b/artiq/gateware/drtio/rt_errors_satellite.py new file mode 100644 index 000000000..36b3e1365 --- /dev/null +++ b/artiq/gateware/drtio/rt_errors_satellite.py @@ -0,0 +1,36 @@ +"""Protocol error reporting for satellites.""" + +from migen import * +from migen.genlib.cdc import PulseSynchronizer +from misoc.interconnect.csr import * + + +class RTErrorsSatellite(Module, AutoCSR): + def __init__(self, rt_packet, ios): + self.protocol_error = CSR(4) + + # The master is normally responsible for avoiding output overflows and + # output underflows. + # Error reports here are only for diagnosing internal ARTIQ bugs. + + unknown_packet_type = Signal() + packet_truncated = Signal() + write_overflow = Signal() + write_underflow = Signal() + self.comb += self.protocol_error.w.eq( + Cat(unknown_packet_type, packet_truncated, + write_underflow, write_overflow)) + + for n, (target, source) in enumerate([ + (unknown_packet_type, rt_packet.unknown_packet_type), + (packet_truncated, rt_packet.packet_truncated), + (write_underflow, ios.write_underflow), + (write_overflow, ios.write_overflow)]): + ps = PulseSynchronizer("rtio", "sys") + self.submodules += ps + self.comb += ps.i.eq(source) + self.sync += [ + If(self.protocol_error.re & self.protocol_error.r[n], target.eq(0)), + If(ps.o, target.eq(1)) + ] + diff --git a/artiq/gateware/drtio/rt_ios_satellite.py b/artiq/gateware/drtio/rt_ios_satellite.py index 55a0652e1..a1a94746e 100644 --- a/artiq/gateware/drtio/rt_ios_satellite.py +++ b/artiq/gateware/drtio/rt_ios_satellite.py @@ -9,6 +9,9 @@ from artiq.gateware.rtio import rtlink class IOS(Module): def __init__(self, rt_packet, channels, max_fine_ts_width, full_ts_width): + self.write_underflow = Signal() + self.write_overflow = Signal() + self.rt_packet = rt_packet self.max_fine_ts_width = max_fine_ts_width @@ -21,6 +24,10 @@ class IOS(Module): ) self.comb += rt_packet.tsc_input.eq(self.tsc) + self.sync.rio += [ + self.write_underflow.eq(0), + self.write_overflow.eq(0) + ] for n, channel in enumerate(channels): self.add_output(n, channel) self.add_input(n, channel) @@ -70,15 +77,11 @@ class IOS(Module): self.comb += fifo.we.eq(rt_packet.write_stb & (rt_packet.write_channel == n)) self.sync.rio += [ - If(rt_packet.write_overflow_ack, - rt_packet.write_overflow.eq(0)), - If(rt_packet.write_underflow_ack, - rt_packet.write_underflow.eq(0)), If(fifo.we, - If(~fifo.writable, rt_packet.write_overflow.eq(1)), If(rt_packet.write_timestamp[max_fine_ts_width:] < (tsc_comp + 4), - rt_packet.write_underflow.eq(1) - ) + self.write_underflow.eq(1) + ), + If(~fifo.writable, self.write_overflow.eq(1)) ) ] if data_width: diff --git a/artiq/gateware/drtio/rt_packet_master.py b/artiq/gateware/drtio/rt_packet_master.py index b07ce23a1..bec036161 100644 --- a/artiq/gateware/drtio/rt_packet_master.py +++ b/artiq/gateware/drtio/rt_packet_master.py @@ -116,10 +116,9 @@ class RTPacketMaster(Module): self.reset_ack = Signal() self.reset_phy = Signal() - # errors - self.error_not = Signal() - self.error_not_ack = Signal() - self.error_code = Signal(8) + # rx errors + self.err_unknown_packet_type = Signal() + self.err_packet_truncated = Signal() # packet counters self.packet_cnt_tx = Signal(32) @@ -235,12 +234,6 @@ class RTPacketMaster(Module): self.echo_stb, self.echo_ack, None, echo_stb, echo_ack, None) - error_not = Signal() - error_code = Signal(8) - self.submodules += _CrossDomainNotification("rtio_rx", - error_not, error_code, - self.error_not, self.error_not_ack, self.error_code) - read_not = Signal() read_no_event = Signal() read_is_overflow = Signal() @@ -259,6 +252,14 @@ class RTPacketMaster(Module): read_timestamp.eq(rx_dp.packet_as["read_reply"].timestamp) ] + err_unknown_packet_type = PulseSynchronizer("rtio_rx", "sys") + err_packet_truncated = PulseSynchronizer("rtio_rx", "sys") + self.submodules += err_unknown_packet_type, err_packet_truncated + self.comb += [ + self.err_unknown_packet_type.eq(err_unknown_packet_type.o), + self.err_packet_truncated.eq(err_packet_truncated.o) + ] + # TX FSM tx_fsm = ClockDomainsRenamer("rtio")(FSM(reset_state="IDLE")) self.submodules += tx_fsm @@ -367,30 +368,20 @@ class RTPacketMaster(Module): rx_dp.packet_buffer_load.eq(1), If(rx_dp.packet_last, Case(rx_dp.packet_type, { - rx_plm.types["error"]: NextState("ERROR"), rx_plm.types["echo_reply"]: echo_received_now.eq(1), rx_plm.types["fifo_space_reply"]: NextState("FIFO_SPACE"), rx_plm.types["read_reply"]: NextState("READ_REPLY"), rx_plm.types["read_reply_noevent"]: NextState("READ_REPLY_NOEVENT"), - "default": [ - error_not.eq(1), - error_code.eq(error_codes["unknown_type_local"]) - ] + "default": err_unknown_packet_type.i.eq(1) }) ).Else( ongoing_packet_next.eq(1) ) ), If(~rx_dp.frame_r & ongoing_packet, - error_not.eq(1), - error_code.eq(error_codes["truncated_local"]) + err_packet_truncated.i.eq(1) ) ) - rx_fsm.act("ERROR", - error_not.eq(1), - error_code.eq(rx_dp.packet_as["error"].code), - NextState("INPUT") - ) rx_fsm.act("FIFO_SPACE", fifo_space_not.eq(1), fifo_space.eq(rx_dp.packet_as["fifo_space_reply"].space), diff --git a/artiq/gateware/drtio/rt_packet_satellite.py b/artiq/gateware/drtio/rt_packet_satellite.py index 9faeeea65..c490ca34b 100644 --- a/artiq/gateware/drtio/rt_packet_satellite.py +++ b/artiq/gateware/drtio/rt_packet_satellite.py @@ -8,6 +8,9 @@ from artiq.gateware.drtio.rt_serializer import * class RTPacketSatellite(Module): def __init__(self, link_layer): + self.unknown_packet_type = Signal() + self.packet_truncated = Signal() + self.tsc_load = Signal() self.tsc_load_value = Signal(64) self.tsc_input = Signal(64) @@ -25,9 +28,7 @@ class RTPacketSatellite(Module): self.write_address = Signal(16) self.write_data = Signal(512) self.write_overflow = Signal() - self.write_overflow_ack = Signal() self.write_underflow = Signal() - self.write_underflow_ack = Signal() self.read_channel = Signal(16) self.read_readable = Signal() @@ -68,19 +69,13 @@ class RTPacketSatellite(Module): # RX->TX echo_req = Signal() - err_set = Signal() - err_req = Signal() - err_ack = Signal() fifo_space_set = Signal() fifo_space_req = Signal() fifo_space_ack = Signal() self.sync += [ - If(err_ack, err_req.eq(0)), - If(err_set, err_req.eq(1)), If(fifo_space_ack, fifo_space_req.eq(0)), If(fifo_space_set, fifo_space_req.eq(1)), ] - err_code = Signal(max=len(error_codes)+1) # RX FSM self.comb += [ @@ -145,16 +140,13 @@ class RTPacketSatellite(Module): rx_plm.types["write"]: NextState("WRITE"), rx_plm.types["fifo_space_request"]: NextState("FIFO_SPACE"), rx_plm.types["read_request"]: NextState("READ_REQUEST"), - "default": [ - err_set.eq(1), - NextValue(err_code, error_codes["unknown_type_remote"])] + "default": self.unknown_packet_type.eq(1) }) ).Else( ongoing_packet_next.eq(1) ), If(~rx_dp.frame_r & ongoing_packet, - err_set.eq(1), - NextValue(err_code, error_codes["truncated_remote"]) + self.packet_truncated.eq(1) ) ) ) @@ -178,8 +170,7 @@ class RTPacketSatellite(Module): ).Else( write_data_buffer_load.eq(1), If(~rx_dp.frame_r, - err_set.eq(1), - NextValue(err_code, error_codes["truncated_remote"]), + self.packet_truncated.eq(1), NextState("INPUT") ) ) @@ -202,14 +193,11 @@ class RTPacketSatellite(Module): tx_fsm.act("IDLE", If(echo_req, NextState("ECHO")), If(fifo_space_req, NextState("FIFO_SPACE")), - If(self.write_overflow, NextState("ERROR_WRITE_OVERFLOW")), - If(self.write_underflow, NextState("ERROR_WRITE_UNDERFLOW")), If(~read_request_wait & read_request_pending, If(read_request_timeout, NextState("READ_TIMEOUT")), If(self.read_overflow, NextState("READ_OVERFLOW")), If(self.read_readable, NextState("READ")) - ), - If(err_req, NextState("ERROR")) + ) ) tx_fsm.act("ECHO", @@ -222,16 +210,6 @@ class RTPacketSatellite(Module): tx_dp.send("fifo_space_reply", space=self.fifo_space), If(tx_dp.packet_last, NextState("IDLE")) ) - tx_fsm.act("ERROR_WRITE_OVERFLOW", - self.write_overflow_ack.eq(1), - tx_dp.send("error", code=error_codes["write_overflow"]), - If(tx_dp.packet_last, NextState("IDLE")) - ) - tx_fsm.act("ERROR_WRITE_UNDERFLOW", - self.write_underflow_ack.eq(1), - tx_dp.send("error", code=error_codes["write_underflow"]), - If(tx_dp.packet_last, NextState("IDLE")) - ) tx_fsm.act("READ_TIMEOUT", tx_dp.send("read_reply_noevent", overflow=0), @@ -256,9 +234,3 @@ class RTPacketSatellite(Module): NextState("IDLE") ) ) - - tx_fsm.act("ERROR", - err_ack.eq(1), - tx_dp.send("error", code=err_code), - If(tx_dp.packet_last, NextState("IDLE")) - ) diff --git a/artiq/gateware/drtio/rt_serializer.py b/artiq/gateware/drtio/rt_serializer.py index 6b61bd239..df1e2e5c6 100644 --- a/artiq/gateware/drtio/rt_serializer.py +++ b/artiq/gateware/drtio/rt_serializer.py @@ -4,7 +4,7 @@ from types import SimpleNamespace from migen import * -__all__ = ["ReceiveDatapath", "TransmitDatapath", "error_codes", +__all__ = ["ReceiveDatapath", "TransmitDatapath", "get_m2s_layouts", "get_s2m_layouts"] @@ -64,7 +64,6 @@ def get_m2s_layouts(alignment): def get_s2m_layouts(alignment): plm = PacketLayoutManager(alignment) - plm.add_type("error", ("code", 8)) plm.add_type("echo_reply") plm.add_type("fifo_space_reply", ("space", 16)) @@ -75,20 +74,6 @@ def get_s2m_layouts(alignment): return plm -# keep this in sync with str_packet_error in rtio_mgt.rs -error_codes = { - "unknown_type_local": 0, - "unknown_type_remote": 1, - "truncated_local": 2, - "truncated_remote": 3, - # The transmitter is normally responsible for avoiding - # overflows and underflows. Those error reports are only - # for diagnosing internal ARTIQ bugs. - "write_overflow": 4, - "write_underflow": 5 -} - - class ReceiveDatapath(Module): def __init__(self, frame, data, plm): ws = len(data) diff --git a/artiq/gateware/test/drtio/test_full_stack.py b/artiq/gateware/test/drtio/test_full_stack.py index e3c08d630..66bbd4982 100644 --- a/artiq/gateware/test/drtio/test_full_stack.py +++ b/artiq/gateware/test/drtio/test_full_stack.py @@ -78,6 +78,7 @@ class TestFullStack(unittest.TestCase): kcsrs = dut.master_ki csrs = dut.master.rt_controller.csrs mgr = dut.master.rt_manager + saterr = dut.satellite.rt_errors ttl_changes = [] correct_ttl_changes = [ @@ -176,24 +177,22 @@ class TestFullStack(unittest.TestCase): self.assertGreater(max_wlen, 5) def test_tsc_error(): - err_present = yield from mgr.packet_err_present.read() - self.assertEqual(err_present, 0) + errors = yield from saterr.protocol_error.read() + self.assertEqual(errors, 0) yield from csrs.tsc_correction.write(100000000) yield from csrs.set_time.write(1) for i in range(15): yield delay(10000*8) yield from write(0, 1) - for i in range(10): + for i in range(12): yield - err_present = yield from mgr.packet_err_present.read() - err_code = yield from mgr.packet_err_code.read() - self.assertEqual(err_present, 1) - self.assertEqual(err_code, rt_serializer.error_codes["write_underflow"]) - yield from mgr.packet_err_present.write(1) + errors = yield from saterr.protocol_error.read() + self.assertEqual(errors, 4) # write underflow + yield from saterr.protocol_error.write(errors) yield - err_present = yield from mgr.packet_err_present.read() - self.assertEqual(err_present, 0) + errors = yield from saterr.protocol_error.read() + self.assertEqual(errors, 0) def wait_ttl_events(): while len(ttl_changes) < len(correct_ttl_changes):