From edf403b837258ce42ed91886579ac82d85629e9f Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Wed, 12 Sep 2018 15:44:34 +0800 Subject: [PATCH] drtio: improve error reporting --- artiq/firmware/satman/main.rs | 6 ++- artiq/firmware/satman/repeater.rs | 39 +++++++++++++++++-- artiq/gateware/drtio/core.py | 2 +- .../gateware/drtio/rt_controller_repeater.py | 25 +++++++++--- artiq/gateware/drtio/rt_errors_satellite.py | 7 +++- artiq/gateware/drtio/rt_packet_repeater.py | 6 +-- artiq/gateware/drtio/rt_packet_satellite.py | 2 + 7 files changed, 71 insertions(+), 16 deletions(-) diff --git a/artiq/firmware/satman/main.rs b/artiq/firmware/satman/main.rs index 12bb9d945..56345ae7a 100644 --- a/artiq/firmware/satman/main.rs +++ b/artiq/firmware/satman/main.rs @@ -260,7 +260,11 @@ fn drtiosat_process_errors() { error!("received truncated packet"); } if errors & 4 != 0 { - error!("timeout attempting to get buffer space from CRI") + let destination; + unsafe { + destination = csr::drtiosat::buffer_space_timeout_dest_read(); + } + error!("timeout attempting to get buffer space from CRI, destination=0x{:02x}", destination) } if errors & 8 != 0 { let channel; diff --git a/artiq/firmware/satman/repeater.rs b/artiq/firmware/satman/repeater.rs index c58d4b846..6f5b7b28f 100644 --- a/artiq/firmware/satman/repeater.rs +++ b/artiq/firmware/satman/repeater.rs @@ -2,10 +2,10 @@ use board_misoc::{csr, clock}; use board_artiq::{drtioaux, drtio_routing}; #[cfg(has_drtio_routing)] -fn rep_link_rx_up(linkno: u8) -> bool { - let linkno = linkno as usize; +fn rep_link_rx_up(repno: u8) -> bool { + let repno = repno as usize; unsafe { - (csr::DRTIOREP[linkno].rx_up_read)() == 1 + (csr::DRTIOREP[repno].rx_up_read)() == 1 } } @@ -40,6 +40,8 @@ impl Repeater { } pub fn service(&mut self, routing_table: &drtio_routing::RoutingTable, rank: u8) { + self.process_errors(); + match self.state { RepeaterState::Down => { if rep_link_rx_up(self.repno) { @@ -109,6 +111,37 @@ impl Repeater { } } + fn process_errors(&self) { + let repno = self.repno as usize; + let errors; + unsafe { + errors = (csr::DRTIOREP[repno].protocol_error_read)(); + } + if errors & 1 != 0 { + error!("[REP#{}] received packet of an unknown type", repno); + } + if errors & 2 != 0 { + error!("[REP#{}] received truncated packet", repno); + } + if errors & 4 != 0 { + let chan_sel; + unsafe { + chan_sel = (csr::DRTIOREP[repno].command_missed_chan_sel_read)(); + } + error!("[REP#{}] CRI command missed, chan_sel=0x{:06x}", repno, chan_sel) + } + if errors & 8 != 0 { + let destination; + unsafe { + destination = (csr::DRTIOREP[repno].buffer_space_timeout_dest_read)(); + } + error!("[REP#{}] timeout attempting to get remote buffer space, destination=0x{:02x}", repno, destination); + } + unsafe { + (csr::DRTIOREP[repno].protocol_error_write)(errors); + } + } + fn recv_aux_timeout(&self, timeout: u32) -> Result { let max_time = clock::get_ms() + timeout as u64; loop { diff --git a/artiq/gateware/drtio/core.py b/artiq/gateware/drtio/core.py index 52e2bb948..55176b1ce 100644 --- a/artiq/gateware/drtio/core.py +++ b/artiq/gateware/drtio/core.py @@ -143,7 +143,7 @@ class DRTIOSatellite(Module): ] self.submodules.rt_errors = rt_errors_satellite.RTErrorsSatellite( - self.rt_packet, tsc, self.cri, self.async_errors) + self.rt_packet, tsc, self.async_errors) def get_csrs(self): return ([self.reset, self.reset_phy, self.tsc_loaded] + diff --git a/artiq/gateware/drtio/rt_controller_repeater.py b/artiq/gateware/drtio/rt_controller_repeater.py index d75e87f64..655bf641e 100644 --- a/artiq/gateware/drtio/rt_controller_repeater.py +++ b/artiq/gateware/drtio/rt_controller_repeater.py @@ -10,6 +10,8 @@ class RTController(Module, AutoCSR): def __init__(self, rt_packet): self.set_time = CSR() self.protocol_error = CSR(4) + self.command_missed_chan_sel = CSRStatus(24) + self.buffer_space_timeout_dest = CSRStatus(8) set_time_stb = Signal() set_time_ack = Signal() @@ -23,14 +25,21 @@ class RTController(Module, AutoCSR): self.comb += self.set_time.w.eq(set_time_stb) errors = [ - (rt_packet.err_unknown_packet_type, "rtio_rx"), - (rt_packet.err_packet_truncated, "rtio_rx"), - (rt_packet.err_command_missed, "rtio"), - (rt_packet.err_buffer_space_timeout, "rtio") + (rt_packet.err_unknown_packet_type, "rtio_rx", None, None), + (rt_packet.err_packet_truncated, "rtio_rx", None, None), + (rt_packet.err_command_missed, "rtio", + rt_packet.cri.chan_sel, self.command_missed_chan_sel.status), + (rt_packet.err_buffer_space_timeout, "rtio", + rt_packet.buffer_space_destination, self.buffer_space_timeout_dest.status) ] - for n, (err_i, err_cd) in enumerate(errors): - xfer = BlindTransfer(err_cd, "sys") + for n, (err_i, err_cd, din, dout) in enumerate(errors): + if din is not None: + data_width = len(din) + else: + data_width = 0 + + xfer = BlindTransfer(err_cd, "sys", data_width=data_width) self.submodules += xfer self.comb += xfer.i.eq(err_i) @@ -41,3 +50,7 @@ class RTController(Module, AutoCSR): If(xfer.o, err_pending.eq(1)) ] self.comb += self.protocol_error.w[n].eq(err_pending) + + if din is not None: + self.comb += xfer.data_i.eq(din) + self.sync += If(xfer.o & ~err_pending, dout.eq(xfer.data_o)) diff --git a/artiq/gateware/drtio/rt_errors_satellite.py b/artiq/gateware/drtio/rt_errors_satellite.py index 2bf190a0f..1d857654c 100644 --- a/artiq/gateware/drtio/rt_errors_satellite.py +++ b/artiq/gateware/drtio/rt_errors_satellite.py @@ -7,11 +7,12 @@ from artiq.gateware.rtio.cdc import BlindTransfer class RTErrorsSatellite(Module, AutoCSR): - def __init__(self, rt_packet, tsc, cri, async_errors): + def __init__(self, rt_packet, tsc, async_errors): self.protocol_error = CSR(5) self.underflow_channel = CSRStatus(16) self.underflow_timestamp_event = CSRStatus(64) self.underflow_timestamp_counter = CSRStatus(64) + self.buffer_space_timeout_dest = CSRStatus(8) self.rtio_error = CSR(3) self.sequence_error_channel = CSRStatus(16) @@ -47,6 +48,7 @@ class RTErrorsSatellite(Module, AutoCSR): self.comb += xfer.data_i.eq(din) self.sync += If(xfer.o & ~pending, dout.eq(xfer.data_o)) + cri = rt_packet.cri # The master is normally responsible for avoiding output overflows # and output underflows. The error reports here are only for diagnosing @@ -68,7 +70,8 @@ class RTErrorsSatellite(Module, AutoCSR): error_csr(self.protocol_error, (rt_packet.unknown_packet_type, False, None, None), (rt_packet.packet_truncated, False, None, None), - (rt_packet.buffer_space_timeout, False, None, None), + (rt_packet.buffer_space_timeout, False, + cri.chan_sel[16:], self.buffer_space_timeout_dest.status), (underflow, True, underflow_error_cri, underflow_error_csr), (overflow, True, None, None) ) diff --git a/artiq/gateware/drtio/rt_packet_repeater.py b/artiq/gateware/drtio/rt_packet_repeater.py index 1798b6da8..0ecd353a8 100644 --- a/artiq/gateware/drtio/rt_packet_repeater.py +++ b/artiq/gateware/drtio/rt_packet_repeater.py @@ -20,6 +20,7 @@ class RTPacketRepeater(Module): # in rtio domain self.err_command_missed = Signal() self.err_buffer_space_timeout = Signal() + self.buffer_space_destination = Signal(8) # set_time interface, in rtio domain self.set_time_stb = Signal() @@ -85,9 +86,8 @@ class RTPacketRepeater(Module): ) # Buffer space - buffer_space_destination = Signal(8) self.sync.rtio += If(self.cri.cmd == cri.commands["get_buffer_space"], - buffer_space_destination.eq(self.cri.chan_sel[16:])) + self.buffer_space_destination.eq(self.cri.chan_sel[16:])) rx_buffer_space_not = Signal() rx_buffer_space = Signal(16) @@ -153,7 +153,7 @@ class RTPacketRepeater(Module): ) ) tx_fsm.act("BUFFER_SPACE", - tx_dp.send("buffer_space_request", destination=buffer_space_destination), + tx_dp.send("buffer_space_request", destination=self.buffer_space_destination), If(tx_dp.packet_last, buffer_space_not_ack.eq(1), NextState("WAIT_BUFFER_SPACE") diff --git a/artiq/gateware/drtio/rt_packet_satellite.py b/artiq/gateware/drtio/rt_packet_satellite.py index 4aba55dcb..49ea2c3d0 100644 --- a/artiq/gateware/drtio/rt_packet_satellite.py +++ b/artiq/gateware/drtio/rt_packet_satellite.py @@ -144,6 +144,7 @@ class RTPacketSatellite(Module): NextState("INPUT") ) + # CRI mux defaults to write information rx_fsm.act("WRITE", If(write_data_buffer_cnt == rx_dp.packet_as["write"].extra_data_cnt, NextState("WRITE_CMD") @@ -170,6 +171,7 @@ class RTPacketSatellite(Module): NextState("BUFFER_SPACE") ) rx_fsm.act("BUFFER_SPACE", + cri_buffer_space.eq(1), timeout_counter.wait.eq(1), If(timeout_counter.done, self.buffer_space_timeout.eq(1),