From 5437f0e3e36cabce5757b343db7d3b2a5bb5fbde Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 29 Sep 2017 14:40:06 +0800 Subject: [PATCH] rtio: make sequence errors consistently asychronous --- artiq/coredevice/__init__.py | 4 +- artiq/coredevice/comm_analyzer.py | 2 +- artiq/coredevice/exceptions.py | 13 +++--- artiq/firmware/ksupport/lib.rs | 19 +++----- artiq/firmware/ksupport/rtio.rs | 6 --- artiq/firmware/runtime/rtio_mgt.rs | 3 ++ artiq/gateware/drtio/rt_errors_satellite.py | 6 +-- artiq/gateware/rtio/analyzer.py | 4 -- artiq/gateware/rtio/core.py | 14 +++--- artiq/gateware/rtio/cri.py | 6 +-- artiq/gateware/rtio/dma.py | 45 ++++++++----------- artiq/gateware/rtio/sed/core.py | 16 +++++++ artiq/gateware/rtio/sed/lane_distributor.py | 14 +++--- artiq/gateware/test/drtio/test_full_stack.py | 2 - .../test/rtio/test_sed_lane_distributor.py | 2 +- artiq/gateware/test/rtio/test_sed_top.py | 2 +- artiq/test/coredevice/test_rtio.py | 28 +++++------- doc/manual/rtio.rst | 16 +++++++ 18 files changed, 102 insertions(+), 100 deletions(-) diff --git a/artiq/coredevice/__init__.py b/artiq/coredevice/__init__.py index 70539315d..0a1a4de6c 100644 --- a/artiq/coredevice/__init__.py +++ b/artiq/coredevice/__init__.py @@ -1,9 +1,9 @@ from artiq.coredevice import exceptions, dds, spi -from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError, RTIOOverflow) +from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOOverflow) from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE, PHASE_MODE_TRACKING) __all__ = [] -__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOOverflow"] +__all__ += ["RTIOUnderflow", "RTIOOverflow"] __all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE", "PHASE_MODE_TRACKING"] diff --git a/artiq/coredevice/comm_analyzer.py b/artiq/coredevice/comm_analyzer.py index d8b6e0434..f4b2d56f0 100644 --- a/artiq/coredevice/comm_analyzer.py +++ b/artiq/coredevice/comm_analyzer.py @@ -27,9 +27,9 @@ class ExceptionType(Enum): legacy_o_sequence_error_reset = 0b010001 legacy_o_collision_reset = 0b010010 legacy_i_overflow_reset = 0b100000 + legacy_o_sequence_error = 0b010101 o_underflow = 0b010100 - o_sequence_error = 0b010101 i_overflow = 0b100001 diff --git a/artiq/coredevice/exceptions.py b/artiq/coredevice/exceptions.py index 44d0af86e..de86baf25 100644 --- a/artiq/coredevice/exceptions.py +++ b/artiq/coredevice/exceptions.py @@ -78,13 +78,6 @@ class RTIOUnderflow(Exception): """ artiq_builtin = True -class RTIOSequenceError(Exception): - """Raised when an event is submitted on a given channel with a timestamp - not larger than the previous one. - - The offending event is discarded and the RTIO core keeps operating. - """ - artiq_builtin = True class RTIOOverflow(Exception): """Raised when at least one event could not be registered into the RTIO @@ -96,26 +89,32 @@ class RTIOOverflow(Exception): """ artiq_builtin = True + class DMAError(Exception): """Raised when performing an invalid DMA operation.""" artiq_builtin = True + class DDSError(Exception): """Raised when attempting to start a DDS batch while already in a batch, when too many commands are batched, and when DDS channel settings are incorrect. """ + class WatchdogExpired(Exception): """Raised when a watchdog expires.""" + class ClockFailure(Exception): """Raised when RTIO PLL has lost lock.""" + class I2CError(Exception): """Raised when a I2C transaction fails.""" pass + class SPIError(Exception): """Raised when a SPI transaction fails.""" pass diff --git a/artiq/firmware/ksupport/lib.rs b/artiq/firmware/ksupport/lib.rs index 2f06b0118..8d3d8bf7c 100644 --- a/artiq/firmware/ksupport/lib.rs +++ b/artiq/firmware/ksupport/lib.rs @@ -380,22 +380,13 @@ extern fn dma_playback(timestamp: i64, ptr: i32) { while csr::rtio_dma::enable_read() != 0 {} csr::cri_con::selected_write(0); - let status = csr::rtio_dma::error_status_read(); - if status != 0 { + if csr::rtio_dma::underflow_read() != 0 { let timestamp = csr::rtio_dma::error_timestamp_read(); let channel = csr::rtio_dma::error_channel_read(); - if status & rtio::RTIO_O_STATUS_UNDERFLOW != 0 { - csr::rtio_dma::error_underflow_reset_write(1); - raise!("RTIOUnderflow", - "RTIO underflow at {0} mu, channel {1}", - timestamp as i64, channel as i64, 0) - } - if status & rtio::RTIO_O_STATUS_SEQUENCE_ERROR != 0 { - csr::rtio_dma::error_sequence_error_reset_write(1); - raise!("RTIOSequenceError", - "RTIO sequence error at {0} mu, channel {1}", - timestamp as i64, channel as i64, 0) - } + csr::rtio_dma::underflow_write(1); + raise!("RTIOUnderflow", + "RTIO underflow at {0} mu, channel {1}", + timestamp as i64, channel as i64, 0) } } } diff --git a/artiq/firmware/ksupport/rtio.rs b/artiq/firmware/ksupport/rtio.rs index e03b2ea4c..feeb7890f 100644 --- a/artiq/firmware/ksupport/rtio.rs +++ b/artiq/firmware/ksupport/rtio.rs @@ -6,7 +6,6 @@ use kernel_proto::*; pub const RTIO_O_STATUS_WAIT: u8 = 1; pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2; -pub const RTIO_O_STATUS_SEQUENCE_ERROR: u8 = 4; pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1; pub const RTIO_I_STATUS_OVERFLOW: u8 = 2; pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4; @@ -45,11 +44,6 @@ unsafe fn process_exceptional_status(timestamp: i64, channel: i32, status: u8) { "RTIO underflow at {0} mu, channel {1}, slack {2} mu", timestamp, channel as i64, timestamp - get_counter()) } - if status & RTIO_O_STATUS_SEQUENCE_ERROR != 0 { - raise!("RTIOSequenceError", - "RTIO sequence error at {0} mu, channel {1}", - timestamp, channel as i64, 0) - } } pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) { diff --git a/artiq/firmware/runtime/rtio_mgt.rs b/artiq/firmware/runtime/rtio_mgt.rs index c411295f4..992a62bb3 100644 --- a/artiq/firmware/runtime/rtio_mgt.rs +++ b/artiq/firmware/runtime/rtio_mgt.rs @@ -195,6 +195,9 @@ fn async_error_thread(io: Io) { if errors & 2 != 0 { error!("RTIO busy"); } + if errors & 4 != 0 { + error!("RTIO sequence error"); + } csr::rtio_core::async_error_write(errors); } } diff --git a/artiq/gateware/drtio/rt_errors_satellite.py b/artiq/gateware/drtio/rt_errors_satellite.py index 173dd7dcf..9b3ab00a0 100644 --- a/artiq/gateware/drtio/rt_errors_satellite.py +++ b/artiq/gateware/drtio/rt_errors_satellite.py @@ -35,11 +35,9 @@ class RTErrorsSatellite(Module, AutoCSR): # internal ARTIQ bugs. underflow = Signal() overflow = Signal() - sequence_error = Signal() self.comb += [ underflow.eq(outputs.cri.o_status[1]), - overflow.eq(outputs.cri.o_status[0]), - sequence_error.eq(outputs.cri.o_status[2]) + overflow.eq(outputs.cri.o_status[0]) ] error_csr(self.protocol_error, (rt_packet.unknown_packet_type, False), @@ -48,7 +46,7 @@ class RTErrorsSatellite(Module, AutoCSR): (overflow, True) ) error_csr(self.rtio_error, - (sequence_error, True), + (outputs.sequence_error, False), (outputs.collision, False), (outputs.busy, False) ) diff --git a/artiq/gateware/rtio/analyzer.py b/artiq/gateware/rtio/analyzer.py index 2461f80c1..f958ca8d6 100644 --- a/artiq/gateware/rtio/analyzer.py +++ b/artiq/gateware/rtio/analyzer.py @@ -94,10 +94,6 @@ class MessageEncoder(Module, AutoCSR): exception_stb.eq(1), exception.exception_type.eq(ExceptionType.o_underflow.value) ), - If(just_written & cri.o_status[2], - exception_stb.eq(1), - exception.exception_type.eq(ExceptionType.o_sequence_error.value) - ), If(read_overflow, exception_stb.eq(1), exception.exception_type.eq(ExceptionType.i_overflow.value) diff --git a/artiq/gateware/rtio/core.py b/artiq/gateware/rtio/core.py index 8305db821..c77d5084d 100644 --- a/artiq/gateware/rtio/core.py +++ b/artiq/gateware/rtio/core.py @@ -19,7 +19,7 @@ class Core(Module, AutoCSR): self.cri = cri.Interface() self.reset = CSR() self.reset_phy = CSR() - self.async_error = CSR(2) + self.async_error = CSR(3) # Clocking/Reset # Create rsys, rio and rio_phy domains based on sys and rtio @@ -75,18 +75,21 @@ class Core(Module, AutoCSR): o_collision_sync = BlindTransfer() o_busy_sync = BlindTransfer() self.submodules += o_collision_sync, o_busy_sync + o_sequence_error_trig = Signal() o_collision = Signal() o_busy = Signal() + o_sequence_error = Signal() self.sync += [ If(self.async_error.re, If(self.async_error.r[0], o_collision.eq(0)), If(self.async_error.r[1], o_busy.eq(0)), + If(self.async_error.r[2], o_sequence_error.eq(0)), ), If(o_collision_sync.o, o_collision.eq(1)), - If(o_busy_sync.o, o_busy.eq(1)) + If(o_busy_sync.o, o_busy.eq(1)), + If(o_sequence_error_trig, o_sequence_error.eq(1)) ] - self.comb += self.async_error.w.eq(Cat(o_collision, o_busy)) - + self.comb += self.async_error.w.eq(Cat(o_collision, o_busy, o_sequence_error)) # Outputs/Inputs quash_channels = [n for n, c in enumerate(channels) if isinstance(c, LogChannel)] @@ -100,7 +103,8 @@ class Core(Module, AutoCSR): self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16) self.comb += [ o_collision_sync.i.eq(outputs.collision), - o_busy_sync.i.eq(outputs.busy) + o_busy_sync.i.eq(outputs.busy), + o_sequence_error_trig.eq(outputs.sequence_error) ] inputs = InputCollector(channels, glbl_fine_ts_width, "async", diff --git a/artiq/gateware/rtio/cri.py b/artiq/gateware/rtio/cri.py index 5dad6b886..4885c0219 100644 --- a/artiq/gateware/rtio/cri.py +++ b/artiq/gateware/rtio/cri.py @@ -25,8 +25,8 @@ layout = [ ("o_data", 512, DIR_M_TO_S), ("o_address", 16, DIR_M_TO_S), # o_status bits: - # <0:wait> <1:underflow> <2:sequence_error> - ("o_status", 3, DIR_S_TO_M), + # <0:wait> <1:underflow> + ("o_status", 2, DIR_S_TO_M), # targets may optionally report a pessimistic estimate of the number # of outputs events that can be written without waiting. ("o_buffer_space", 16, DIR_S_TO_M), @@ -61,7 +61,7 @@ class KernelInitiator(Module, AutoCSR): self.o_data = CSRStorage(512, write_from_dev=True) self.o_address = CSRStorage(16) self.o_we = CSR() - self.o_status = CSRStatus(3) + self.o_status = CSRStatus(2) self.i_data = CSRStatus(32) self.i_timestamp = CSRStatus(64) diff --git a/artiq/gateware/rtio/dma.py b/artiq/gateware/rtio/dma.py index ecfb3b5f5..0ea364ef8 100644 --- a/artiq/gateware/rtio/dma.py +++ b/artiq/gateware/rtio/dma.py @@ -242,9 +242,7 @@ class TimeOffset(Module, AutoCSR): class CRIMaster(Module, AutoCSR): def __init__(self): - self.error_status = CSRStatus(3) # same encoding as RTIO status - self.error_underflow_reset = CSR() - self.error_sequence_error_reset = CSR() + self.underflow = CSR() self.error_channel = CSRStatus(24) self.error_timestamp = CSRStatus(64) @@ -256,19 +254,16 @@ class CRIMaster(Module, AutoCSR): # # # - error_set = Signal(2) - for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset]): - # bit 0 is RTIO wait and always 0 here - bit = i + 1 - self.sync += [ - If(error_set[i], - self.error_status.status[bit].eq(1), - self.error_channel.status.eq(self.sink.channel), - self.error_timestamp.status.eq(self.sink.timestamp), - self.error_address.status.eq(self.sink.address) - ), - If(rcsr.re, self.error_status.status[bit].eq(0)) - ] + underflow_trigger = Signal(2) + self.sync += [ + If(underflow_trigger, + self.underflow.w.eq(1), + self.error_channel.status.eq(self.sink.channel), + self.error_timestamp.status.eq(self.sink.timestamp), + self.error_address.status.eq(self.sink.address) + ), + If(self.underflow.re, self.underflow.w.eq(0)) + ] self.comb += [ self.cri.chan_sel.eq(self.sink.channel), @@ -281,7 +276,7 @@ class CRIMaster(Module, AutoCSR): self.submodules += fsm fsm.act("IDLE", - If(self.error_status.status == 0, + If(~self.underflow.w, If(self.sink.stb, If(self.sink.eop, # last packet contains dummy data, discard it @@ -306,16 +301,14 @@ class CRIMaster(Module, AutoCSR): self.sink.ack.eq(1), NextState("IDLE") ), - If(self.cri.o_status[1], NextState("UNDERFLOW")), - If(self.cri.o_status[2], NextState("SEQUENCE_ERROR")) + If(self.cri.o_status[1], NextState("UNDERFLOW")) + ) + fsm.act("UNDERFLOW", + self.busy.eq(1), + underflow_trigger.eq(1), + self.sink.ack.eq(1), + NextState("IDLE") ) - for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR"]): - fsm.act(name, - self.busy.eq(1), - error_set.eq(1 << n), - self.sink.ack.eq(1), - NextState("IDLE") - ) class DMA(Module): diff --git a/artiq/gateware/rtio/sed/core.py b/artiq/gateware/rtio/sed/core.py index 472eda626..ca757e976 100644 --- a/artiq/gateware/rtio/sed/core.py +++ b/artiq/gateware/rtio/sed/core.py @@ -61,26 +61,42 @@ class SED(Module): def cri(self): return self.lane_dist.cri + # in CRI clock domain @property def minimum_coarse_timestamp(self): return self.lane_dist.minimum_coarse_timestamp + # in I/O clock domain @property def coarse_timestamp(self): return self.gates.coarse_timestamp + # in CRI clock domain + @property + def sequence_error(self): + return self.lane_dist.sequence_error + + # in CRI clock domain + @property + def sequence_error_channel(self): + return self.lane_dist.sequence_error_channel + + # in I/O clock domain @property def collision(self): return self.output_driver.collision + # in I/O clock domain @property def collision_channel(self): return self.output_driver.collision_channel + # in I/O clock domain @property def busy(self): return self.output_driver.busy + # in I/O clock domain @property def busy_channel(self): return self.output_driver.busy_channel diff --git a/artiq/gateware/rtio/sed/lane_distributor.py b/artiq/gateware/rtio/sed/lane_distributor.py index 232c37ac6..d6e346c9a 100644 --- a/artiq/gateware/rtio/sed/lane_distributor.py +++ b/artiq/gateware/rtio/sed/lane_distributor.py @@ -22,6 +22,8 @@ class LaneDistributor(Module): if interface is None: interface = cri.Interface() self.cri = interface + self.sequence_error = Signal() + self.sequence_error_channel = Signal(16) self.minimum_coarse_timestamp = Signal(64-glbl_fine_ts_width) self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload)) for _ in range(lane_count)] @@ -30,9 +32,7 @@ class LaneDistributor(Module): o_status_wait = Signal() o_status_underflow = Signal() - o_status_sequence_error = Signal() - self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow, - o_status_sequence_error)) + self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow)) # internal state current_lane = Signal(max=lane_count) @@ -135,15 +135,13 @@ class LaneDistributor(Module): ] self.sync += [ If(self.cri.cmd == cri.commands["write"], - o_status_underflow.eq(0), - o_status_sequence_error.eq(0) + o_status_underflow.eq(0) ), If(do_underflow, o_status_underflow.eq(1) ), - If(do_sequence_error, - o_status_sequence_error.eq(1) - ) + self.sequence_error.eq(do_sequence_error), + self.sequence_error_channel.eq(self.cri.chan_sel[:16]) ] # current lane has been full, spread events by switching to the next. diff --git a/artiq/gateware/test/drtio/test_full_stack.py b/artiq/gateware/test/drtio/test_full_stack.py index 3b1281005..23810321c 100644 --- a/artiq/gateware/test/drtio/test_full_stack.py +++ b/artiq/gateware/test/drtio/test_full_stack.py @@ -111,8 +111,6 @@ class OutputsTestbench: status = yield from kcsrs.o_status.read() if status & 2: raise RTIOUnderflow - if status & 4: - raise RTIOSequenceError yield wlen += 1 return wlen diff --git a/artiq/gateware/test/rtio/test_sed_lane_distributor.py b/artiq/gateware/test/rtio/test_sed_lane_distributor.py index 488d914a9..c02b6c4bb 100644 --- a/artiq/gateware/test/rtio/test_sed_lane_distributor.py +++ b/artiq/gateware/test/rtio/test_sed_lane_distributor.py @@ -38,7 +38,7 @@ def simulate(input_events, compensation=None, wait=True): access_status = "ok" if status & 0x02: access_status = "underflow" - if status & 0x04: + if (yield dut.sequence_error): access_status = "sequence_error" access_results.append((access_status, access_time)) diff --git a/artiq/gateware/test/rtio/test_sed_top.py b/artiq/gateware/test/rtio/test_sed_top.py index eeb4c46a8..d5de88979 100644 --- a/artiq/gateware/test/rtio/test_sed_top.py +++ b/artiq/gateware/test/rtio/test_sed_top.py @@ -56,7 +56,7 @@ def simulate(input_events): access_status = "ok" if status & 0x02: access_status = "underflow" - if status & 0x04: + if (yield dut.sed.sequence_error): access_status = "sequence_error" access_results.append((access_status, access_time)) diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index 086b964d8..79043c9b8 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -401,27 +401,23 @@ class CoredeviceTest(ExperimentCase): with self.assertRaises(RTIOUnderflow): self.execute(Underflow) + def execute_and_test_in_log(self, experiment, string): + core_addr = self.device_mgr.get_desc("core")["arguments"]["host"] + mgmt = CommMgmt(core_addr) + mgmt.clear_log() + self.execute(experiment) + log = mgmt.get_log() + self.assertIn(string, log) + mgmt.close() + def test_sequence_error(self): - with self.assertRaises(RTIOSequenceError): - self.execute(SequenceError) + self.execute_and_test_in_log(SequenceError, "RTIO sequence error") def test_collision(self): - core_addr = self.device_mgr.get_desc("core")["arguments"]["host"] - mgmt = CommMgmt(core_addr) - mgmt.clear_log() - self.execute(Collision) - log = mgmt.get_log() - self.assertIn("RTIO collision", log) - mgmt.close() + self.execute_and_test_in_log(Collision, "RTIO collision") def test_address_collision(self): - core_addr = self.device_mgr.get_desc("core")["arguments"]["host"] - mgmt = CommMgmt(core_addr) - mgmt.clear_log() - self.execute(AddressCollision) - log = mgmt.get_log() - self.assertIn("RTIO collision", log) - mgmt.close() + self.execute_and_test_in_log(AddressCollision, "RTIO collision") def test_watchdog(self): # watchdog only works on the device diff --git a/doc/manual/rtio.rst b/doc/manual/rtio.rst index a08edad32..f62c3b57d 100644 --- a/doc/manual/rtio.rst +++ b/doc/manual/rtio.rst @@ -117,6 +117,22 @@ To track down ``RTIOUnderflows`` in an experiment there are a few approaches: code. * The :any:`integrated logic analyzer ` shows the timeline context that lead to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between timeline cursor and wall clock time (``now - rtio_counter``). +Sequence errors +--------------- +A sequence error happens when the sequence of coarse timestamps cannot be supported by the gateware. For example, there may have been too many timeline rewinds. + +Internally, the gateware stores output events in an array of FIFO buffers (the "lanes") and the timestamps in each lane much be strictly increasing. The gateware selects a different lane when an event with a decreasing or equal timestamp is submitted. A sequence error occurs when no appropriate lane can be found. + +Notes: + +* Strictly increasing timestamps never cause sequence errors. +* Configuring the gateware with more lanes for the RTIO core reduces the frequency of sequence errors. +* Whether a particular sequence of timestamps causes a sequence error or not is fully deterministic (starting from a known RTIO state, e.g. after a reset). Adding a constant offset to the whole sequence does not affect the result. + +The offending event is discarded and the RTIO core keeps operating. + +This error is reported asynchronously via the core device log: for performance reasons with DRTIO, the CPU does not wait for an error report from the satellite after writing an event. Therefore, it is not possible to raise an exception precisely. + Collisions ---------- A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different.