forked from M-Labs/artiq
rtio: make sequence errors consistently asychronous
This commit is contained in:
parent
4e31e9a9ac
commit
5437f0e3e3
|
@ -1,9 +1,9 @@
|
|||
from artiq.coredevice import exceptions, dds, spi
|
||||
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError, RTIOOverflow)
|
||||
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOOverflow)
|
||||
from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE,
|
||||
PHASE_MODE_TRACKING)
|
||||
|
||||
__all__ = []
|
||||
__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOOverflow"]
|
||||
__all__ += ["RTIOUnderflow", "RTIOOverflow"]
|
||||
__all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE",
|
||||
"PHASE_MODE_TRACKING"]
|
||||
|
|
|
@ -27,9 +27,9 @@ class ExceptionType(Enum):
|
|||
legacy_o_sequence_error_reset = 0b010001
|
||||
legacy_o_collision_reset = 0b010010
|
||||
legacy_i_overflow_reset = 0b100000
|
||||
legacy_o_sequence_error = 0b010101
|
||||
|
||||
o_underflow = 0b010100
|
||||
o_sequence_error = 0b010101
|
||||
|
||||
i_overflow = 0b100001
|
||||
|
||||
|
|
|
@ -78,13 +78,6 @@ class RTIOUnderflow(Exception):
|
|||
"""
|
||||
artiq_builtin = True
|
||||
|
||||
class RTIOSequenceError(Exception):
|
||||
"""Raised when an event is submitted on a given channel with a timestamp
|
||||
not larger than the previous one.
|
||||
|
||||
The offending event is discarded and the RTIO core keeps operating.
|
||||
"""
|
||||
artiq_builtin = True
|
||||
|
||||
class RTIOOverflow(Exception):
|
||||
"""Raised when at least one event could not be registered into the RTIO
|
||||
|
@ -96,26 +89,32 @@ class RTIOOverflow(Exception):
|
|||
"""
|
||||
artiq_builtin = True
|
||||
|
||||
|
||||
class DMAError(Exception):
|
||||
"""Raised when performing an invalid DMA operation."""
|
||||
artiq_builtin = True
|
||||
|
||||
|
||||
class DDSError(Exception):
|
||||
"""Raised when attempting to start a DDS batch while already in a batch,
|
||||
when too many commands are batched, and when DDS channel settings are
|
||||
incorrect.
|
||||
"""
|
||||
|
||||
|
||||
class WatchdogExpired(Exception):
|
||||
"""Raised when a watchdog expires."""
|
||||
|
||||
|
||||
class ClockFailure(Exception):
|
||||
"""Raised when RTIO PLL has lost lock."""
|
||||
|
||||
|
||||
class I2CError(Exception):
|
||||
"""Raised when a I2C transaction fails."""
|
||||
pass
|
||||
|
||||
|
||||
class SPIError(Exception):
|
||||
"""Raised when a SPI transaction fails."""
|
||||
pass
|
||||
|
|
|
@ -380,23 +380,14 @@ extern fn dma_playback(timestamp: i64, ptr: i32) {
|
|||
while csr::rtio_dma::enable_read() != 0 {}
|
||||
csr::cri_con::selected_write(0);
|
||||
|
||||
let status = csr::rtio_dma::error_status_read();
|
||||
if status != 0 {
|
||||
if csr::rtio_dma::underflow_read() != 0 {
|
||||
let timestamp = csr::rtio_dma::error_timestamp_read();
|
||||
let channel = csr::rtio_dma::error_channel_read();
|
||||
if status & rtio::RTIO_O_STATUS_UNDERFLOW != 0 {
|
||||
csr::rtio_dma::error_underflow_reset_write(1);
|
||||
csr::rtio_dma::underflow_write(1);
|
||||
raise!("RTIOUnderflow",
|
||||
"RTIO underflow at {0} mu, channel {1}",
|
||||
timestamp as i64, channel as i64, 0)
|
||||
}
|
||||
if status & rtio::RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
|
||||
csr::rtio_dma::error_sequence_error_reset_write(1);
|
||||
raise!("RTIOSequenceError",
|
||||
"RTIO sequence error at {0} mu, channel {1}",
|
||||
timestamp as i64, channel as i64, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,6 @@ use kernel_proto::*;
|
|||
|
||||
pub const RTIO_O_STATUS_WAIT: u8 = 1;
|
||||
pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2;
|
||||
pub const RTIO_O_STATUS_SEQUENCE_ERROR: u8 = 4;
|
||||
pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1;
|
||||
pub const RTIO_I_STATUS_OVERFLOW: u8 = 2;
|
||||
pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4;
|
||||
|
@ -45,11 +44,6 @@ unsafe fn process_exceptional_status(timestamp: i64, channel: i32, status: u8) {
|
|||
"RTIO underflow at {0} mu, channel {1}, slack {2} mu",
|
||||
timestamp, channel as i64, timestamp - get_counter())
|
||||
}
|
||||
if status & RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
|
||||
raise!("RTIOSequenceError",
|
||||
"RTIO sequence error at {0} mu, channel {1}",
|
||||
timestamp, channel as i64, 0)
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {
|
||||
|
|
|
@ -195,6 +195,9 @@ fn async_error_thread(io: Io) {
|
|||
if errors & 2 != 0 {
|
||||
error!("RTIO busy");
|
||||
}
|
||||
if errors & 4 != 0 {
|
||||
error!("RTIO sequence error");
|
||||
}
|
||||
csr::rtio_core::async_error_write(errors);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,11 +35,9 @@ class RTErrorsSatellite(Module, AutoCSR):
|
|||
# internal ARTIQ bugs.
|
||||
underflow = Signal()
|
||||
overflow = Signal()
|
||||
sequence_error = Signal()
|
||||
self.comb += [
|
||||
underflow.eq(outputs.cri.o_status[1]),
|
||||
overflow.eq(outputs.cri.o_status[0]),
|
||||
sequence_error.eq(outputs.cri.o_status[2])
|
||||
overflow.eq(outputs.cri.o_status[0])
|
||||
]
|
||||
error_csr(self.protocol_error,
|
||||
(rt_packet.unknown_packet_type, False),
|
||||
|
@ -48,7 +46,7 @@ class RTErrorsSatellite(Module, AutoCSR):
|
|||
(overflow, True)
|
||||
)
|
||||
error_csr(self.rtio_error,
|
||||
(sequence_error, True),
|
||||
(outputs.sequence_error, False),
|
||||
(outputs.collision, False),
|
||||
(outputs.busy, False)
|
||||
)
|
||||
|
|
|
@ -94,10 +94,6 @@ class MessageEncoder(Module, AutoCSR):
|
|||
exception_stb.eq(1),
|
||||
exception.exception_type.eq(ExceptionType.o_underflow.value)
|
||||
),
|
||||
If(just_written & cri.o_status[2],
|
||||
exception_stb.eq(1),
|
||||
exception.exception_type.eq(ExceptionType.o_sequence_error.value)
|
||||
),
|
||||
If(read_overflow,
|
||||
exception_stb.eq(1),
|
||||
exception.exception_type.eq(ExceptionType.i_overflow.value)
|
||||
|
|
|
@ -19,7 +19,7 @@ class Core(Module, AutoCSR):
|
|||
self.cri = cri.Interface()
|
||||
self.reset = CSR()
|
||||
self.reset_phy = CSR()
|
||||
self.async_error = CSR(2)
|
||||
self.async_error = CSR(3)
|
||||
|
||||
# Clocking/Reset
|
||||
# Create rsys, rio and rio_phy domains based on sys and rtio
|
||||
|
@ -75,18 +75,21 @@ class Core(Module, AutoCSR):
|
|||
o_collision_sync = BlindTransfer()
|
||||
o_busy_sync = BlindTransfer()
|
||||
self.submodules += o_collision_sync, o_busy_sync
|
||||
o_sequence_error_trig = Signal()
|
||||
o_collision = Signal()
|
||||
o_busy = Signal()
|
||||
o_sequence_error = Signal()
|
||||
self.sync += [
|
||||
If(self.async_error.re,
|
||||
If(self.async_error.r[0], o_collision.eq(0)),
|
||||
If(self.async_error.r[1], o_busy.eq(0)),
|
||||
If(self.async_error.r[2], o_sequence_error.eq(0)),
|
||||
),
|
||||
If(o_collision_sync.o, o_collision.eq(1)),
|
||||
If(o_busy_sync.o, o_busy.eq(1))
|
||||
If(o_busy_sync.o, o_busy.eq(1)),
|
||||
If(o_sequence_error_trig, o_sequence_error.eq(1))
|
||||
]
|
||||
self.comb += self.async_error.w.eq(Cat(o_collision, o_busy))
|
||||
|
||||
self.comb += self.async_error.w.eq(Cat(o_collision, o_busy, o_sequence_error))
|
||||
|
||||
# Outputs/Inputs
|
||||
quash_channels = [n for n, c in enumerate(channels) if isinstance(c, LogChannel)]
|
||||
|
@ -100,7 +103,8 @@ class Core(Module, AutoCSR):
|
|||
self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16)
|
||||
self.comb += [
|
||||
o_collision_sync.i.eq(outputs.collision),
|
||||
o_busy_sync.i.eq(outputs.busy)
|
||||
o_busy_sync.i.eq(outputs.busy),
|
||||
o_sequence_error_trig.eq(outputs.sequence_error)
|
||||
]
|
||||
|
||||
inputs = InputCollector(channels, glbl_fine_ts_width, "async",
|
||||
|
|
|
@ -25,8 +25,8 @@ layout = [
|
|||
("o_data", 512, DIR_M_TO_S),
|
||||
("o_address", 16, DIR_M_TO_S),
|
||||
# o_status bits:
|
||||
# <0:wait> <1:underflow> <2:sequence_error>
|
||||
("o_status", 3, DIR_S_TO_M),
|
||||
# <0:wait> <1:underflow>
|
||||
("o_status", 2, DIR_S_TO_M),
|
||||
# targets may optionally report a pessimistic estimate of the number
|
||||
# of outputs events that can be written without waiting.
|
||||
("o_buffer_space", 16, DIR_S_TO_M),
|
||||
|
@ -61,7 +61,7 @@ class KernelInitiator(Module, AutoCSR):
|
|||
self.o_data = CSRStorage(512, write_from_dev=True)
|
||||
self.o_address = CSRStorage(16)
|
||||
self.o_we = CSR()
|
||||
self.o_status = CSRStatus(3)
|
||||
self.o_status = CSRStatus(2)
|
||||
|
||||
self.i_data = CSRStatus(32)
|
||||
self.i_timestamp = CSRStatus(64)
|
||||
|
|
|
@ -242,9 +242,7 @@ class TimeOffset(Module, AutoCSR):
|
|||
|
||||
class CRIMaster(Module, AutoCSR):
|
||||
def __init__(self):
|
||||
self.error_status = CSRStatus(3) # same encoding as RTIO status
|
||||
self.error_underflow_reset = CSR()
|
||||
self.error_sequence_error_reset = CSR()
|
||||
self.underflow = CSR()
|
||||
|
||||
self.error_channel = CSRStatus(24)
|
||||
self.error_timestamp = CSRStatus(64)
|
||||
|
@ -256,18 +254,15 @@ class CRIMaster(Module, AutoCSR):
|
|||
|
||||
# # #
|
||||
|
||||
error_set = Signal(2)
|
||||
for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset]):
|
||||
# bit 0 is RTIO wait and always 0 here
|
||||
bit = i + 1
|
||||
underflow_trigger = Signal(2)
|
||||
self.sync += [
|
||||
If(error_set[i],
|
||||
self.error_status.status[bit].eq(1),
|
||||
If(underflow_trigger,
|
||||
self.underflow.w.eq(1),
|
||||
self.error_channel.status.eq(self.sink.channel),
|
||||
self.error_timestamp.status.eq(self.sink.timestamp),
|
||||
self.error_address.status.eq(self.sink.address)
|
||||
),
|
||||
If(rcsr.re, self.error_status.status[bit].eq(0))
|
||||
If(self.underflow.re, self.underflow.w.eq(0))
|
||||
]
|
||||
|
||||
self.comb += [
|
||||
|
@ -281,7 +276,7 @@ class CRIMaster(Module, AutoCSR):
|
|||
self.submodules += fsm
|
||||
|
||||
fsm.act("IDLE",
|
||||
If(self.error_status.status == 0,
|
||||
If(~self.underflow.w,
|
||||
If(self.sink.stb,
|
||||
If(self.sink.eop,
|
||||
# last packet contains dummy data, discard it
|
||||
|
@ -306,13 +301,11 @@ class CRIMaster(Module, AutoCSR):
|
|||
self.sink.ack.eq(1),
|
||||
NextState("IDLE")
|
||||
),
|
||||
If(self.cri.o_status[1], NextState("UNDERFLOW")),
|
||||
If(self.cri.o_status[2], NextState("SEQUENCE_ERROR"))
|
||||
If(self.cri.o_status[1], NextState("UNDERFLOW"))
|
||||
)
|
||||
for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR"]):
|
||||
fsm.act(name,
|
||||
fsm.act("UNDERFLOW",
|
||||
self.busy.eq(1),
|
||||
error_set.eq(1 << n),
|
||||
underflow_trigger.eq(1),
|
||||
self.sink.ack.eq(1),
|
||||
NextState("IDLE")
|
||||
)
|
||||
|
|
|
@ -61,26 +61,42 @@ class SED(Module):
|
|||
def cri(self):
|
||||
return self.lane_dist.cri
|
||||
|
||||
# in CRI clock domain
|
||||
@property
|
||||
def minimum_coarse_timestamp(self):
|
||||
return self.lane_dist.minimum_coarse_timestamp
|
||||
|
||||
# in I/O clock domain
|
||||
@property
|
||||
def coarse_timestamp(self):
|
||||
return self.gates.coarse_timestamp
|
||||
|
||||
# in CRI clock domain
|
||||
@property
|
||||
def sequence_error(self):
|
||||
return self.lane_dist.sequence_error
|
||||
|
||||
# in CRI clock domain
|
||||
@property
|
||||
def sequence_error_channel(self):
|
||||
return self.lane_dist.sequence_error_channel
|
||||
|
||||
# in I/O clock domain
|
||||
@property
|
||||
def collision(self):
|
||||
return self.output_driver.collision
|
||||
|
||||
# in I/O clock domain
|
||||
@property
|
||||
def collision_channel(self):
|
||||
return self.output_driver.collision_channel
|
||||
|
||||
# in I/O clock domain
|
||||
@property
|
||||
def busy(self):
|
||||
return self.output_driver.busy
|
||||
|
||||
# in I/O clock domain
|
||||
@property
|
||||
def busy_channel(self):
|
||||
return self.output_driver.busy_channel
|
||||
|
|
|
@ -22,6 +22,8 @@ class LaneDistributor(Module):
|
|||
if interface is None:
|
||||
interface = cri.Interface()
|
||||
self.cri = interface
|
||||
self.sequence_error = Signal()
|
||||
self.sequence_error_channel = Signal(16)
|
||||
self.minimum_coarse_timestamp = Signal(64-glbl_fine_ts_width)
|
||||
self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload))
|
||||
for _ in range(lane_count)]
|
||||
|
@ -30,9 +32,7 @@ class LaneDistributor(Module):
|
|||
|
||||
o_status_wait = Signal()
|
||||
o_status_underflow = Signal()
|
||||
o_status_sequence_error = Signal()
|
||||
self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow,
|
||||
o_status_sequence_error))
|
||||
self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow))
|
||||
|
||||
# internal state
|
||||
current_lane = Signal(max=lane_count)
|
||||
|
@ -135,15 +135,13 @@ class LaneDistributor(Module):
|
|||
]
|
||||
self.sync += [
|
||||
If(self.cri.cmd == cri.commands["write"],
|
||||
o_status_underflow.eq(0),
|
||||
o_status_sequence_error.eq(0)
|
||||
o_status_underflow.eq(0)
|
||||
),
|
||||
If(do_underflow,
|
||||
o_status_underflow.eq(1)
|
||||
),
|
||||
If(do_sequence_error,
|
||||
o_status_sequence_error.eq(1)
|
||||
)
|
||||
self.sequence_error.eq(do_sequence_error),
|
||||
self.sequence_error_channel.eq(self.cri.chan_sel[:16])
|
||||
]
|
||||
|
||||
# current lane has been full, spread events by switching to the next.
|
||||
|
|
|
@ -111,8 +111,6 @@ class OutputsTestbench:
|
|||
status = yield from kcsrs.o_status.read()
|
||||
if status & 2:
|
||||
raise RTIOUnderflow
|
||||
if status & 4:
|
||||
raise RTIOSequenceError
|
||||
yield
|
||||
wlen += 1
|
||||
return wlen
|
||||
|
|
|
@ -38,7 +38,7 @@ def simulate(input_events, compensation=None, wait=True):
|
|||
access_status = "ok"
|
||||
if status & 0x02:
|
||||
access_status = "underflow"
|
||||
if status & 0x04:
|
||||
if (yield dut.sequence_error):
|
||||
access_status = "sequence_error"
|
||||
|
||||
access_results.append((access_status, access_time))
|
||||
|
|
|
@ -56,7 +56,7 @@ def simulate(input_events):
|
|||
access_status = "ok"
|
||||
if status & 0x02:
|
||||
access_status = "underflow"
|
||||
if status & 0x04:
|
||||
if (yield dut.sed.sequence_error):
|
||||
access_status = "sequence_error"
|
||||
|
||||
access_results.append((access_status, access_time))
|
||||
|
|
|
@ -401,27 +401,23 @@ class CoredeviceTest(ExperimentCase):
|
|||
with self.assertRaises(RTIOUnderflow):
|
||||
self.execute(Underflow)
|
||||
|
||||
def execute_and_test_in_log(self, experiment, string):
|
||||
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
|
||||
mgmt = CommMgmt(core_addr)
|
||||
mgmt.clear_log()
|
||||
self.execute(experiment)
|
||||
log = mgmt.get_log()
|
||||
self.assertIn(string, log)
|
||||
mgmt.close()
|
||||
|
||||
def test_sequence_error(self):
|
||||
with self.assertRaises(RTIOSequenceError):
|
||||
self.execute(SequenceError)
|
||||
self.execute_and_test_in_log(SequenceError, "RTIO sequence error")
|
||||
|
||||
def test_collision(self):
|
||||
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
|
||||
mgmt = CommMgmt(core_addr)
|
||||
mgmt.clear_log()
|
||||
self.execute(Collision)
|
||||
log = mgmt.get_log()
|
||||
self.assertIn("RTIO collision", log)
|
||||
mgmt.close()
|
||||
self.execute_and_test_in_log(Collision, "RTIO collision")
|
||||
|
||||
def test_address_collision(self):
|
||||
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
|
||||
mgmt = CommMgmt(core_addr)
|
||||
mgmt.clear_log()
|
||||
self.execute(AddressCollision)
|
||||
log = mgmt.get_log()
|
||||
self.assertIn("RTIO collision", log)
|
||||
mgmt.close()
|
||||
self.execute_and_test_in_log(AddressCollision, "RTIO collision")
|
||||
|
||||
def test_watchdog(self):
|
||||
# watchdog only works on the device
|
||||
|
|
|
@ -117,6 +117,22 @@ To track down ``RTIOUnderflows`` in an experiment there are a few approaches:
|
|||
code.
|
||||
* The :any:`integrated logic analyzer <core-device-rtio-analyzer-tool>` shows the timeline context that lead to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between timeline cursor and wall clock time (``now - rtio_counter``).
|
||||
|
||||
Sequence errors
|
||||
---------------
|
||||
A sequence error happens when the sequence of coarse timestamps cannot be supported by the gateware. For example, there may have been too many timeline rewinds.
|
||||
|
||||
Internally, the gateware stores output events in an array of FIFO buffers (the "lanes") and the timestamps in each lane much be strictly increasing. The gateware selects a different lane when an event with a decreasing or equal timestamp is submitted. A sequence error occurs when no appropriate lane can be found.
|
||||
|
||||
Notes:
|
||||
|
||||
* Strictly increasing timestamps never cause sequence errors.
|
||||
* Configuring the gateware with more lanes for the RTIO core reduces the frequency of sequence errors.
|
||||
* Whether a particular sequence of timestamps causes a sequence error or not is fully deterministic (starting from a known RTIO state, e.g. after a reset). Adding a constant offset to the whole sequence does not affect the result.
|
||||
|
||||
The offending event is discarded and the RTIO core keeps operating.
|
||||
|
||||
This error is reported asynchronously via the core device log: for performance reasons with DRTIO, the CPU does not wait for an error report from the satellite after writing an event. Therefore, it is not possible to raise an exception precisely.
|
||||
|
||||
Collisions
|
||||
----------
|
||||
A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different.
|
||||
|
|
Loading…
Reference in New Issue