rtio: make sequence errors consistently asynchronous

This commit is contained in:
Sebastien Bourdeauducq 2017-09-29 14:40:06 +08:00
parent 4e31e9a9ac
commit 5437f0e3e3
18 changed files with 102 additions and 100 deletions

View File

@ -1,9 +1,9 @@
from artiq.coredevice import exceptions, dds, spi from artiq.coredevice import exceptions, dds, spi
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError, RTIOOverflow) from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOOverflow)
from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE, from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE,
PHASE_MODE_TRACKING) PHASE_MODE_TRACKING)
__all__ = [] __all__ = []
__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOOverflow"] __all__ += ["RTIOUnderflow", "RTIOOverflow"]
__all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE", __all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE",
"PHASE_MODE_TRACKING"] "PHASE_MODE_TRACKING"]

View File

@ -27,9 +27,9 @@ class ExceptionType(Enum):
legacy_o_sequence_error_reset = 0b010001 legacy_o_sequence_error_reset = 0b010001
legacy_o_collision_reset = 0b010010 legacy_o_collision_reset = 0b010010
legacy_i_overflow_reset = 0b100000 legacy_i_overflow_reset = 0b100000
legacy_o_sequence_error = 0b010101
o_underflow = 0b010100 o_underflow = 0b010100
o_sequence_error = 0b010101
i_overflow = 0b100001 i_overflow = 0b100001

View File

@ -78,13 +78,6 @@ class RTIOUnderflow(Exception):
""" """
artiq_builtin = True artiq_builtin = True
class RTIOSequenceError(Exception):
"""Raised when an event is submitted on a given channel with a timestamp
not larger than the previous one.
The offending event is discarded and the RTIO core keeps operating.
"""
artiq_builtin = True
class RTIOOverflow(Exception): class RTIOOverflow(Exception):
"""Raised when at least one event could not be registered into the RTIO """Raised when at least one event could not be registered into the RTIO
@ -96,26 +89,32 @@ class RTIOOverflow(Exception):
""" """
artiq_builtin = True artiq_builtin = True
class DMAError(Exception): class DMAError(Exception):
"""Raised when performing an invalid DMA operation.""" """Raised when performing an invalid DMA operation."""
artiq_builtin = True artiq_builtin = True
class DDSError(Exception): class DDSError(Exception):
"""Raised when attempting to start a DDS batch while already in a batch, """Raised when attempting to start a DDS batch while already in a batch,
when too many commands are batched, and when DDS channel settings are when too many commands are batched, and when DDS channel settings are
incorrect. incorrect.
""" """
class WatchdogExpired(Exception): class WatchdogExpired(Exception):
"""Raised when a watchdog expires.""" """Raised when a watchdog expires."""
class ClockFailure(Exception): class ClockFailure(Exception):
"""Raised when RTIO PLL has lost lock.""" """Raised when RTIO PLL has lost lock."""
class I2CError(Exception): class I2CError(Exception):
"""Raised when a I2C transaction fails.""" """Raised when a I2C transaction fails."""
pass pass
class SPIError(Exception): class SPIError(Exception):
"""Raised when a SPI transaction fails.""" """Raised when a SPI transaction fails."""
pass pass

View File

@ -380,22 +380,13 @@ extern fn dma_playback(timestamp: i64, ptr: i32) {
while csr::rtio_dma::enable_read() != 0 {} while csr::rtio_dma::enable_read() != 0 {}
csr::cri_con::selected_write(0); csr::cri_con::selected_write(0);
let status = csr::rtio_dma::error_status_read(); if csr::rtio_dma::underflow_read() != 0 {
if status != 0 {
let timestamp = csr::rtio_dma::error_timestamp_read(); let timestamp = csr::rtio_dma::error_timestamp_read();
let channel = csr::rtio_dma::error_channel_read(); let channel = csr::rtio_dma::error_channel_read();
if status & rtio::RTIO_O_STATUS_UNDERFLOW != 0 { csr::rtio_dma::underflow_write(1);
csr::rtio_dma::error_underflow_reset_write(1); raise!("RTIOUnderflow",
raise!("RTIOUnderflow", "RTIO underflow at {0} mu, channel {1}",
"RTIO underflow at {0} mu, channel {1}", timestamp as i64, channel as i64, 0)
timestamp as i64, channel as i64, 0)
}
if status & rtio::RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
csr::rtio_dma::error_sequence_error_reset_write(1);
raise!("RTIOSequenceError",
"RTIO sequence error at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
} }
} }
} }

View File

@ -6,7 +6,6 @@ use kernel_proto::*;
pub const RTIO_O_STATUS_WAIT: u8 = 1; pub const RTIO_O_STATUS_WAIT: u8 = 1;
pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2; pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2;
pub const RTIO_O_STATUS_SEQUENCE_ERROR: u8 = 4;
pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1; pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1;
pub const RTIO_I_STATUS_OVERFLOW: u8 = 2; pub const RTIO_I_STATUS_OVERFLOW: u8 = 2;
pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4; pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4;
@ -45,11 +44,6 @@ unsafe fn process_exceptional_status(timestamp: i64, channel: i32, status: u8) {
"RTIO underflow at {0} mu, channel {1}, slack {2} mu", "RTIO underflow at {0} mu, channel {1}, slack {2} mu",
timestamp, channel as i64, timestamp - get_counter()) timestamp, channel as i64, timestamp - get_counter())
} }
if status & RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
raise!("RTIOSequenceError",
"RTIO sequence error at {0} mu, channel {1}",
timestamp, channel as i64, 0)
}
} }
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) { pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {

View File

@ -195,6 +195,9 @@ fn async_error_thread(io: Io) {
if errors & 2 != 0 { if errors & 2 != 0 {
error!("RTIO busy"); error!("RTIO busy");
} }
if errors & 4 != 0 {
error!("RTIO sequence error");
}
csr::rtio_core::async_error_write(errors); csr::rtio_core::async_error_write(errors);
} }
} }

View File

@ -35,11 +35,9 @@ class RTErrorsSatellite(Module, AutoCSR):
# internal ARTIQ bugs. # internal ARTIQ bugs.
underflow = Signal() underflow = Signal()
overflow = Signal() overflow = Signal()
sequence_error = Signal()
self.comb += [ self.comb += [
underflow.eq(outputs.cri.o_status[1]), underflow.eq(outputs.cri.o_status[1]),
overflow.eq(outputs.cri.o_status[0]), overflow.eq(outputs.cri.o_status[0])
sequence_error.eq(outputs.cri.o_status[2])
] ]
error_csr(self.protocol_error, error_csr(self.protocol_error,
(rt_packet.unknown_packet_type, False), (rt_packet.unknown_packet_type, False),
@ -48,7 +46,7 @@ class RTErrorsSatellite(Module, AutoCSR):
(overflow, True) (overflow, True)
) )
error_csr(self.rtio_error, error_csr(self.rtio_error,
(sequence_error, True), (outputs.sequence_error, False),
(outputs.collision, False), (outputs.collision, False),
(outputs.busy, False) (outputs.busy, False)
) )

View File

@ -94,10 +94,6 @@ class MessageEncoder(Module, AutoCSR):
exception_stb.eq(1), exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_underflow.value) exception.exception_type.eq(ExceptionType.o_underflow.value)
), ),
If(just_written & cri.o_status[2],
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_sequence_error.value)
),
If(read_overflow, If(read_overflow,
exception_stb.eq(1), exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.i_overflow.value) exception.exception_type.eq(ExceptionType.i_overflow.value)

View File

@ -19,7 +19,7 @@ class Core(Module, AutoCSR):
self.cri = cri.Interface() self.cri = cri.Interface()
self.reset = CSR() self.reset = CSR()
self.reset_phy = CSR() self.reset_phy = CSR()
self.async_error = CSR(2) self.async_error = CSR(3)
# Clocking/Reset # Clocking/Reset
# Create rsys, rio and rio_phy domains based on sys and rtio # Create rsys, rio and rio_phy domains based on sys and rtio
@ -75,18 +75,21 @@ class Core(Module, AutoCSR):
o_collision_sync = BlindTransfer() o_collision_sync = BlindTransfer()
o_busy_sync = BlindTransfer() o_busy_sync = BlindTransfer()
self.submodules += o_collision_sync, o_busy_sync self.submodules += o_collision_sync, o_busy_sync
o_sequence_error_trig = Signal()
o_collision = Signal() o_collision = Signal()
o_busy = Signal() o_busy = Signal()
o_sequence_error = Signal()
self.sync += [ self.sync += [
If(self.async_error.re, If(self.async_error.re,
If(self.async_error.r[0], o_collision.eq(0)), If(self.async_error.r[0], o_collision.eq(0)),
If(self.async_error.r[1], o_busy.eq(0)), If(self.async_error.r[1], o_busy.eq(0)),
If(self.async_error.r[2], o_sequence_error.eq(0)),
), ),
If(o_collision_sync.o, o_collision.eq(1)), If(o_collision_sync.o, o_collision.eq(1)),
If(o_busy_sync.o, o_busy.eq(1)) If(o_busy_sync.o, o_busy.eq(1)),
If(o_sequence_error_trig, o_sequence_error.eq(1))
] ]
self.comb += self.async_error.w.eq(Cat(o_collision, o_busy)) self.comb += self.async_error.w.eq(Cat(o_collision, o_busy, o_sequence_error))
# Outputs/Inputs # Outputs/Inputs
quash_channels = [n for n, c in enumerate(channels) if isinstance(c, LogChannel)] quash_channels = [n for n, c in enumerate(channels) if isinstance(c, LogChannel)]
@ -100,7 +103,8 @@ class Core(Module, AutoCSR):
self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16) self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16)
self.comb += [ self.comb += [
o_collision_sync.i.eq(outputs.collision), o_collision_sync.i.eq(outputs.collision),
o_busy_sync.i.eq(outputs.busy) o_busy_sync.i.eq(outputs.busy),
o_sequence_error_trig.eq(outputs.sequence_error)
] ]
inputs = InputCollector(channels, glbl_fine_ts_width, "async", inputs = InputCollector(channels, glbl_fine_ts_width, "async",

View File

@ -25,8 +25,8 @@ layout = [
("o_data", 512, DIR_M_TO_S), ("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S), ("o_address", 16, DIR_M_TO_S),
# o_status bits: # o_status bits:
# <0:wait> <1:underflow> <2:sequence_error> # <0:wait> <1:underflow>
("o_status", 3, DIR_S_TO_M), ("o_status", 2, DIR_S_TO_M),
# targets may optionally report a pessimistic estimate of the number # targets may optionally report a pessimistic estimate of the number
# of outputs events that can be written without waiting. # of outputs events that can be written without waiting.
("o_buffer_space", 16, DIR_S_TO_M), ("o_buffer_space", 16, DIR_S_TO_M),
@ -61,7 +61,7 @@ class KernelInitiator(Module, AutoCSR):
self.o_data = CSRStorage(512, write_from_dev=True) self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16) self.o_address = CSRStorage(16)
self.o_we = CSR() self.o_we = CSR()
self.o_status = CSRStatus(3) self.o_status = CSRStatus(2)
self.i_data = CSRStatus(32) self.i_data = CSRStatus(32)
self.i_timestamp = CSRStatus(64) self.i_timestamp = CSRStatus(64)

View File

@ -242,9 +242,7 @@ class TimeOffset(Module, AutoCSR):
class CRIMaster(Module, AutoCSR): class CRIMaster(Module, AutoCSR):
def __init__(self): def __init__(self):
self.error_status = CSRStatus(3) # same encoding as RTIO status self.underflow = CSR()
self.error_underflow_reset = CSR()
self.error_sequence_error_reset = CSR()
self.error_channel = CSRStatus(24) self.error_channel = CSRStatus(24)
self.error_timestamp = CSRStatus(64) self.error_timestamp = CSRStatus(64)
@ -256,19 +254,16 @@ class CRIMaster(Module, AutoCSR):
# # # # # #
error_set = Signal(2) underflow_trigger = Signal(2)
for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset]): self.sync += [
# bit 0 is RTIO wait and always 0 here If(underflow_trigger,
bit = i + 1 self.underflow.w.eq(1),
self.sync += [ self.error_channel.status.eq(self.sink.channel),
If(error_set[i], self.error_timestamp.status.eq(self.sink.timestamp),
self.error_status.status[bit].eq(1), self.error_address.status.eq(self.sink.address)
self.error_channel.status.eq(self.sink.channel), ),
self.error_timestamp.status.eq(self.sink.timestamp), If(self.underflow.re, self.underflow.w.eq(0))
self.error_address.status.eq(self.sink.address) ]
),
If(rcsr.re, self.error_status.status[bit].eq(0))
]
self.comb += [ self.comb += [
self.cri.chan_sel.eq(self.sink.channel), self.cri.chan_sel.eq(self.sink.channel),
@ -281,7 +276,7 @@ class CRIMaster(Module, AutoCSR):
self.submodules += fsm self.submodules += fsm
fsm.act("IDLE", fsm.act("IDLE",
If(self.error_status.status == 0, If(~self.underflow.w,
If(self.sink.stb, If(self.sink.stb,
If(self.sink.eop, If(self.sink.eop,
# last packet contains dummy data, discard it # last packet contains dummy data, discard it
@ -306,16 +301,14 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1), self.sink.ack.eq(1),
NextState("IDLE") NextState("IDLE")
), ),
If(self.cri.o_status[1], NextState("UNDERFLOW")), If(self.cri.o_status[1], NextState("UNDERFLOW"))
If(self.cri.o_status[2], NextState("SEQUENCE_ERROR")) )
fsm.act("UNDERFLOW",
self.busy.eq(1),
underflow_trigger.eq(1),
self.sink.ack.eq(1),
NextState("IDLE")
) )
for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR"]):
fsm.act(name,
self.busy.eq(1),
error_set.eq(1 << n),
self.sink.ack.eq(1),
NextState("IDLE")
)
class DMA(Module): class DMA(Module):

View File

@ -61,26 +61,42 @@ class SED(Module):
def cri(self): def cri(self):
return self.lane_dist.cri return self.lane_dist.cri
# in CRI clock domain
@property @property
def minimum_coarse_timestamp(self): def minimum_coarse_timestamp(self):
return self.lane_dist.minimum_coarse_timestamp return self.lane_dist.minimum_coarse_timestamp
# in I/O clock domain
@property @property
def coarse_timestamp(self): def coarse_timestamp(self):
return self.gates.coarse_timestamp return self.gates.coarse_timestamp
# in CRI clock domain
@property
def sequence_error(self):
return self.lane_dist.sequence_error
# in CRI clock domain
@property
def sequence_error_channel(self):
return self.lane_dist.sequence_error_channel
# in I/O clock domain
@property @property
def collision(self): def collision(self):
return self.output_driver.collision return self.output_driver.collision
# in I/O clock domain
@property @property
def collision_channel(self): def collision_channel(self):
return self.output_driver.collision_channel return self.output_driver.collision_channel
# in I/O clock domain
@property @property
def busy(self): def busy(self):
return self.output_driver.busy return self.output_driver.busy
# in I/O clock domain
@property @property
def busy_channel(self): def busy_channel(self):
return self.output_driver.busy_channel return self.output_driver.busy_channel

View File

@ -22,6 +22,8 @@ class LaneDistributor(Module):
if interface is None: if interface is None:
interface = cri.Interface() interface = cri.Interface()
self.cri = interface self.cri = interface
self.sequence_error = Signal()
self.sequence_error_channel = Signal(16)
self.minimum_coarse_timestamp = Signal(64-glbl_fine_ts_width) self.minimum_coarse_timestamp = Signal(64-glbl_fine_ts_width)
self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload)) self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload))
for _ in range(lane_count)] for _ in range(lane_count)]
@ -30,9 +32,7 @@ class LaneDistributor(Module):
o_status_wait = Signal() o_status_wait = Signal()
o_status_underflow = Signal() o_status_underflow = Signal()
o_status_sequence_error = Signal() self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow))
self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow,
o_status_sequence_error))
# internal state # internal state
current_lane = Signal(max=lane_count) current_lane = Signal(max=lane_count)
@ -135,15 +135,13 @@ class LaneDistributor(Module):
] ]
self.sync += [ self.sync += [
If(self.cri.cmd == cri.commands["write"], If(self.cri.cmd == cri.commands["write"],
o_status_underflow.eq(0), o_status_underflow.eq(0)
o_status_sequence_error.eq(0)
), ),
If(do_underflow, If(do_underflow,
o_status_underflow.eq(1) o_status_underflow.eq(1)
), ),
If(do_sequence_error, self.sequence_error.eq(do_sequence_error),
o_status_sequence_error.eq(1) self.sequence_error_channel.eq(self.cri.chan_sel[:16])
)
] ]
# current lane has been full, spread events by switching to the next. # current lane has been full, spread events by switching to the next.

View File

@ -111,8 +111,6 @@ class OutputsTestbench:
status = yield from kcsrs.o_status.read() status = yield from kcsrs.o_status.read()
if status & 2: if status & 2:
raise RTIOUnderflow raise RTIOUnderflow
if status & 4:
raise RTIOSequenceError
yield yield
wlen += 1 wlen += 1
return wlen return wlen

View File

@ -38,7 +38,7 @@ def simulate(input_events, compensation=None, wait=True):
access_status = "ok" access_status = "ok"
if status & 0x02: if status & 0x02:
access_status = "underflow" access_status = "underflow"
if status & 0x04: if (yield dut.sequence_error):
access_status = "sequence_error" access_status = "sequence_error"
access_results.append((access_status, access_time)) access_results.append((access_status, access_time))

View File

@ -56,7 +56,7 @@ def simulate(input_events):
access_status = "ok" access_status = "ok"
if status & 0x02: if status & 0x02:
access_status = "underflow" access_status = "underflow"
if status & 0x04: if (yield dut.sed.sequence_error):
access_status = "sequence_error" access_status = "sequence_error"
access_results.append((access_status, access_time)) access_results.append((access_status, access_time))

View File

@ -401,27 +401,23 @@ class CoredeviceTest(ExperimentCase):
with self.assertRaises(RTIOUnderflow): with self.assertRaises(RTIOUnderflow):
self.execute(Underflow) self.execute(Underflow)
def execute_and_test_in_log(self, experiment, string):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(experiment)
log = mgmt.get_log()
self.assertIn(string, log)
mgmt.close()
def test_sequence_error(self): def test_sequence_error(self):
with self.assertRaises(RTIOSequenceError): self.execute_and_test_in_log(SequenceError, "RTIO sequence error")
self.execute(SequenceError)
def test_collision(self): def test_collision(self):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"] self.execute_and_test_in_log(Collision, "RTIO collision")
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(Collision)
log = mgmt.get_log()
self.assertIn("RTIO collision", log)
mgmt.close()
def test_address_collision(self): def test_address_collision(self):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"] self.execute_and_test_in_log(AddressCollision, "RTIO collision")
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(AddressCollision)
log = mgmt.get_log()
self.assertIn("RTIO collision", log)
mgmt.close()
def test_watchdog(self): def test_watchdog(self):
# watchdog only works on the device # watchdog only works on the device

View File

@ -117,6 +117,22 @@ To track down ``RTIOUnderflows`` in an experiment there are a few approaches:
code. code.
* The :any:`integrated logic analyzer <core-device-rtio-analyzer-tool>` shows the timeline context that lead to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between timeline cursor and wall clock time (``now - rtio_counter``). * The :any:`integrated logic analyzer <core-device-rtio-analyzer-tool>` shows the timeline context that lead to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between timeline cursor and wall clock time (``now - rtio_counter``).
Sequence errors
---------------
A sequence error happens when the sequence of coarse timestamps cannot be supported by the gateware. For example, there may have been too many timeline rewinds.
Internally, the gateware stores output events in an array of FIFO buffers (the "lanes") and the timestamps in each lane must be strictly increasing. The gateware selects a different lane when an event with a decreasing or equal timestamp is submitted. A sequence error occurs when no appropriate lane can be found.
Notes:
* Strictly increasing timestamps never cause sequence errors.
* Configuring the gateware with more lanes for the RTIO core reduces the frequency of sequence errors.
* Whether a particular sequence of timestamps causes a sequence error or not is fully deterministic (starting from a known RTIO state, e.g. after a reset). Adding a constant offset to the whole sequence does not affect the result.
The offending event is discarded and the RTIO core keeps operating.
This error is reported asynchronously via the core device log: for performance reasons with DRTIO, the CPU does not wait for an error report from the satellite after writing an event. Therefore, it is not possible to raise an exception precisely.
Collisions Collisions
---------- ----------
A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different. A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different.