forked from M-Labs/artiq
1
0
Fork 0

rtio: make sequence errors consistently asychronous

This commit is contained in:
Sebastien Bourdeauducq 2017-09-29 14:40:06 +08:00
parent 4e31e9a9ac
commit 5437f0e3e3
18 changed files with 102 additions and 100 deletions

View File

@ -1,9 +1,9 @@
from artiq.coredevice import exceptions, dds, spi
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError, RTIOOverflow)
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOOverflow)
from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE,
PHASE_MODE_TRACKING)
__all__ = []
__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOOverflow"]
__all__ += ["RTIOUnderflow", "RTIOOverflow"]
__all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE",
"PHASE_MODE_TRACKING"]

View File

@ -27,9 +27,9 @@ class ExceptionType(Enum):
legacy_o_sequence_error_reset = 0b010001
legacy_o_collision_reset = 0b010010
legacy_i_overflow_reset = 0b100000
legacy_o_sequence_error = 0b010101
o_underflow = 0b010100
o_sequence_error = 0b010101
i_overflow = 0b100001

View File

@ -78,13 +78,6 @@ class RTIOUnderflow(Exception):
"""
artiq_builtin = True
class RTIOSequenceError(Exception):
"""Raised when an event is submitted on a given channel with a timestamp
not larger than the previous one.
The offending event is discarded and the RTIO core keeps operating.
"""
artiq_builtin = True
class RTIOOverflow(Exception):
"""Raised when at least one event could not be registered into the RTIO
@ -96,26 +89,32 @@ class RTIOOverflow(Exception):
"""
artiq_builtin = True
class DMAError(Exception):
"""Raised when performing an invalid DMA operation."""
artiq_builtin = True
class DDSError(Exception):
"""Raised when attempting to start a DDS batch while already in a batch,
when too many commands are batched, and when DDS channel settings are
incorrect.
"""
class WatchdogExpired(Exception):
"""Raised when a watchdog expires."""
class ClockFailure(Exception):
"""Raised when RTIO PLL has lost lock."""
class I2CError(Exception):
"""Raised when a I2C transaction fails."""
pass
class SPIError(Exception):
"""Raised when a SPI transaction fails."""
pass

View File

@ -380,22 +380,13 @@ extern fn dma_playback(timestamp: i64, ptr: i32) {
while csr::rtio_dma::enable_read() != 0 {}
csr::cri_con::selected_write(0);
let status = csr::rtio_dma::error_status_read();
if status != 0 {
if csr::rtio_dma::underflow_read() != 0 {
let timestamp = csr::rtio_dma::error_timestamp_read();
let channel = csr::rtio_dma::error_channel_read();
if status & rtio::RTIO_O_STATUS_UNDERFLOW != 0 {
csr::rtio_dma::error_underflow_reset_write(1);
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
if status & rtio::RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
csr::rtio_dma::error_sequence_error_reset_write(1);
raise!("RTIOSequenceError",
"RTIO sequence error at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
csr::rtio_dma::underflow_write(1);
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
}
}

View File

@ -6,7 +6,6 @@ use kernel_proto::*;
pub const RTIO_O_STATUS_WAIT: u8 = 1;
pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2;
pub const RTIO_O_STATUS_SEQUENCE_ERROR: u8 = 4;
pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1;
pub const RTIO_I_STATUS_OVERFLOW: u8 = 2;
pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4;
@ -45,11 +44,6 @@ unsafe fn process_exceptional_status(timestamp: i64, channel: i32, status: u8) {
"RTIO underflow at {0} mu, channel {1}, slack {2} mu",
timestamp, channel as i64, timestamp - get_counter())
}
if status & RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
raise!("RTIOSequenceError",
"RTIO sequence error at {0} mu, channel {1}",
timestamp, channel as i64, 0)
}
}
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {

View File

@ -195,6 +195,9 @@ fn async_error_thread(io: Io) {
if errors & 2 != 0 {
error!("RTIO busy");
}
if errors & 4 != 0 {
error!("RTIO sequence error");
}
csr::rtio_core::async_error_write(errors);
}
}

View File

@ -35,11 +35,9 @@ class RTErrorsSatellite(Module, AutoCSR):
# internal ARTIQ bugs.
underflow = Signal()
overflow = Signal()
sequence_error = Signal()
self.comb += [
underflow.eq(outputs.cri.o_status[1]),
overflow.eq(outputs.cri.o_status[0]),
sequence_error.eq(outputs.cri.o_status[2])
overflow.eq(outputs.cri.o_status[0])
]
error_csr(self.protocol_error,
(rt_packet.unknown_packet_type, False),
@ -48,7 +46,7 @@ class RTErrorsSatellite(Module, AutoCSR):
(overflow, True)
)
error_csr(self.rtio_error,
(sequence_error, True),
(outputs.sequence_error, False),
(outputs.collision, False),
(outputs.busy, False)
)

View File

@ -94,10 +94,6 @@ class MessageEncoder(Module, AutoCSR):
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_underflow.value)
),
If(just_written & cri.o_status[2],
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_sequence_error.value)
),
If(read_overflow,
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.i_overflow.value)

View File

@ -19,7 +19,7 @@ class Core(Module, AutoCSR):
self.cri = cri.Interface()
self.reset = CSR()
self.reset_phy = CSR()
self.async_error = CSR(2)
self.async_error = CSR(3)
# Clocking/Reset
# Create rsys, rio and rio_phy domains based on sys and rtio
@ -75,18 +75,21 @@ class Core(Module, AutoCSR):
o_collision_sync = BlindTransfer()
o_busy_sync = BlindTransfer()
self.submodules += o_collision_sync, o_busy_sync
o_sequence_error_trig = Signal()
o_collision = Signal()
o_busy = Signal()
o_sequence_error = Signal()
self.sync += [
If(self.async_error.re,
If(self.async_error.r[0], o_collision.eq(0)),
If(self.async_error.r[1], o_busy.eq(0)),
If(self.async_error.r[2], o_sequence_error.eq(0)),
),
If(o_collision_sync.o, o_collision.eq(1)),
If(o_busy_sync.o, o_busy.eq(1))
If(o_busy_sync.o, o_busy.eq(1)),
If(o_sequence_error_trig, o_sequence_error.eq(1))
]
self.comb += self.async_error.w.eq(Cat(o_collision, o_busy))
self.comb += self.async_error.w.eq(Cat(o_collision, o_busy, o_sequence_error))
# Outputs/Inputs
quash_channels = [n for n, c in enumerate(channels) if isinstance(c, LogChannel)]
@ -100,7 +103,8 @@ class Core(Module, AutoCSR):
self.sync += outputs.minimum_coarse_timestamp.eq(coarse_ts + 16)
self.comb += [
o_collision_sync.i.eq(outputs.collision),
o_busy_sync.i.eq(outputs.busy)
o_busy_sync.i.eq(outputs.busy),
o_sequence_error_trig.eq(outputs.sequence_error)
]
inputs = InputCollector(channels, glbl_fine_ts_width, "async",

View File

@ -25,8 +25,8 @@ layout = [
("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S),
# o_status bits:
# <0:wait> <1:underflow> <2:sequence_error>
("o_status", 3, DIR_S_TO_M),
# <0:wait> <1:underflow>
("o_status", 2, DIR_S_TO_M),
# targets may optionally report a pessimistic estimate of the number
# of outputs events that can be written without waiting.
("o_buffer_space", 16, DIR_S_TO_M),
@ -61,7 +61,7 @@ class KernelInitiator(Module, AutoCSR):
self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16)
self.o_we = CSR()
self.o_status = CSRStatus(3)
self.o_status = CSRStatus(2)
self.i_data = CSRStatus(32)
self.i_timestamp = CSRStatus(64)

View File

@ -242,9 +242,7 @@ class TimeOffset(Module, AutoCSR):
class CRIMaster(Module, AutoCSR):
def __init__(self):
self.error_status = CSRStatus(3) # same encoding as RTIO status
self.error_underflow_reset = CSR()
self.error_sequence_error_reset = CSR()
self.underflow = CSR()
self.error_channel = CSRStatus(24)
self.error_timestamp = CSRStatus(64)
@ -256,19 +254,16 @@ class CRIMaster(Module, AutoCSR):
# # #
error_set = Signal(2)
for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset]):
# bit 0 is RTIO wait and always 0 here
bit = i + 1
self.sync += [
If(error_set[i],
self.error_status.status[bit].eq(1),
self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address)
),
If(rcsr.re, self.error_status.status[bit].eq(0))
]
underflow_trigger = Signal(2)
self.sync += [
If(underflow_trigger,
self.underflow.w.eq(1),
self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address)
),
If(self.underflow.re, self.underflow.w.eq(0))
]
self.comb += [
self.cri.chan_sel.eq(self.sink.channel),
@ -281,7 +276,7 @@ class CRIMaster(Module, AutoCSR):
self.submodules += fsm
fsm.act("IDLE",
If(self.error_status.status == 0,
If(~self.underflow.w,
If(self.sink.stb,
If(self.sink.eop,
# last packet contains dummy data, discard it
@ -306,16 +301,14 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1),
NextState("IDLE")
),
If(self.cri.o_status[1], NextState("UNDERFLOW")),
If(self.cri.o_status[2], NextState("SEQUENCE_ERROR"))
If(self.cri.o_status[1], NextState("UNDERFLOW"))
)
fsm.act("UNDERFLOW",
self.busy.eq(1),
underflow_trigger.eq(1),
self.sink.ack.eq(1),
NextState("IDLE")
)
for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR"]):
fsm.act(name,
self.busy.eq(1),
error_set.eq(1 << n),
self.sink.ack.eq(1),
NextState("IDLE")
)
class DMA(Module):

View File

@ -61,26 +61,42 @@ class SED(Module):
def cri(self):
return self.lane_dist.cri
# in CRI clock domain
@property
def minimum_coarse_timestamp(self):
return self.lane_dist.minimum_coarse_timestamp
# in I/O clock domain
@property
def coarse_timestamp(self):
return self.gates.coarse_timestamp
# in CRI clock domain
@property
def sequence_error(self):
return self.lane_dist.sequence_error
# in CRI clock domain
@property
def sequence_error_channel(self):
return self.lane_dist.sequence_error_channel
# in I/O clock domain
@property
def collision(self):
return self.output_driver.collision
# in I/O clock domain
@property
def collision_channel(self):
return self.output_driver.collision_channel
# in I/O clock domain
@property
def busy(self):
return self.output_driver.busy
# in I/O clock domain
@property
def busy_channel(self):
return self.output_driver.busy_channel

View File

@ -22,6 +22,8 @@ class LaneDistributor(Module):
if interface is None:
interface = cri.Interface()
self.cri = interface
self.sequence_error = Signal()
self.sequence_error_channel = Signal(16)
self.minimum_coarse_timestamp = Signal(64-glbl_fine_ts_width)
self.output = [Record(layouts.fifo_ingress(seqn_width, layout_payload))
for _ in range(lane_count)]
@ -30,9 +32,7 @@ class LaneDistributor(Module):
o_status_wait = Signal()
o_status_underflow = Signal()
o_status_sequence_error = Signal()
self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow,
o_status_sequence_error))
self.comb += self.cri.o_status.eq(Cat(o_status_wait, o_status_underflow))
# internal state
current_lane = Signal(max=lane_count)
@ -135,15 +135,13 @@ class LaneDistributor(Module):
]
self.sync += [
If(self.cri.cmd == cri.commands["write"],
o_status_underflow.eq(0),
o_status_sequence_error.eq(0)
o_status_underflow.eq(0)
),
If(do_underflow,
o_status_underflow.eq(1)
),
If(do_sequence_error,
o_status_sequence_error.eq(1)
)
self.sequence_error.eq(do_sequence_error),
self.sequence_error_channel.eq(self.cri.chan_sel[:16])
]
# current lane has been full, spread events by switching to the next.

View File

@ -111,8 +111,6 @@ class OutputsTestbench:
status = yield from kcsrs.o_status.read()
if status & 2:
raise RTIOUnderflow
if status & 4:
raise RTIOSequenceError
yield
wlen += 1
return wlen

View File

@ -38,7 +38,7 @@ def simulate(input_events, compensation=None, wait=True):
access_status = "ok"
if status & 0x02:
access_status = "underflow"
if status & 0x04:
if (yield dut.sequence_error):
access_status = "sequence_error"
access_results.append((access_status, access_time))

View File

@ -56,7 +56,7 @@ def simulate(input_events):
access_status = "ok"
if status & 0x02:
access_status = "underflow"
if status & 0x04:
if (yield dut.sed.sequence_error):
access_status = "sequence_error"
access_results.append((access_status, access_time))

View File

@ -401,27 +401,23 @@ class CoredeviceTest(ExperimentCase):
with self.assertRaises(RTIOUnderflow):
self.execute(Underflow)
def execute_and_test_in_log(self, experiment, string):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(experiment)
log = mgmt.get_log()
self.assertIn(string, log)
mgmt.close()
def test_sequence_error(self):
with self.assertRaises(RTIOSequenceError):
self.execute(SequenceError)
self.execute_and_test_in_log(SequenceError, "RTIO sequence error")
def test_collision(self):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(Collision)
log = mgmt.get_log()
self.assertIn("RTIO collision", log)
mgmt.close()
self.execute_and_test_in_log(Collision, "RTIO collision")
def test_address_collision(self):
core_addr = self.device_mgr.get_desc("core")["arguments"]["host"]
mgmt = CommMgmt(core_addr)
mgmt.clear_log()
self.execute(AddressCollision)
log = mgmt.get_log()
self.assertIn("RTIO collision", log)
mgmt.close()
self.execute_and_test_in_log(AddressCollision, "RTIO collision")
def test_watchdog(self):
# watchdog only works on the device

View File

@ -117,6 +117,22 @@ To track down ``RTIOUnderflows`` in an experiment there are a few approaches:
code.
* The :any:`integrated logic analyzer <core-device-rtio-analyzer-tool>` shows the timeline context that lead to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between timeline cursor and wall clock time (``now - rtio_counter``).
Sequence errors
---------------
A sequence error happens when the sequence of coarse timestamps cannot be supported by the gateware. For example, there may have been too many timeline rewinds.
Internally, the gateware stores output events in an array of FIFO buffers (the "lanes") and the timestamps in each lane much be strictly increasing. The gateware selects a different lane when an event with a decreasing or equal timestamp is submitted. A sequence error occurs when no appropriate lane can be found.
Notes:
* Strictly increasing timestamps never cause sequence errors.
* Configuring the gateware with more lanes for the RTIO core reduces the frequency of sequence errors.
* Whether a particular sequence of timestamps causes a sequence error or not is fully deterministic (starting from a known RTIO state, e.g. after a reset). Adding a constant offset to the whole sequence does not affect the result.
The offending event is discarded and the RTIO core keeps operating.
This error is reported asynchronously via the core device log: for performance reasons with DRTIO, the CPU does not wait for an error report from the satellite after writing an event. Therefore, it is not possible to raise an exception precisely.
Collisions
----------
A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different.