make collision and busy asynchronous errors, and simplify CPU/gateware handshake for output errors and reads

This commit is contained in:
Sebastien Bourdeauducq 2017-03-27 16:32:23 +08:00
parent 1ee3f96482
commit b74d6fb9ba
14 changed files with 134 additions and 244 deletions

View File

@ -26,6 +26,8 @@ Release notes
* The kc705 gateware target has been renamed kc705_dds.
* ``artiq.coredevice.comm_tcp`` has been renamed ``artiq.coredevice.comm_kernel``,
and ``Comm`` has been renamed ``CommKernel``.
* The "collision" and "busy" RTIO errors are reported through the log instead of
raising exceptions.
2.2

View File

@ -1,11 +1,9 @@
from artiq.coredevice import exceptions, dds, spi
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError,
RTIOCollision, RTIOOverflow, RTIOBusy)
from artiq.coredevice.exceptions import (RTIOUnderflow, RTIOSequenceError, RTIOOverflow)
from artiq.coredevice.dds import (PHASE_MODE_CONTINUOUS, PHASE_MODE_ABSOLUTE,
PHASE_MODE_TRACKING)
__all__ = []
__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOCollision",
"RTIOOverflow", "RTIOBusy"]
__all__ += ["RTIOUnderflow", "RTIOSequenceError", "RTIOOverflow"]
__all__ += ["PHASE_MODE_CONTINUOUS", "PHASE_MODE_ABSOLUTE",
"PHASE_MODE_TRACKING"]

View File

@ -23,12 +23,15 @@ class ExceptionType(Enum):
legacy_reset_falling = 0b000001
legacy_reset_phy = 0b000010
legacy_reset_phy_falling = 0b000011
legacy_o_underflow_reset = 0b010000
legacy_o_sequence_error_reset = 0b010001
legacy_o_collision_reset = 0b010010
legacy_i_overflow_reset = 0b100000
o_underflow_reset = 0b010000
o_sequence_error_reset = 0b010001
o_collision_reset = 0b010010
o_underflow = 0b010100
o_sequence_error = 0b010101
i_overflow_reset = 0b100000
i_overflow = 0b100001
def get_analyzer_dump(host, port=1382):

View File

@ -86,30 +86,6 @@ class RTIOSequenceError(Exception):
"""
artiq_builtin = True
class RTIOCollision(Exception):
"""Raised when an event is submitted on a given channel with the same
coarse timestamp as the previous one but with a different fine timestamp.
Coarse timestamps correspond to the RTIO system clock (typically around
125MHz) whereas fine timestamps correspond to the RTIO SERDES clock
(typically around 1GHz).
The offending event is discarded and the RTIO core keeps operating.
"""
artiq_builtin = True
class RTIOBusy(Exception):
"""Raised when at least one output event could not be executed because
the given channel was already busy executing a previous event.
This exception is raised late: after the error condition occurred. More
specifically it is raised on submitting an event on the same channel after
the execution of the faulty event was attempted.
The offending event was discarded.
"""
artiq_builtin = True
class RTIOOverflow(Exception):
"""Raised when at least one event could not be registered into the RTIO
input FIFO because it was full (CPU not reading fast enough).

View File

@ -359,18 +359,6 @@ extern fn dma_playback(timestamp: i64, name: CSlice<u8>) {
"RTIO sequence error at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
if status & rtio::RTIO_O_STATUS_COLLISION != 0 {
csr::rtio_dma::error_collision_reset_write(1);
raise!("RTIOCollision",
"RTIO collision at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
}
if status & rtio::RTIO_O_STATUS_BUSY != 0 {
csr::rtio_dma::error_busy_reset_write(1);
raise!("RTIOBusy",
"RTIO busy on channel {0}",
channel as i64, 0, 0)
}
}
}

View File

@ -7,8 +7,6 @@ use kernel_proto::*;
pub const RTIO_O_STATUS_WAIT: u32 = 1;
pub const RTIO_O_STATUS_UNDERFLOW: u32 = 2;
pub const RTIO_O_STATUS_SEQUENCE_ERROR: u32 = 4;
pub const RTIO_O_STATUS_COLLISION: u32 = 8;
pub const RTIO_O_STATUS_BUSY: u32 = 16;
pub const RTIO_I_STATUS_WAIT_EVENT: u32 = 1;
pub const RTIO_I_STATUS_OVERFLOW: u32 = 2;
pub const RTIO_I_STATUS_WAIT_STATUS: u32 = 4;
@ -43,29 +41,15 @@ unsafe fn process_exceptional_status(timestamp: i64, channel: i32, status: u32)
while csr::rtio::o_status_read() & RTIO_O_STATUS_WAIT != 0 {}
}
if status & RTIO_O_STATUS_UNDERFLOW != 0 {
csr::rtio::o_underflow_reset_write(1);
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}, slack {2} mu",
timestamp, channel as i64, timestamp - get_counter())
}
if status & RTIO_O_STATUS_SEQUENCE_ERROR != 0 {
csr::rtio::o_sequence_error_reset_write(1);
raise!("RTIOSequenceError",
"RTIO sequence error at {0} mu, channel {1}",
timestamp, channel as i64, 0)
}
if status & RTIO_O_STATUS_COLLISION != 0 {
csr::rtio::o_collision_reset_write(1);
raise!("RTIOCollision",
"RTIO collision at {0} mu, channel {1}",
timestamp, channel as i64, 0)
}
if status & RTIO_O_STATUS_BUSY != 0 {
csr::rtio::o_busy_reset_write(1);
raise!("RTIOBusy",
"RTIO busy on channel {0}",
channel as i64, 0, 0)
}
}
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {
@ -118,9 +102,7 @@ pub extern fn input_timestamp(timeout: i64, channel: i32) -> u64 {
return !0
}
let timestamp = csr::rtio::i_timestamp_read();
csr::rtio::i_re_write(1);
timestamp
csr::rtio::i_timestamp_read()
}
}
@ -142,9 +124,7 @@ pub extern fn input_data(channel: i32) -> i32 {
channel as i64, 0, 0);
}
let data = rtio_i_data_read(0);
csr::rtio::i_re_write(1);
data as i32
rtio_i_data_read(0) as i32
}
}

View File

@ -127,8 +127,10 @@ class RTController(Module):
o_sequence_error_set = Signal()
o_underflow_set = Signal()
self.sync.sys_with_rst += [
If(self.cri.cmd == cri.commands["o_underflow_reset"], o_status_underflow.eq(0)),
If(self.cri.cmd == cri.commands["o_sequence_error_reset"], o_status_sequence_error.eq(0)),
If(self.cri.cmd == cri.commands["write"],
o_status_underflow.eq(0),
o_status_sequence_error.eq(0),
),
If(o_underflow_set, o_status_underflow.eq(1)),
If(o_sequence_error_set, o_status_sequence_error.eq(1))
]
@ -184,7 +186,7 @@ class RTController(Module):
NextState("WRITE")
)
),
If(self.cri.cmd == cri.commands["read_request"], NextState("READ")),
If(self.cri.cmd == cri.commands["read"], NextState("READ")),
If(self.csrs.o_get_fifo_space.re, NextState("GET_FIFO_SPACE"))
)
fsm.act("WRITE",

View File

@ -51,6 +51,17 @@ class MessageEncoder(Module, AutoCSR):
# # #
read_wait_event = cri.i_status[2]
read_wait_event_r = Signal()
read_done = Signal()
read_overflow = Signal()
self.sync += read_wait_event_r.eq(read_wait_event)
self.comb += \
If(read_wait_event_r & ~read_wait_event,
If(~cri.i_status[0], read_done.eq(1)),
If(cri.i_status[1], read_overflow.eq(1))
)
input_output_stb = Signal()
input_output = Record(input_output_layout)
self.comb += [
@ -66,8 +77,7 @@ class MessageEncoder(Module, AutoCSR):
input_output.timestamp.eq(cri.i_timestamp),
input_output.data.eq(cri.i_data)
),
input_output_stb.eq((cri.cmd == cri_commands["write"]) |
(cri.cmd == cri_commands["read"]))
input_output_stb.eq((cri.cmd == cri_commands["write"]) | read_done)
]
exception_stb = Signal()
@ -77,14 +87,22 @@ class MessageEncoder(Module, AutoCSR):
exception.channel.eq(cri.chan_sel),
exception.rtio_counter.eq(cri.counter),
]
for ename in ("o_underflow_reset", "o_sequence_error_reset",
"o_collision_reset", "i_overflow_reset"):
self.comb += \
If(cri.cmd == cri_commands[ename],
exception_stb.eq(1),
exception.exception_type.eq(
getattr(ExceptionType, ename).value)
)
just_written = Signal()
self.sync += just_written.eq(cri.cmd == cri_commands["write"])
self.comb += [
If(just_written & cri.o_status[1],
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_underflow.value)
),
If(just_written & cri.o_status[2],
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.o_sequence_error.value)
),
If(read_overflow,
exception_stb.eq(1),
exception.exception_type.eq(ExceptionType.i_overflow.value)
)
]
stopped = Record(stopped_layout)
self.comb += [

View File

@ -290,6 +290,7 @@ class Core(Module, AutoCSR):
self.cri = cri.Interface()
self.reset = CSR()
self.reset_phy = CSR()
self.async_error = CSR(2)
self.comb += self.cri.arb_gnt.eq(1)
# Clocking/Reset
@ -321,11 +322,31 @@ class Core(Module, AutoCSR):
# Managers
self.submodules.counter = RTIOCounter(len(self.cri.timestamp) - fine_ts_width)
i_datas, i_timestamps = [], []
# Collision is not an asynchronous error with local RTIO, but
# we treat it as such for consistency with DRTIO, where collisions
# are reported by the satellites.
o_underflow = Signal()
o_sequence_error = Signal()
o_collision = Signal()
o_busy = Signal()
self.sync.rsys += [
If(self.cri.cmd == cri.commands["write"],
o_underflow.eq(0),
o_sequence_error.eq(0),
),
If(self.async_error.re,
If(self.async_error.r[0], o_collision.eq(0)),
If(self.async_error.r[1], o_busy.eq(0)),
)
]
o_statuses, i_statuses = [], []
i_datas, i_timestamps = [], []
i_ack = Signal()
sel = self.cri.chan_sel[:16]
for n, channel in enumerate(channels):
if isinstance(channel, LogChannel):
o_statuses.append(1)
i_datas.append(0)
i_timestamps.append(0)
i_statuses.append(0)
@ -347,29 +368,13 @@ class Core(Module, AutoCSR):
self.comb += o_manager.we.eq(selected & (self.cri.cmd == cri.commands["write"]))
underflow = Signal()
sequence_error = Signal()
collision = Signal()
busy = Signal()
self.sync.rsys += [
If(self.cri.cmd == cri.commands["o_underflow_reset"],
underflow.eq(0)),
If(self.cri.cmd == cri.commands["o_sequence_error_reset"],
sequence_error.eq(0)),
If(self.cri.cmd == cri.commands["o_collision_reset"],
collision.eq(0)),
If(self.cri.cmd == cri.commands["o_busy_reset"],
busy.eq(0)),
If(o_manager.underflow, underflow.eq(1)),
If(o_manager.sequence_error, sequence_error.eq(1)),
If(o_manager.collision, collision.eq(1)),
If(o_manager.busy, busy.eq(1))
If(o_manager.underflow, o_underflow.eq(1)),
If(o_manager.sequence_error, o_sequence_error.eq(1)),
If(o_manager.collision, o_collision.eq(1)),
If(o_manager.busy, o_busy.eq(1))
]
o_statuses.append(Cat(~o_manager.writable,
underflow,
sequence_error,
collision,
busy))
o_statuses.append(o_manager.writable)
if channel.interface.i is not None:
i_manager = _InputManager(channel.interface.i, self.counter,
@ -386,42 +391,49 @@ class Core(Module, AutoCSR):
else:
i_timestamps.append(0)
self.comb += i_manager.re.eq(selected & (self.cri.cmd == cri.commands["read"]))
overflow = Signal()
self.sync.rsys += [
If(selected & (self.cri.cmd == cri.commands["i_overflow_reset"]),
If(selected & i_ack,
overflow.eq(0)),
If(i_manager.overflow,
overflow.eq(1))
]
i_statuses.append(Cat(i_manager.readable, overflow))
self.comb += i_manager.re.eq(selected & i_ack & ~overflow)
i_statuses.append(Cat(i_manager.readable & ~overflow, overflow))
else:
i_datas.append(0)
i_timestamps.append(0)
i_statuses.append(0)
i_status_raw = Signal(2)
self.sync.rsys += i_status_raw.eq(Array(i_statuses)[sel])
o_status_raw = Signal()
self.comb += [
o_status_raw.eq(Array(o_statuses)[sel]),
self.cri.o_status.eq(Cat(
~o_status_raw, o_underflow, o_sequence_error)),
self.async_error.w.eq(Cat(o_collision, o_busy))
]
i_status_raw = Signal(2)
self.comb += i_status_raw.eq(Array(i_statuses)[sel])
input_timeout = Signal.like(self.cri.timestamp)
input_pending = Signal()
self.sync.rsys += [
i_ack.eq(0),
If(i_ack,
self.cri.i_status.eq(Cat(~i_status_raw[0], i_status_raw[1], 0)),
self.cri.i_data.eq(Array(i_datas)[sel]),
self.cri.i_timestamp.eq(Array(i_timestamps)[sel]),
),
If((self.cri.counter >= input_timeout) | (i_status_raw != 0),
If(input_pending, i_ack.eq(1)),
input_pending.eq(0)
),
If(self.cri.cmd == cri.commands["read_request"],
If(self.cri.cmd == cri.commands["read"],
input_timeout.eq(self.cri.timestamp),
input_pending.eq(1)
input_pending.eq(1),
self.cri.i_status.eq(0b100)
)
]
self.comb += [
self.cri.i_data.eq(Array(i_datas)[sel]),
self.cri.i_timestamp.eq(Array(i_timestamps)[sel]),
self.cri.o_status.eq(Array(o_statuses)[sel]),
self.cri.i_status.eq(Cat(~i_status_raw[0], i_status_raw[1], input_pending)),
self.cri.counter.eq(self.counter.value_sys << fine_ts_width)
]
self.comb += self.cri.counter.eq(self.counter.value_sys << fine_ts_width)

View File

@ -12,15 +12,7 @@ commands = {
"write": 1,
# i_status should have the "wait for status" bit set until
# an event is available, or timestamp is reached.
"read_request": 2,
# consume the read event
"read": 3,
"o_underflow_reset": 4,
"o_sequence_error_reset": 5,
"o_collision_reset": 6,
"o_busy_reset": 7,
"i_overflow_reset": 8
"read": 2
}
@ -28,7 +20,7 @@ layout = [
("arb_req", 1, DIR_M_TO_S),
("arb_gnt", 1, DIR_S_TO_M),
("cmd", 4, DIR_M_TO_S),
("cmd", 2, DIR_M_TO_S),
# 8 MSBs of chan_sel are used to select core
("chan_sel", 24, DIR_M_TO_S),
("timestamp", 64, DIR_M_TO_S),
@ -36,13 +28,14 @@ layout = [
("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S),
# o_status bits:
# <0:wait> <1:underflow> <2:sequence_error> <3:collision> <4:busy>
("o_status", 5, DIR_S_TO_M),
# <0:wait> <1:underflow> <2:sequence_error>
("o_status", 3, DIR_S_TO_M),
("i_data", 32, DIR_S_TO_M),
("i_timestamp", 64, DIR_S_TO_M),
# i_status bits:
# <0:wait for event (command timeout)> <1:overflow> <2:wait for status>
# <0> and <1> are mutually exclusive. <1> has higher priority.
("i_status", 3, DIR_S_TO_M),
("counter", 64, DIR_S_TO_M)
@ -66,16 +59,11 @@ class KernelInitiator(Module, AutoCSR):
self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16)
self.o_we = CSR()
self.o_status = CSRStatus(5)
self.o_underflow_reset = CSR()
self.o_sequence_error_reset = CSR()
self.o_collision_reset = CSR()
self.o_busy_reset = CSR()
self.o_status = CSRStatus(3)
self.i_data = CSRStatus(32)
self.i_timestamp = CSRStatus(64)
self.i_request = CSR()
self.i_re = CSR()
self.i_status = CSRStatus(3)
self.i_overflow_reset = CSR()
@ -94,13 +82,7 @@ class KernelInitiator(Module, AutoCSR):
self.cri.cmd.eq(commands["nop"]),
If(self.o_we.re, self.cri.cmd.eq(commands["write"])),
If(self.i_request.re, self.cri.cmd.eq(commands["read_request"])),
If(self.i_re.re, self.cri.cmd.eq(commands["read"])),
If(self.o_underflow_reset.re, self.cri.cmd.eq(commands["o_underflow_reset"])),
If(self.o_sequence_error_reset.re, self.cri.cmd.eq(commands["o_sequence_error_reset"])),
If(self.o_collision_reset.re, self.cri.cmd.eq(commands["o_collision_reset"])),
If(self.o_busy_reset.re, self.cri.cmd.eq(commands["o_busy_reset"])),
If(self.i_overflow_reset.re, self.cri.cmd.eq(commands["i_overflow_reset"])),
If(self.i_request.re, self.cri.cmd.eq(commands["read"])),
self.cri.chan_sel.eq(self.chan_sel.storage),
self.cri.timestamp.eq(self.timestamp.storage),

View File

@ -247,11 +247,9 @@ class CRIMaster(Module, AutoCSR):
self.arb_req = CSRStorage()
self.arb_gnt = CSRStatus()
self.error_status = CSRStatus(5) # same encoding as RTIO status
self.error_status = CSRStatus(3) # same encoding as RTIO status
self.error_underflow_reset = CSR()
self.error_sequence_error_reset = CSR()
self.error_collision_reset = CSR()
self.error_busy_reset = CSR()
self.error_channel = CSRStatus(24)
self.error_timestamp = CSRStatus(64)
@ -268,9 +266,8 @@ class CRIMaster(Module, AutoCSR):
self.arb_gnt.status.eq(self.cri.arb_gnt)
]
error_set = Signal(4)
for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset,
self.error_collision_reset, self.error_busy_reset]):
error_set = Signal(2)
for i, rcsr in enumerate([self.error_underflow_reset, self.error_sequence_error_reset]):
# bit 0 is RTIO wait and always 0 here
bit = i + 1
self.sync += [
@ -320,16 +317,12 @@ class CRIMaster(Module, AutoCSR):
NextState("IDLE")
),
If(self.cri.o_status[1], NextState("UNDERFLOW")),
If(self.cri.o_status[2], NextState("SEQUENCE_ERROR")),
If(self.cri.o_status[3], NextState("COLLISION")),
If(self.cri.o_status[4], NextState("BUSY"))
If(self.cri.o_status[2], NextState("SEQUENCE_ERROR"))
)
for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR",
"COLLISION", "BUSY"]):
for n, name in enumerate(["UNDERFLOW", "SEQUENCE_ERROR"]):
fsm.act(name,
self.busy.eq(1),
error_set.eq(1 << n),
self.cri.cmd.eq(cri.commands["o_" + name.lower() + "_reset"]),
self.sink.ack.eq(1),
NextState("IDLE")
)

View File

@ -384,13 +384,14 @@ class CoredeviceTest(ExperimentCase):
with self.assertRaises(RTIOSequenceError):
self.execute(SequenceError)
def test_collision(self):
with self.assertRaises(RTIOCollision):
self.execute(Collision)
# TODO: capture core device log
# def test_collision(self):
# with self.assertRaises(RTIOCollision):
# self.execute(Collision)
def test_address_collision(self):
with self.assertRaises(RTIOCollision):
self.execute(AddressCollision)
# def test_address_collision(self):
# with self.assertRaises(RTIOCollision):
# self.execute(AddressCollision)
def test_watchdog(self):
# watchdog only works on the device

View File

@ -1,84 +0,0 @@
from artiq.experiment import *
from artiq.test.hardware_testbench import ExperimentCase
class WrongError(Exception):
pass
class Collision(EnvExperiment):
def build(self):
self.setattr_device("core")
self.setattr_device("spi0")
@kernel
def run(self):
self.core.reset()
t = now_mu()
try:
self.spi0.set_config_mu()
except RTIOBusy:
raise WrongError()
at_mu(t)
self.spi0.set_config_mu()
class Busy(EnvExperiment):
def build(self):
self.setattr_device("core")
self.setattr_device("spi0")
self.setattr_device("led")
@kernel
def run(self):
self.core.reset()
try:
self.spi0.set_config_mu()
t = now_mu()
self.spi0.set_config_mu()
at_mu(t + self.spi0.ref_period_mu)
self.spi0.set_config_mu() # causes the error
self.led.on()
self.led.sync() # registers the error
self.core.break_realtime()
except RTIOBusy:
raise WrongError() # we don't expect RTIOBusy so far
self.spi0.set_config_mu() # raises the error
class DrainErrors(EnvExperiment):
def build(self):
self.setattr_device("core")
self.setattr_device("spi0")
self.setattr_device("led")
@kernel
def run(self):
self.core.reset()
while True:
try:
self.core.break_realtime()
delay(100*us)
self.spi0.set_config_mu()
self.led.on()
self.led.sync()
self.core.break_realtime()
self.spi0.set_config_mu()
self.led.off()
return
except:
pass
class SPITest(ExperimentCase):
def tearDown(self):
self.execute(DrainErrors)
ExperimentCase.tearDown(self)
def test_collision(self):
with self.assertRaises(RTIOCollision):
self.execute(Collision)
def test_busy(self):
with self.assertRaises(RTIOBusy):
self.execute(Busy)

View File

@ -117,6 +117,25 @@ To track down ``RTIOUnderflows`` in an experiment there are a few approaches:
code.
* The :any:`integrated logic analyzer <core-device-rtio-analyzer-tool>` shows the timeline context that led to the exception. The analyzer is always active and supports plotting of RTIO slack. RTIO slack is the difference between the timeline cursor and wall clock time (``now - rtio_counter``).
Collisions
----------
A collision happens when more than one event is submitted on a given channel with the same coarse timestamp, and that channel does not implement replacement behavior or the fine timestamps are different.
Coarse timestamps correspond to the RTIO system clock (typically around 125MHz) whereas fine timestamps correspond to the RTIO SERDES clock (typically around 1GHz). Different channels may have different ratios between the coarse and fine timestamp clock frequencies.
The offending event is discarded and the RTIO core keeps operating.
This error is reported asynchronously via the core device log: for performance reasons with DRTIO, the CPU does not wait for an error report from the satellite after writing an event. Therefore, it is not possible to raise an exception precisely.
Busy errors
-----------
A busy error happens when at least one output event could not be executed because the channel was already busy executing a previous event.
The offending event was discarded.
This error is reported asynchronously via the core device log.
Input channels and events
-------------------------