drtio: report satellite errors through firmware

This commit is contained in:
Sebastien Bourdeauducq 2017-04-01 12:18:00 +08:00
parent 8b98e1ea6d
commit 0a687b7902
10 changed files with 143 additions and 138 deletions

View File

@ -144,37 +144,23 @@ pub mod drtio {
}
}
// keep this in sync with error_codes in rt_packets.py
fn str_packet_error(err_code: u8) -> &'static str {
match err_code {
0 => "Received packet of an unknown type",
1 => "Satellite reported reception of a packet of an unknown type",
2 => "Received truncated packet",
3 => "Satellite reported reception of a truncated packet",
4 => "Satellite reported write overflow",
5 => "Satellite reported write underflow",
_ => "Unknown error code"
}
}
fn poll_errors() -> bool {
unsafe {
if csr::drtio::packet_err_present_read() != 0 {
let err_code = csr::drtio::packet_err_code_read();
error!("packet error {} ({})", err_code, str_packet_error(err_code));
csr::drtio::packet_err_present_write(1)
}
if csr::drtio::o_fifo_space_timeout_read() != 0 {
error!("timeout attempting to get remote FIFO space");
csr::drtio::o_fifo_space_timeout_write(1)
}
}
false
}
pub fn error_thread(io: Io) {
// HACK
io.until(poll_errors).unwrap();
loop {
unsafe {
io.until(|| csr::drtio::protocol_error_read() != 0).unwrap();
let errors = csr::drtio::protocol_error_read();
if errors & 1 != 0 {
error!("received packet of an unknown type");
}
if errors & 2 != 0 {
error!("received truncated packet");
}
if errors & 4 != 0 {
error!("timeout attempting to get remote FIFO space");
}
csr::drtio::protocol_error_write(errors);
}
}
}
}

View File

@ -69,6 +69,27 @@ fn process_aux_packets() {
}
fn process_errors() {
let errors;
unsafe {
errors = board::csr::drtio::protocol_error_read();
board::csr::drtio::protocol_error_write(errors);
}
if errors & 1 != 0 {
error!("received packet of an unknown type");
}
if errors & 2 != 0 {
error!("received truncated packet");
}
if errors & 4 != 0 {
error!("write underflow");
}
if errors & 8 != 0 {
error!("write overflow");
}
}
#[cfg(rtio_frequency = "62.5")]
const SI5324_SETTINGS: board::si5324::FrequencySettings
= board::si5324::FrequencySettings {
@ -111,10 +132,13 @@ fn startup() {
board::si5324::setup(&SI5324_SETTINGS).expect("cannot initialize si5324");
loop {
while !drtio_link_is_up() {}
while !drtio_link_is_up() {
process_errors();
}
info!("link is up, switching to recovered clock");
board::si5324::select_ext_input(true).expect("failed to switch clocks");
while drtio_link_is_up() {
process_errors();
process_aux_packets();
}
info!("link is down, switching to local crystal clock");

View File

@ -4,7 +4,8 @@ from migen import *
from migen.genlib.cdc import ElasticBuffer
from artiq.gateware.drtio import (link_layer, aux_controller,
rt_packet_satellite, rt_ios_satellite,
rt_packet_satellite, rt_ios_satellite,
rt_errors_satellite,
rt_packet_master, rt_controller_master)
@ -63,6 +64,9 @@ class DRTIOSatellite(Module):
self.submodules.ios = rt_ios_satellite.IOS(
self.rt_packet, channels, fine_ts_width, full_ts_width)
self.submodules.rt_errors = rt_errors_satellite.RTErrorsSatellite(
self.rt_packet, self.ios)
self.clock_domains.cd_rio = ClockDomain()
self.clock_domains.cd_rio_phy = ClockDomain()
self.comb += [
@ -77,7 +81,7 @@ class DRTIOSatellite(Module):
def get_csrs(self):
return (self.link_layer.get_csrs() + self.link_stats.get_csrs() +
self.aux_controller.get_csrs())
self.rt_errors.get_csrs() + self.aux_controller.get_csrs())
class DRTIOMaster(Module):

View File

@ -13,6 +13,8 @@ from artiq.gateware.rtio import cri
class _CSRs(AutoCSR):
def __init__(self):
self.protocol_error = CSR(3)
self.chan_sel_override = CSRStorage(16)
self.chan_sel_override_en = CSRStorage()
@ -29,7 +31,6 @@ class _CSRs(AutoCSR):
self.o_dbg_fifo_space_req_cnt = CSRStatus(32)
self.o_reset_channel_status = CSR()
self.o_wait = CSRStatus()
self.o_fifo_space_timeout = CSR()
class RTController(Module):
@ -38,6 +39,24 @@ class RTController(Module):
self.cri = cri.Interface()
self.comb += self.cri.arb_gnt.eq(1)
# protocol errors
err_unknown_packet_type = Signal()
err_packet_truncated = Signal()
signal_fifo_space_timeout = Signal()
err_fifo_space_timeout = Signal()
self.sync.sys_with_rst += [
If(self.csrs.protocol_error.re,
If(self.csrs.protocol_error.r[0], err_unknown_packet_type.eq(0)),
If(self.csrs.protocol_error.r[1], err_packet_truncated.eq(0)),
If(self.csrs.protocol_error.r[2], err_fifo_space_timeout.eq(0))
),
If(rt_packet.err_unknown_packet_type, err_unknown_packet_type.eq(1)),
If(rt_packet.err_packet_truncated, err_packet_truncated.eq(1)),
If(signal_fifo_space_timeout, err_fifo_space_timeout.eq(1))
]
self.comb += self.csrs.protocol_error.w.eq(
Cat(err_unknown_packet_type, err_packet_truncated, err_fifo_space_timeout))
# channel selection
chan_sel = Signal(16)
self.comb += chan_sel.eq(
@ -135,11 +154,6 @@ class RTController(Module):
If(o_sequence_error_set, o_status_sequence_error.eq(1))
]
signal_fifo_space_timeout = Signal()
self.sync.sys_with_rst += [
If(self.csrs.o_fifo_space_timeout.re, self.csrs.o_fifo_space_timeout.w.eq(0)),
If(signal_fifo_space_timeout, self.csrs.o_fifo_space_timeout.w.eq(1))
]
timeout_counter = WaitTimer(8191)
self.submodules += timeout_counter
@ -273,9 +287,6 @@ class RTManager(Module, AutoCSR):
def __init__(self, rt_packet):
self.request_echo = CSR()
self.packet_err_present = CSR()
self.packet_err_code = CSRStatus(8)
self.update_packet_cnt = CSR()
self.packet_cnt_tx = CSRStatus(32)
self.packet_cnt_rx = CSRStatus(32)
@ -288,12 +299,6 @@ class RTManager(Module, AutoCSR):
If(self.request_echo.re, rt_packet.echo_stb.eq(1))
]
self.comb += [
self.packet_err_present.w.eq(rt_packet.error_not),
rt_packet.error_not_ack.eq(self.packet_err_present.re),
self.packet_err_code.status.eq(rt_packet.error_code)
]
self.sync += \
If(self.update_packet_cnt.re,
self.packet_cnt_tx.status.eq(rt_packet.packet_cnt_tx),

View File

@ -0,0 +1,36 @@
"""Protocol error reporting for satellites."""
from migen import *
from migen.genlib.cdc import PulseSynchronizer
from misoc.interconnect.csr import *
class RTErrorsSatellite(Module, AutoCSR):
def __init__(self, rt_packet, ios):
self.protocol_error = CSR(4)
# The master is normally responsible for avoiding output overflows and
# output underflows.
# Error reports here are only for diagnosing internal ARTIQ bugs.
unknown_packet_type = Signal()
packet_truncated = Signal()
write_overflow = Signal()
write_underflow = Signal()
self.comb += self.protocol_error.w.eq(
Cat(unknown_packet_type, packet_truncated,
write_underflow, write_overflow))
for n, (target, source) in enumerate([
(unknown_packet_type, rt_packet.unknown_packet_type),
(packet_truncated, rt_packet.packet_truncated),
(write_underflow, ios.write_underflow),
(write_overflow, ios.write_overflow)]):
ps = PulseSynchronizer("rtio", "sys")
self.submodules += ps
self.comb += ps.i.eq(source)
self.sync += [
If(self.protocol_error.re & self.protocol_error.r[n], target.eq(0)),
If(ps.o, target.eq(1))
]

View File

@ -9,6 +9,9 @@ from artiq.gateware.rtio import rtlink
class IOS(Module):
def __init__(self, rt_packet, channels, max_fine_ts_width, full_ts_width):
self.write_underflow = Signal()
self.write_overflow = Signal()
self.rt_packet = rt_packet
self.max_fine_ts_width = max_fine_ts_width
@ -21,6 +24,10 @@ class IOS(Module):
)
self.comb += rt_packet.tsc_input.eq(self.tsc)
self.sync.rio += [
self.write_underflow.eq(0),
self.write_overflow.eq(0)
]
for n, channel in enumerate(channels):
self.add_output(n, channel)
self.add_input(n, channel)
@ -70,15 +77,11 @@ class IOS(Module):
self.comb += fifo.we.eq(rt_packet.write_stb
& (rt_packet.write_channel == n))
self.sync.rio += [
If(rt_packet.write_overflow_ack,
rt_packet.write_overflow.eq(0)),
If(rt_packet.write_underflow_ack,
rt_packet.write_underflow.eq(0)),
If(fifo.we,
If(~fifo.writable, rt_packet.write_overflow.eq(1)),
If(rt_packet.write_timestamp[max_fine_ts_width:] < (tsc_comp + 4),
rt_packet.write_underflow.eq(1)
)
self.write_underflow.eq(1)
),
If(~fifo.writable, self.write_overflow.eq(1))
)
]
if data_width:

View File

@ -116,10 +116,9 @@ class RTPacketMaster(Module):
self.reset_ack = Signal()
self.reset_phy = Signal()
# errors
self.error_not = Signal()
self.error_not_ack = Signal()
self.error_code = Signal(8)
# rx errors
self.err_unknown_packet_type = Signal()
self.err_packet_truncated = Signal()
# packet counters
self.packet_cnt_tx = Signal(32)
@ -235,12 +234,6 @@ class RTPacketMaster(Module):
self.echo_stb, self.echo_ack, None,
echo_stb, echo_ack, None)
error_not = Signal()
error_code = Signal(8)
self.submodules += _CrossDomainNotification("rtio_rx",
error_not, error_code,
self.error_not, self.error_not_ack, self.error_code)
read_not = Signal()
read_no_event = Signal()
read_is_overflow = Signal()
@ -259,6 +252,14 @@ class RTPacketMaster(Module):
read_timestamp.eq(rx_dp.packet_as["read_reply"].timestamp)
]
err_unknown_packet_type = PulseSynchronizer("rtio_rx", "sys")
err_packet_truncated = PulseSynchronizer("rtio_rx", "sys")
self.submodules += err_unknown_packet_type, err_packet_truncated
self.comb += [
self.err_unknown_packet_type.eq(err_unknown_packet_type.o),
self.err_packet_truncated.eq(err_packet_truncated.o)
]
# TX FSM
tx_fsm = ClockDomainsRenamer("rtio")(FSM(reset_state="IDLE"))
self.submodules += tx_fsm
@ -367,30 +368,20 @@ class RTPacketMaster(Module):
rx_dp.packet_buffer_load.eq(1),
If(rx_dp.packet_last,
Case(rx_dp.packet_type, {
rx_plm.types["error"]: NextState("ERROR"),
rx_plm.types["echo_reply"]: echo_received_now.eq(1),
rx_plm.types["fifo_space_reply"]: NextState("FIFO_SPACE"),
rx_plm.types["read_reply"]: NextState("READ_REPLY"),
rx_plm.types["read_reply_noevent"]: NextState("READ_REPLY_NOEVENT"),
"default": [
error_not.eq(1),
error_code.eq(error_codes["unknown_type_local"])
]
"default": err_unknown_packet_type.i.eq(1)
})
).Else(
ongoing_packet_next.eq(1)
)
),
If(~rx_dp.frame_r & ongoing_packet,
error_not.eq(1),
error_code.eq(error_codes["truncated_local"])
err_packet_truncated.i.eq(1)
)
)
rx_fsm.act("ERROR",
error_not.eq(1),
error_code.eq(rx_dp.packet_as["error"].code),
NextState("INPUT")
)
rx_fsm.act("FIFO_SPACE",
fifo_space_not.eq(1),
fifo_space.eq(rx_dp.packet_as["fifo_space_reply"].space),

View File

@ -8,6 +8,9 @@ from artiq.gateware.drtio.rt_serializer import *
class RTPacketSatellite(Module):
def __init__(self, link_layer):
self.unknown_packet_type = Signal()
self.packet_truncated = Signal()
self.tsc_load = Signal()
self.tsc_load_value = Signal(64)
self.tsc_input = Signal(64)
@ -25,9 +28,7 @@ class RTPacketSatellite(Module):
self.write_address = Signal(16)
self.write_data = Signal(512)
self.write_overflow = Signal()
self.write_overflow_ack = Signal()
self.write_underflow = Signal()
self.write_underflow_ack = Signal()
self.read_channel = Signal(16)
self.read_readable = Signal()
@ -68,19 +69,13 @@ class RTPacketSatellite(Module):
# RX->TX
echo_req = Signal()
err_set = Signal()
err_req = Signal()
err_ack = Signal()
fifo_space_set = Signal()
fifo_space_req = Signal()
fifo_space_ack = Signal()
self.sync += [
If(err_ack, err_req.eq(0)),
If(err_set, err_req.eq(1)),
If(fifo_space_ack, fifo_space_req.eq(0)),
If(fifo_space_set, fifo_space_req.eq(1)),
]
err_code = Signal(max=len(error_codes)+1)
# RX FSM
self.comb += [
@ -145,16 +140,13 @@ class RTPacketSatellite(Module):
rx_plm.types["write"]: NextState("WRITE"),
rx_plm.types["fifo_space_request"]: NextState("FIFO_SPACE"),
rx_plm.types["read_request"]: NextState("READ_REQUEST"),
"default": [
err_set.eq(1),
NextValue(err_code, error_codes["unknown_type_remote"])]
"default": self.unknown_packet_type.eq(1)
})
).Else(
ongoing_packet_next.eq(1)
),
If(~rx_dp.frame_r & ongoing_packet,
err_set.eq(1),
NextValue(err_code, error_codes["truncated_remote"])
self.packet_truncated.eq(1)
)
)
)
@ -178,8 +170,7 @@ class RTPacketSatellite(Module):
).Else(
write_data_buffer_load.eq(1),
If(~rx_dp.frame_r,
err_set.eq(1),
NextValue(err_code, error_codes["truncated_remote"]),
self.packet_truncated.eq(1),
NextState("INPUT")
)
)
@ -202,14 +193,11 @@ class RTPacketSatellite(Module):
tx_fsm.act("IDLE",
If(echo_req, NextState("ECHO")),
If(fifo_space_req, NextState("FIFO_SPACE")),
If(self.write_overflow, NextState("ERROR_WRITE_OVERFLOW")),
If(self.write_underflow, NextState("ERROR_WRITE_UNDERFLOW")),
If(~read_request_wait & read_request_pending,
If(read_request_timeout, NextState("READ_TIMEOUT")),
If(self.read_overflow, NextState("READ_OVERFLOW")),
If(self.read_readable, NextState("READ"))
),
If(err_req, NextState("ERROR"))
)
)
tx_fsm.act("ECHO",
@ -222,16 +210,6 @@ class RTPacketSatellite(Module):
tx_dp.send("fifo_space_reply", space=self.fifo_space),
If(tx_dp.packet_last, NextState("IDLE"))
)
tx_fsm.act("ERROR_WRITE_OVERFLOW",
self.write_overflow_ack.eq(1),
tx_dp.send("error", code=error_codes["write_overflow"]),
If(tx_dp.packet_last, NextState("IDLE"))
)
tx_fsm.act("ERROR_WRITE_UNDERFLOW",
self.write_underflow_ack.eq(1),
tx_dp.send("error", code=error_codes["write_underflow"]),
If(tx_dp.packet_last, NextState("IDLE"))
)
tx_fsm.act("READ_TIMEOUT",
tx_dp.send("read_reply_noevent", overflow=0),
@ -256,9 +234,3 @@ class RTPacketSatellite(Module):
NextState("IDLE")
)
)
tx_fsm.act("ERROR",
err_ack.eq(1),
tx_dp.send("error", code=err_code),
If(tx_dp.packet_last, NextState("IDLE"))
)

View File

@ -4,7 +4,7 @@ from types import SimpleNamespace
from migen import *
__all__ = ["ReceiveDatapath", "TransmitDatapath", "error_codes",
__all__ = ["ReceiveDatapath", "TransmitDatapath",
"get_m2s_layouts", "get_s2m_layouts"]
@ -64,7 +64,6 @@ def get_m2s_layouts(alignment):
def get_s2m_layouts(alignment):
plm = PacketLayoutManager(alignment)
plm.add_type("error", ("code", 8))
plm.add_type("echo_reply")
plm.add_type("fifo_space_reply", ("space", 16))
@ -75,20 +74,6 @@ def get_s2m_layouts(alignment):
return plm
# keep this in sync with str_packet_error in rtio_mgt.rs
error_codes = {
"unknown_type_local": 0,
"unknown_type_remote": 1,
"truncated_local": 2,
"truncated_remote": 3,
# The transmitter is normally responsible for avoiding
# overflows and underflows. Those error reports are only
# for diagnosing internal ARTIQ bugs.
"write_overflow": 4,
"write_underflow": 5
}
class ReceiveDatapath(Module):
def __init__(self, frame, data, plm):
ws = len(data)

View File

@ -78,6 +78,7 @@ class TestFullStack(unittest.TestCase):
kcsrs = dut.master_ki
csrs = dut.master.rt_controller.csrs
mgr = dut.master.rt_manager
saterr = dut.satellite.rt_errors
ttl_changes = []
correct_ttl_changes = [
@ -176,24 +177,22 @@ class TestFullStack(unittest.TestCase):
self.assertGreater(max_wlen, 5)
def test_tsc_error():
err_present = yield from mgr.packet_err_present.read()
self.assertEqual(err_present, 0)
errors = yield from saterr.protocol_error.read()
self.assertEqual(errors, 0)
yield from csrs.tsc_correction.write(100000000)
yield from csrs.set_time.write(1)
for i in range(15):
yield
delay(10000*8)
yield from write(0, 1)
for i in range(10):
for i in range(12):
yield
err_present = yield from mgr.packet_err_present.read()
err_code = yield from mgr.packet_err_code.read()
self.assertEqual(err_present, 1)
self.assertEqual(err_code, rt_serializer.error_codes["write_underflow"])
yield from mgr.packet_err_present.write(1)
errors = yield from saterr.protocol_error.read()
self.assertEqual(errors, 4) # write underflow
yield from saterr.protocol_error.write(errors)
yield
err_present = yield from mgr.packet_err_present.read()
self.assertEqual(err_present, 0)
errors = yield from saterr.protocol_error.read()
self.assertEqual(errors, 0)
def wait_ttl_events():
while len(ttl_changes) < len(correct_ttl_changes):