drtio: raise RTIOLinkError if operation fails due to link lost (#942)

This commit is contained in:
Sebastien Bourdeauducq 2018-03-04 01:02:53 +08:00
parent ba74013e3e
commit 928d5dc9b3
11 changed files with 112 additions and 62 deletions

View File

@ -90,6 +90,13 @@ class RTIOOverflow(Exception):
artiq_builtin = True
class RTIOLinkError(Exception):
"""Raised with a RTIO operation could not be completed due to a DRTIO link
being down.
"""
artiq_builtin = True
class DMAError(Exception):
"""Raised when performing an invalid DMA operation."""
artiq_builtin = True

View File

@ -384,13 +384,21 @@ extern fn dma_playback(timestamp: i64, ptr: i32) {
while csr::rtio_dma::enable_read() != 0 {}
csr::cri_con::selected_write(0);
if csr::rtio_dma::underflow_read() != 0 {
let error = csr::rtio_dma::error_read();
if error != 0 {
let timestamp = csr::rtio_dma::error_timestamp_read();
let channel = csr::rtio_dma::error_channel_read();
csr::rtio_dma::underflow_write(1);
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0)
csr::rtio_dma::error_write(1);
if error & 1 != 0 {
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0);
}
if error & 2 != 0 {
raise!("RTIOLinkError",
"RTIO output link error at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0);
}
}
}
}

View File

@ -9,9 +9,11 @@ mod imp {
pub const RTIO_O_STATUS_WAIT: u8 = 1;
pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2;
pub const RTIO_O_STATUS_LINK_ERROR: u8 = 4;
pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1;
pub const RTIO_I_STATUS_OVERFLOW: u8 = 2;
pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4;
pub const RTIO_I_STATUS_LINK_ERROR: u8 = 8;
pub extern fn init() {
send(&RtioInitRequest);
@ -45,7 +47,12 @@ mod imp {
if status & RTIO_O_STATUS_UNDERFLOW != 0 {
raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}, slack {2} mu",
timestamp, channel as i64, timestamp - get_counter())
timestamp, channel as i64, timestamp - get_counter());
}
if status & RTIO_O_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO output link error at {0} mu, channel {1}",
timestamp, channel as i64, 0);
}
}
@ -101,6 +108,11 @@ mod imp {
if status & RTIO_I_STATUS_WAIT_EVENT != 0 {
return !0
}
if status & RTIO_I_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO input link error on channel {0}",
channel as i64, 0, 0);
}
csr::rtio::i_timestamp_read()
}
@ -123,6 +135,11 @@ mod imp {
"RTIO input overflow on channel {0}",
channel as i64, 0, 0);
}
if status & RTIO_I_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO input link error on channel {0}",
channel as i64, 0, 0);
}
rtio_i_data_read(0) as i32
}

View File

@ -50,14 +50,21 @@ pub mod drtio {
fn link_rx_up(linkno: u8) -> bool {
let linkno = linkno as usize;
unsafe {
(csr::DRTIO[linkno].link_status_read)() == 1
(csr::DRTIO[linkno].rx_up_read)() == 1
}
}
fn link_reset(linkno: u8) {
fn link_up(linkno: u8) -> bool {
let linkno = linkno as usize;
unsafe {
(csr::DRTIO[linkno].reset_write)(1);
(csr::DRTIO[linkno].link_up_read)() == 1
}
}
fn set_link_up(linkno: u8, up: bool) {
let linkno = linkno as usize;
unsafe {
(csr::DRTIO[linkno].link_up_write)(if up { 1 } else { 0 });
}
}
@ -132,21 +139,6 @@ pub mod drtio {
}
}
// FIXME: use csr::DRTIO.len(), maybe get rid of static mut as well.
static mut LINK_UP: [bool; 16] = [false; 16];
fn link_up(linkno: u8) -> bool {
unsafe {
LINK_UP[linkno as usize]
}
}
fn set_link_up(linkno: u8, up: bool) {
unsafe {
LINK_UP[linkno as usize] = up
}
}
pub fn link_thread(io: Io) {
loop {
for linkno in 0..csr::DRTIO.len() {
@ -164,14 +156,13 @@ pub mod drtio {
/* link was previously down */
if link_rx_up(linkno) {
info!("[LINK#{}] link RX became up, pinging", linkno);
link_reset(linkno);
let ping_count = ping_remote(linkno, &io);
if ping_count > 0 {
info!("[LINK#{}] remote replied after {} packets", linkno, ping_count);
set_link_up(linkno, true);
init_buffer_space(linkno);
sync_tsc(linkno);
info!("[LINK#{}] link initialization completed", linkno);
set_link_up(linkno, true);
} else {
info!("[LINK#{}] ping failed", linkno);
}

View File

@ -206,9 +206,9 @@ const SI5324_SETTINGS: si5324::FrequencySettings
crystal_ref: true
};
fn drtio_link_is_up() -> bool {
fn drtio_link_rx_up() -> bool {
unsafe {
(csr::DRTIO[0].link_status_read)() == 1
(csr::DRTIO[0].rx_up_read)() == 1
}
}
@ -231,14 +231,14 @@ fn startup() {
}
loop {
while !drtio_link_is_up() {
while !drtio_link_rx_up() {
process_errors();
}
info!("link is up, switching to recovered clock");
si5324::select_ext_input(true).expect("failed to switch clocks");
drtio_reset(false);
drtio_reset_phy(false);
while drtio_link_is_up() {
while drtio_link_rx_up() {
process_errors();
process_aux_packets();
}

View File

@ -224,7 +224,7 @@ class LinkLayerRX(Module):
class LinkLayer(Module, AutoCSR):
def __init__(self, encoder, decoders):
self.link_status = CSRStatus()
self.rx_up = CSRStatus()
self.rx_disable = CSRStorage()
self.tx_force_aux_zero = CSRStorage()
self.tx_force_rt_zero = CSRStorage()
@ -254,14 +254,14 @@ class LinkLayer(Module, AutoCSR):
# # #
ready = Signal()
ready_r = Signal()
self.sync.rtio += ready_r.eq(ready)
ready_rx = Signal()
ready_r.attr.add("no_retiming")
rx_up = Signal()
rx_up_r = Signal()
self.sync.rtio += rx_up_r.eq(rx_up)
rx_up_rx = Signal()
rx_up_r.attr.add("no_retiming")
self.specials += [
MultiReg(ready_r, ready_rx, "rtio_rx"),
MultiReg(ready_r, self.link_status.status)]
MultiReg(rx_up_r, rx_up_rx, "rtio_rx"),
MultiReg(rx_up_r, self.rx_up.status)]
tx_force_aux_zero_rtio = Signal()
tx_force_rt_zero_rtio = Signal()
@ -286,11 +286,11 @@ class LinkLayer(Module, AutoCSR):
# to be recaptured by RXSynchronizer.
self.sync.rtio_rx += [
self.rx_aux_stb.eq(rx.aux_stb),
self.rx_aux_frame.eq(rx.aux_frame & ready_rx & ~rx_disable_rx),
self.rx_aux_frame_perm.eq(rx.aux_frame & ready_rx),
self.rx_aux_frame.eq(rx.aux_frame & rx_up_rx & ~rx_disable_rx),
self.rx_aux_frame_perm.eq(rx.aux_frame & rx_up_rx),
self.rx_aux_data.eq(rx.aux_data),
self.rx_rt_frame.eq(rx.rt_frame & ready_rx & ~rx_disable_rx),
self.rx_rt_frame_perm.eq(rx.rt_frame & ready_rx),
self.rx_rt_frame.eq(rx.rt_frame & rx_up_rx & ~rx_disable_rx),
self.rx_rt_frame_perm.eq(rx.rt_frame & rx_up_rx),
self.rx_rt_data.eq(rx.rt_data)
]
@ -308,7 +308,7 @@ class LinkLayer(Module, AutoCSR):
If(wait_scrambler.done, NextState("READY"))
)
fsm.act("READY",
ready.eq(1),
rx_up.eq(1),
If(~self.rx_ready, NextState("WAIT_RX_READY"))
)

View File

@ -13,7 +13,7 @@ from artiq.gateware.rtio import cri
class _CSRs(AutoCSR):
def __init__(self):
self.reset = CSR()
self.link_up = CSRStorage()
self.protocol_error = CSR(3)
@ -21,7 +21,6 @@ class _CSRs(AutoCSR):
self.set_time = CSR()
self.underflow_margin = CSRStorage(16, reset=200)
self.o_get_buffer_space = CSR()
self.o_dbg_buffer_space = CSRStatus(16)
self.o_dbg_buffer_space_req_cnt = CSRStatus(32)
@ -53,7 +52,7 @@ class RTController(Module):
# reset
local_reset = Signal(reset=1)
self.sync += local_reset.eq(self.csrs.reset.re)
self.sync += local_reset.eq(~self.csrs.link_up.storage)
local_reset.attr.add("no_retiming")
self.clock_domains.cd_sys_with_rst = ClockDomain()
self.clock_domains.cd_rtio_with_rst = ClockDomain()
@ -64,6 +63,11 @@ class RTController(Module):
self.comb += self.cd_rtio_with_rst.clk.eq(ClockSignal("rtio"))
self.specials += AsyncResetSynchronizer(self.cd_rtio_with_rst, local_reset)
self.comb += [
self.cri.o_status[2].eq(~self.csrs.link_up.storage),
self.cri.i_status[3].eq(~self.csrs.link_up.storage)
]
# protocol errors
err_unknown_packet_type = Signal()
err_packet_truncated = Signal()

View File

@ -25,8 +25,8 @@ layout = [
("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S),
# o_status bits:
# <0:wait> <1:underflow>
("o_status", 2, DIR_S_TO_M),
# <0:wait> <1:underflow> <2:link error>
("o_status", 3, DIR_S_TO_M),
# targets may optionally report a pessimistic estimate of the number
# of outputs events that can be written without waiting.
("o_buffer_space", 16, DIR_S_TO_M),
@ -35,8 +35,9 @@ layout = [
("i_timestamp", 64, DIR_S_TO_M),
# i_status bits:
# <0:wait for event (command timeout)> <1:overflow> <2:wait for status>
# <3:link error>
# <0> and <1> are mutually exclusive. <1> has higher priority.
("i_status", 3, DIR_S_TO_M),
("i_status", 4, DIR_S_TO_M),
# value of the timestamp counter transferred into the CRI clock domain.
# monotonic, may lag behind the counter in the IO clock domain, but
@ -61,12 +62,12 @@ class KernelInitiator(Module, AutoCSR):
self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16)
self.o_we = CSR()
self.o_status = CSRStatus(2)
self.o_status = CSRStatus(3)
self.i_data = CSRStatus(32)
self.i_timestamp = CSRStatus(64)
self.i_request = CSR()
self.i_status = CSRStatus(3)
self.i_status = CSRStatus(4)
self.i_overflow_reset = CSR()
self.counter = CSRStatus(64)

View File

@ -242,7 +242,7 @@ class TimeOffset(Module, AutoCSR):
class CRIMaster(Module, AutoCSR):
def __init__(self):
self.underflow = CSR()
self.error = CSR(2)
self.error_channel = CSRStatus(24)
self.error_timestamp = CSRStatus(64)
@ -255,14 +255,21 @@ class CRIMaster(Module, AutoCSR):
# # #
underflow_trigger = Signal()
link_error_trigger = Signal()
self.sync += [
If(underflow_trigger,
self.underflow.w.eq(1),
self.error.w.eq(1),
self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address)
),
If(self.underflow.re, self.underflow.w.eq(0))
If(link_error_trigger,
self.error.w.eq(2),
self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address)
),
If(self.error.re, self.error.w.eq(0))
]
self.comb += [
@ -276,7 +283,7 @@ class CRIMaster(Module, AutoCSR):
self.submodules += fsm
fsm.act("IDLE",
If(~self.underflow.w,
If(self.error.w == 0,
If(self.sink.stb,
If(self.sink.eop,
# last packet contains dummy data, discard it
@ -301,7 +308,8 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1),
NextState("IDLE")
),
If(self.cri.o_status[1], NextState("UNDERFLOW"))
If(self.cri.o_status[1], NextState("UNDERFLOW")),
If(self.cri.o_status[2], NextState("LINK_ERROR"))
)
fsm.act("UNDERFLOW",
self.busy.eq(1),
@ -309,6 +317,12 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1),
NextState("IDLE")
)
fsm.act("LINK_ERROR",
self.busy.eq(1),
link_error_trigger.eq(1),
self.sink.ack.eq(1),
NextState("IDLE")
)
class DMA(Module):

View File

@ -55,6 +55,7 @@ class DUT(Module):
self.submodules.master = DRTIOMaster(self.transceivers.alice,
fine_ts_width=0)
self.submodules.master_ki = rtio.KernelInitiator(self.master.cri)
self.master.rt_controller.csrs.link_up.storage.reset = 1
rx_synchronizer = DummyRXSynchronizer()
self.submodules.phy0 = ttl_simple.Output(self.ttl0)
@ -81,7 +82,7 @@ class OutputsTestbench:
def init(self):
yield from self.dut.master.rt_controller.csrs.underflow_margin.write(100)
while not (yield from self.dut.master.link_layer.link_status.read()):
while not (yield from self.dut.master.link_layer.rx_up.read()):
yield
yield from self.get_buffer_space()
@ -113,8 +114,10 @@ class OutputsTestbench:
wlen = 0
while status:
status = yield from kcsrs.o_status.read()
if status & 2:
if status & 0x2:
raise RTIOUnderflow
if status & 0x4:
raise RTIOLinkError
yield
wlen += 1
return wlen
@ -251,11 +254,13 @@ class TestFullStack(unittest.TestCase):
return "timeout"
if status & 0x2:
return "overflow"
if status & 0x8:
return "link error"
return ((yield from kcsrs.i_data.read()),
(yield from kcsrs.i_timestamp.read()))
def test():
while not (yield from dut.master.link_layer.link_status.read()):
while not (yield from dut.master.link_layer.rx_up.read()):
yield
i1 = yield from get_input(10)
@ -281,7 +286,7 @@ class TestFullStack(unittest.TestCase):
mgr = dut.master.rt_manager
def test():
while not (yield from dut.master.link_layer.link_status.read()):
while not (yield from dut.master.link_layer.rx_up.read()):
yield
yield from mgr.update_packet_cnt.write(1)

View File

@ -5,7 +5,7 @@ import itertools
from migen import *
from misoc.interconnect import wishbone
from artiq.coredevice.exceptions import RTIOUnderflow
from artiq.coredevice.exceptions import RTIOUnderflow, RTIOLinkError
from artiq.gateware import rtio
from artiq.gateware.rtio import dma, cri
from artiq.gateware.rtio.phy import ttl_simple
@ -57,8 +57,11 @@ def do_dma(dut, address):
yield
while ((yield from dut.enable.read())):
yield
if (yield from dut.cri_master.underflow.read()):
error = yield from dut.cri_master.underflow.read()
if error & 1:
raise RTIOUnderflow
if error & 2:
raise RTIOLinkError
test_writes1 = [