From 928d5dc9b30e9db1329db76cd2b8b60f79d36dcc Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sun, 4 Mar 2018 01:02:53 +0800 Subject: [PATCH] drtio: raise RTIOLinkError if operation fails due to link lost (#942) --- artiq/coredevice/exceptions.py | 7 +++++ artiq/firmware/ksupport/lib.rs | 18 ++++++++---- artiq/firmware/ksupport/rtio.rs | 19 +++++++++++- artiq/firmware/runtime/rtio_mgt.rs | 31 +++++++------------- artiq/firmware/satman/main.rs | 8 ++--- artiq/gateware/drtio/link_layer.py | 26 ++++++++-------- artiq/gateware/drtio/rt_controller_master.py | 10 +++++-- artiq/gateware/rtio/cri.py | 11 +++---- artiq/gateware/rtio/dma.py | 24 +++++++++++---- artiq/gateware/test/drtio/test_full_stack.py | 13 +++++--- artiq/gateware/test/rtio/test_dma.py | 7 +++-- 11 files changed, 112 insertions(+), 62 deletions(-) diff --git a/artiq/coredevice/exceptions.py b/artiq/coredevice/exceptions.py index 440be95f3..f0ce65291 100644 --- a/artiq/coredevice/exceptions.py +++ b/artiq/coredevice/exceptions.py @@ -90,6 +90,13 @@ class RTIOOverflow(Exception): artiq_builtin = True +class RTIOLinkError(Exception): + """Raised with a RTIO operation could not be completed due to a DRTIO link + being down. + """ + artiq_builtin = True + + class DMAError(Exception): """Raised when performing an invalid DMA operation.""" artiq_builtin = True diff --git a/artiq/firmware/ksupport/lib.rs b/artiq/firmware/ksupport/lib.rs index 517dac0e9..068f83761 100644 --- a/artiq/firmware/ksupport/lib.rs +++ b/artiq/firmware/ksupport/lib.rs @@ -384,13 +384,21 @@ extern fn dma_playback(timestamp: i64, ptr: i32) { while csr::rtio_dma::enable_read() != 0 {} csr::cri_con::selected_write(0); - if csr::rtio_dma::underflow_read() != 0 { + let error = csr::rtio_dma::error_read(); + if error != 0 { let timestamp = csr::rtio_dma::error_timestamp_read(); let channel = csr::rtio_dma::error_channel_read(); - csr::rtio_dma::underflow_write(1); - raise!("RTIOUnderflow", - "RTIO underflow at {0} mu, channel {1}", - timestamp as i64, channel as i64, 0) + csr::rtio_dma::error_write(1); + if error & 1 != 0 { + raise!("RTIOUnderflow", + "RTIO underflow at {0} mu, channel {1}", + timestamp as i64, channel as i64, 0); + } + if error & 2 != 0 { + raise!("RTIOLinkError", + "RTIO output link error at {0} mu, channel {1}", + timestamp as i64, channel as i64, 0); + } } } } diff --git a/artiq/firmware/ksupport/rtio.rs b/artiq/firmware/ksupport/rtio.rs index 4d8b251a0..7af22169c 100644 --- a/artiq/firmware/ksupport/rtio.rs +++ b/artiq/firmware/ksupport/rtio.rs @@ -9,9 +9,11 @@ mod imp { pub const RTIO_O_STATUS_WAIT: u8 = 1; pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2; + pub const RTIO_O_STATUS_LINK_ERROR: u8 = 4; pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1; pub const RTIO_I_STATUS_OVERFLOW: u8 = 2; pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4; + pub const RTIO_I_STATUS_LINK_ERROR: u8 = 8; pub extern fn init() { send(&RtioInitRequest); @@ -45,7 +47,12 @@ mod imp { if status & RTIO_O_STATUS_UNDERFLOW != 0 { raise!("RTIOUnderflow", "RTIO underflow at {0} mu, channel {1}, slack {2} mu", - timestamp, channel as i64, timestamp - get_counter()) + timestamp, channel as i64, timestamp - get_counter()); + } + if status & RTIO_O_STATUS_LINK_ERROR != 0 { + raise!("RTIOLinkError", + "RTIO output link error at {0} mu, channel {1}", + timestamp, channel as i64, 0); } } @@ -101,6 +108,11 @@ mod imp { if status & RTIO_I_STATUS_WAIT_EVENT != 0 { return !0 } + if status & RTIO_I_STATUS_LINK_ERROR != 0 { + raise!("RTIOLinkError", + "RTIO input link error on channel {0}", + channel as i64, 0, 0); + } csr::rtio::i_timestamp_read() } @@ -123,6 +135,11 @@ mod imp { "RTIO input overflow on channel {0}", channel as i64, 0, 0); } + if status & RTIO_I_STATUS_LINK_ERROR != 0 { + raise!("RTIOLinkError", + "RTIO input link error on channel {0}", + channel as i64, 0, 0); + } rtio_i_data_read(0) as i32 } diff --git a/artiq/firmware/runtime/rtio_mgt.rs b/artiq/firmware/runtime/rtio_mgt.rs index f05c99d09..490535fee 100644 --- a/artiq/firmware/runtime/rtio_mgt.rs +++ b/artiq/firmware/runtime/rtio_mgt.rs @@ -50,14 +50,21 @@ pub mod drtio { fn link_rx_up(linkno: u8) -> bool { let linkno = linkno as usize; unsafe { - (csr::DRTIO[linkno].link_status_read)() == 1 + (csr::DRTIO[linkno].rx_up_read)() == 1 } } - fn link_reset(linkno: u8) { + fn link_up(linkno: u8) -> bool { let linkno = linkno as usize; unsafe { - (csr::DRTIO[linkno].reset_write)(1); + (csr::DRTIO[linkno].link_up_read)() == 1 + } + } + + fn set_link_up(linkno: u8, up: bool) { + let linkno = linkno as usize; + unsafe { + (csr::DRTIO[linkno].link_up_write)(if up { 1 } else { 0 }); } } @@ -132,21 +139,6 @@ pub mod drtio { } } - // FIXME: use csr::DRTIO.len(), maybe get rid of static mut as well. - static mut LINK_UP: [bool; 16] = [false; 16]; - - fn link_up(linkno: u8) -> bool { - unsafe { - LINK_UP[linkno as usize] - } - } - - fn set_link_up(linkno: u8, up: bool) { - unsafe { - LINK_UP[linkno as usize] = up - } - } - pub fn link_thread(io: Io) { loop { for linkno in 0..csr::DRTIO.len() { @@ -164,14 +156,13 @@ pub mod drtio { /* link was previously down */ if link_rx_up(linkno) { info!("[LINK#{}] link RX became up, pinging", linkno); - link_reset(linkno); let ping_count = ping_remote(linkno, &io); if ping_count > 0 { info!("[LINK#{}] remote replied after {} packets", linkno, ping_count); + set_link_up(linkno, true); init_buffer_space(linkno); sync_tsc(linkno); info!("[LINK#{}] link initialization completed", linkno); - set_link_up(linkno, true); } else { info!("[LINK#{}] ping failed", linkno); } diff --git a/artiq/firmware/satman/main.rs b/artiq/firmware/satman/main.rs index 195471457..7e069b752 100644 --- a/artiq/firmware/satman/main.rs +++ b/artiq/firmware/satman/main.rs @@ -206,9 +206,9 @@ const SI5324_SETTINGS: si5324::FrequencySettings crystal_ref: true }; -fn drtio_link_is_up() -> bool { +fn drtio_link_rx_up() -> bool { unsafe { - (csr::DRTIO[0].link_status_read)() == 1 + (csr::DRTIO[0].rx_up_read)() == 1 } } @@ -231,14 +231,14 @@ fn startup() { } loop { - while !drtio_link_is_up() { + while !drtio_link_rx_up() { process_errors(); } info!("link is up, switching to recovered clock"); si5324::select_ext_input(true).expect("failed to switch clocks"); drtio_reset(false); drtio_reset_phy(false); - while drtio_link_is_up() { + while drtio_link_rx_up() { process_errors(); process_aux_packets(); } diff --git a/artiq/gateware/drtio/link_layer.py b/artiq/gateware/drtio/link_layer.py index 50f2ca6a3..a4779e912 100644 --- a/artiq/gateware/drtio/link_layer.py +++ b/artiq/gateware/drtio/link_layer.py @@ -224,7 +224,7 @@ class LinkLayerRX(Module): class LinkLayer(Module, AutoCSR): def __init__(self, encoder, decoders): - self.link_status = CSRStatus() + self.rx_up = CSRStatus() self.rx_disable = CSRStorage() self.tx_force_aux_zero = CSRStorage() self.tx_force_rt_zero = CSRStorage() @@ -254,14 +254,14 @@ class LinkLayer(Module, AutoCSR): # # # - ready = Signal() - ready_r = Signal() - self.sync.rtio += ready_r.eq(ready) - ready_rx = Signal() - ready_r.attr.add("no_retiming") + rx_up = Signal() + rx_up_r = Signal() + self.sync.rtio += rx_up_r.eq(rx_up) + rx_up_rx = Signal() + rx_up_r.attr.add("no_retiming") self.specials += [ - MultiReg(ready_r, ready_rx, "rtio_rx"), - MultiReg(ready_r, self.link_status.status)] + MultiReg(rx_up_r, rx_up_rx, "rtio_rx"), + MultiReg(rx_up_r, self.rx_up.status)] tx_force_aux_zero_rtio = Signal() tx_force_rt_zero_rtio = Signal() @@ -286,11 +286,11 @@ class LinkLayer(Module, AutoCSR): # to be recaptured by RXSynchronizer. self.sync.rtio_rx += [ self.rx_aux_stb.eq(rx.aux_stb), - self.rx_aux_frame.eq(rx.aux_frame & ready_rx & ~rx_disable_rx), - self.rx_aux_frame_perm.eq(rx.aux_frame & ready_rx), + self.rx_aux_frame.eq(rx.aux_frame & rx_up_rx & ~rx_disable_rx), + self.rx_aux_frame_perm.eq(rx.aux_frame & rx_up_rx), self.rx_aux_data.eq(rx.aux_data), - self.rx_rt_frame.eq(rx.rt_frame & ready_rx & ~rx_disable_rx), - self.rx_rt_frame_perm.eq(rx.rt_frame & ready_rx), + self.rx_rt_frame.eq(rx.rt_frame & rx_up_rx & ~rx_disable_rx), + self.rx_rt_frame_perm.eq(rx.rt_frame & rx_up_rx), self.rx_rt_data.eq(rx.rt_data) ] @@ -308,7 +308,7 @@ class LinkLayer(Module, AutoCSR): If(wait_scrambler.done, NextState("READY")) ) fsm.act("READY", - ready.eq(1), + rx_up.eq(1), If(~self.rx_ready, NextState("WAIT_RX_READY")) ) diff --git a/artiq/gateware/drtio/rt_controller_master.py b/artiq/gateware/drtio/rt_controller_master.py index cb2fcdab6..0cfaaeafc 100644 --- a/artiq/gateware/drtio/rt_controller_master.py +++ b/artiq/gateware/drtio/rt_controller_master.py @@ -13,7 +13,7 @@ from artiq.gateware.rtio import cri class _CSRs(AutoCSR): def __init__(self): - self.reset = CSR() + self.link_up = CSRStorage() self.protocol_error = CSR(3) @@ -21,7 +21,6 @@ class _CSRs(AutoCSR): self.set_time = CSR() self.underflow_margin = CSRStorage(16, reset=200) - self.o_get_buffer_space = CSR() self.o_dbg_buffer_space = CSRStatus(16) self.o_dbg_buffer_space_req_cnt = CSRStatus(32) @@ -53,7 +52,7 @@ class RTController(Module): # reset local_reset = Signal(reset=1) - self.sync += local_reset.eq(self.csrs.reset.re) + self.sync += local_reset.eq(~self.csrs.link_up.storage) local_reset.attr.add("no_retiming") self.clock_domains.cd_sys_with_rst = ClockDomain() self.clock_domains.cd_rtio_with_rst = ClockDomain() @@ -64,6 +63,11 @@ class RTController(Module): self.comb += self.cd_rtio_with_rst.clk.eq(ClockSignal("rtio")) self.specials += AsyncResetSynchronizer(self.cd_rtio_with_rst, local_reset) + self.comb += [ + self.cri.o_status[2].eq(~self.csrs.link_up.storage), + self.cri.i_status[3].eq(~self.csrs.link_up.storage) + ] + # protocol errors err_unknown_packet_type = Signal() err_packet_truncated = Signal() diff --git a/artiq/gateware/rtio/cri.py b/artiq/gateware/rtio/cri.py index 4885c0219..c85958baa 100644 --- a/artiq/gateware/rtio/cri.py +++ b/artiq/gateware/rtio/cri.py @@ -25,8 +25,8 @@ layout = [ ("o_data", 512, DIR_M_TO_S), ("o_address", 16, DIR_M_TO_S), # o_status bits: - # <0:wait> <1:underflow> - ("o_status", 2, DIR_S_TO_M), + # <0:wait> <1:underflow> <2:link error> + ("o_status", 3, DIR_S_TO_M), # targets may optionally report a pessimistic estimate of the number # of outputs events that can be written without waiting. ("o_buffer_space", 16, DIR_S_TO_M), @@ -35,8 +35,9 @@ layout = [ ("i_timestamp", 64, DIR_S_TO_M), # i_status bits: # <0:wait for event (command timeout)> <1:overflow> <2:wait for status> + # <3:link error> # <0> and <1> are mutually exclusive. <1> has higher priority. - ("i_status", 3, DIR_S_TO_M), + ("i_status", 4, DIR_S_TO_M), # value of the timestamp counter transferred into the CRI clock domain. # monotonic, may lag behind the counter in the IO clock domain, but @@ -61,12 +62,12 @@ class KernelInitiator(Module, AutoCSR): self.o_data = CSRStorage(512, write_from_dev=True) self.o_address = CSRStorage(16) self.o_we = CSR() - self.o_status = CSRStatus(2) + self.o_status = CSRStatus(3) self.i_data = CSRStatus(32) self.i_timestamp = CSRStatus(64) self.i_request = CSR() - self.i_status = CSRStatus(3) + self.i_status = CSRStatus(4) self.i_overflow_reset = CSR() self.counter = CSRStatus(64) diff --git a/artiq/gateware/rtio/dma.py b/artiq/gateware/rtio/dma.py index a8b7c9195..735d52f54 100644 --- a/artiq/gateware/rtio/dma.py +++ b/artiq/gateware/rtio/dma.py @@ -242,7 +242,7 @@ class TimeOffset(Module, AutoCSR): class CRIMaster(Module, AutoCSR): def __init__(self): - self.underflow = CSR() + self.error = CSR(2) self.error_channel = CSRStatus(24) self.error_timestamp = CSRStatus(64) @@ -255,14 +255,21 @@ class CRIMaster(Module, AutoCSR): # # # underflow_trigger = Signal() + link_error_trigger = Signal() self.sync += [ If(underflow_trigger, - self.underflow.w.eq(1), + self.error.w.eq(1), self.error_channel.status.eq(self.sink.channel), self.error_timestamp.status.eq(self.sink.timestamp), self.error_address.status.eq(self.sink.address) ), - If(self.underflow.re, self.underflow.w.eq(0)) + If(link_error_trigger, + self.error.w.eq(2), + self.error_channel.status.eq(self.sink.channel), + self.error_timestamp.status.eq(self.sink.timestamp), + self.error_address.status.eq(self.sink.address) + ), + If(self.error.re, self.error.w.eq(0)) ] self.comb += [ @@ -276,7 +283,7 @@ class CRIMaster(Module, AutoCSR): self.submodules += fsm fsm.act("IDLE", - If(~self.underflow.w, + If(self.error.w == 0, If(self.sink.stb, If(self.sink.eop, # last packet contains dummy data, discard it @@ -301,7 +308,8 @@ class CRIMaster(Module, AutoCSR): self.sink.ack.eq(1), NextState("IDLE") ), - If(self.cri.o_status[1], NextState("UNDERFLOW")) + If(self.cri.o_status[1], NextState("UNDERFLOW")), + If(self.cri.o_status[2], NextState("LINK_ERROR")) ) fsm.act("UNDERFLOW", self.busy.eq(1), @@ -309,6 +317,12 @@ class CRIMaster(Module, AutoCSR): self.sink.ack.eq(1), NextState("IDLE") ) + fsm.act("LINK_ERROR", + self.busy.eq(1), + link_error_trigger.eq(1), + self.sink.ack.eq(1), + NextState("IDLE") + ) class DMA(Module): diff --git a/artiq/gateware/test/drtio/test_full_stack.py b/artiq/gateware/test/drtio/test_full_stack.py index 905d188d5..45d6a4441 100644 --- a/artiq/gateware/test/drtio/test_full_stack.py +++ b/artiq/gateware/test/drtio/test_full_stack.py @@ -55,6 +55,7 @@ class DUT(Module): self.submodules.master = DRTIOMaster(self.transceivers.alice, fine_ts_width=0) self.submodules.master_ki = rtio.KernelInitiator(self.master.cri) + self.master.rt_controller.csrs.link_up.storage.reset = 1 rx_synchronizer = DummyRXSynchronizer() self.submodules.phy0 = ttl_simple.Output(self.ttl0) @@ -81,7 +82,7 @@ class OutputsTestbench: def init(self): yield from self.dut.master.rt_controller.csrs.underflow_margin.write(100) - while not (yield from self.dut.master.link_layer.link_status.read()): + while not (yield from self.dut.master.link_layer.rx_up.read()): yield yield from self.get_buffer_space() @@ -113,8 +114,10 @@ class OutputsTestbench: wlen = 0 while status: status = yield from kcsrs.o_status.read() - if status & 2: + if status & 0x2: raise RTIOUnderflow + if status & 0x4: + raise RTIOLinkError yield wlen += 1 return wlen @@ -251,11 +254,13 @@ class TestFullStack(unittest.TestCase): return "timeout" if status & 0x2: return "overflow" + if status & 0x8: + return "link error" return ((yield from kcsrs.i_data.read()), (yield from kcsrs.i_timestamp.read())) def test(): - while not (yield from dut.master.link_layer.link_status.read()): + while not (yield from dut.master.link_layer.rx_up.read()): yield i1 = yield from get_input(10) @@ -281,7 +286,7 @@ class TestFullStack(unittest.TestCase): mgr = dut.master.rt_manager def test(): - while not (yield from dut.master.link_layer.link_status.read()): + while not (yield from dut.master.link_layer.rx_up.read()): yield yield from mgr.update_packet_cnt.write(1) diff --git a/artiq/gateware/test/rtio/test_dma.py b/artiq/gateware/test/rtio/test_dma.py index d0e74b5ea..9ad6fd6d9 100644 --- a/artiq/gateware/test/rtio/test_dma.py +++ b/artiq/gateware/test/rtio/test_dma.py @@ -5,7 +5,7 @@ import itertools from migen import * from misoc.interconnect import wishbone -from artiq.coredevice.exceptions import RTIOUnderflow +from artiq.coredevice.exceptions import RTIOUnderflow, RTIOLinkError from artiq.gateware import rtio from artiq.gateware.rtio import dma, cri from artiq.gateware.rtio.phy import ttl_simple @@ -57,8 +57,11 @@ def do_dma(dut, address): yield while ((yield from dut.enable.read())): yield - if (yield from dut.cri_master.underflow.read()): + error = yield from dut.cri_master.underflow.read() + if error & 1: raise RTIOUnderflow + if error & 2: + raise RTIOLinkError test_writes1 = [