drtio: raise RTIOLinkError if operation fails due to link lost (#942)

This commit is contained in:
Sebastien Bourdeauducq 2018-03-04 01:02:53 +08:00
parent ba74013e3e
commit 928d5dc9b3
11 changed files with 112 additions and 62 deletions

View File

@ -90,6 +90,13 @@ class RTIOOverflow(Exception):
artiq_builtin = True artiq_builtin = True
class RTIOLinkError(Exception):
"""Raised with a RTIO operation could not be completed due to a DRTIO link
being down.
"""
artiq_builtin = True
class DMAError(Exception): class DMAError(Exception):
"""Raised when performing an invalid DMA operation.""" """Raised when performing an invalid DMA operation."""
artiq_builtin = True artiq_builtin = True

View File

@ -384,13 +384,21 @@ extern fn dma_playback(timestamp: i64, ptr: i32) {
while csr::rtio_dma::enable_read() != 0 {} while csr::rtio_dma::enable_read() != 0 {}
csr::cri_con::selected_write(0); csr::cri_con::selected_write(0);
if csr::rtio_dma::underflow_read() != 0 { let error = csr::rtio_dma::error_read();
if error != 0 {
let timestamp = csr::rtio_dma::error_timestamp_read(); let timestamp = csr::rtio_dma::error_timestamp_read();
let channel = csr::rtio_dma::error_channel_read(); let channel = csr::rtio_dma::error_channel_read();
csr::rtio_dma::underflow_write(1); csr::rtio_dma::error_write(1);
if error & 1 != 0 {
raise!("RTIOUnderflow", raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}", "RTIO underflow at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0) timestamp as i64, channel as i64, 0);
}
if error & 2 != 0 {
raise!("RTIOLinkError",
"RTIO output link error at {0} mu, channel {1}",
timestamp as i64, channel as i64, 0);
}
} }
} }
} }

View File

@ -9,9 +9,11 @@ mod imp {
pub const RTIO_O_STATUS_WAIT: u8 = 1; pub const RTIO_O_STATUS_WAIT: u8 = 1;
pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2; pub const RTIO_O_STATUS_UNDERFLOW: u8 = 2;
pub const RTIO_O_STATUS_LINK_ERROR: u8 = 4;
pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1; pub const RTIO_I_STATUS_WAIT_EVENT: u8 = 1;
pub const RTIO_I_STATUS_OVERFLOW: u8 = 2; pub const RTIO_I_STATUS_OVERFLOW: u8 = 2;
pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4; pub const RTIO_I_STATUS_WAIT_STATUS: u8 = 4;
pub const RTIO_I_STATUS_LINK_ERROR: u8 = 8;
pub extern fn init() { pub extern fn init() {
send(&RtioInitRequest); send(&RtioInitRequest);
@ -45,7 +47,12 @@ mod imp {
if status & RTIO_O_STATUS_UNDERFLOW != 0 { if status & RTIO_O_STATUS_UNDERFLOW != 0 {
raise!("RTIOUnderflow", raise!("RTIOUnderflow",
"RTIO underflow at {0} mu, channel {1}, slack {2} mu", "RTIO underflow at {0} mu, channel {1}, slack {2} mu",
timestamp, channel as i64, timestamp - get_counter()) timestamp, channel as i64, timestamp - get_counter());
}
if status & RTIO_O_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO output link error at {0} mu, channel {1}",
timestamp, channel as i64, 0);
} }
} }
@ -101,6 +108,11 @@ mod imp {
if status & RTIO_I_STATUS_WAIT_EVENT != 0 { if status & RTIO_I_STATUS_WAIT_EVENT != 0 {
return !0 return !0
} }
if status & RTIO_I_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO input link error on channel {0}",
channel as i64, 0, 0);
}
csr::rtio::i_timestamp_read() csr::rtio::i_timestamp_read()
} }
@ -123,6 +135,11 @@ mod imp {
"RTIO input overflow on channel {0}", "RTIO input overflow on channel {0}",
channel as i64, 0, 0); channel as i64, 0, 0);
} }
if status & RTIO_I_STATUS_LINK_ERROR != 0 {
raise!("RTIOLinkError",
"RTIO input link error on channel {0}",
channel as i64, 0, 0);
}
rtio_i_data_read(0) as i32 rtio_i_data_read(0) as i32
} }

View File

@ -50,14 +50,21 @@ pub mod drtio {
fn link_rx_up(linkno: u8) -> bool { fn link_rx_up(linkno: u8) -> bool {
let linkno = linkno as usize; let linkno = linkno as usize;
unsafe { unsafe {
(csr::DRTIO[linkno].link_status_read)() == 1 (csr::DRTIO[linkno].rx_up_read)() == 1
} }
} }
fn link_reset(linkno: u8) { fn link_up(linkno: u8) -> bool {
let linkno = linkno as usize; let linkno = linkno as usize;
unsafe { unsafe {
(csr::DRTIO[linkno].reset_write)(1); (csr::DRTIO[linkno].link_up_read)() == 1
}
}
fn set_link_up(linkno: u8, up: bool) {
let linkno = linkno as usize;
unsafe {
(csr::DRTIO[linkno].link_up_write)(if up { 1 } else { 0 });
} }
} }
@ -132,21 +139,6 @@ pub mod drtio {
} }
} }
// FIXME: use csr::DRTIO.len(), maybe get rid of static mut as well.
static mut LINK_UP: [bool; 16] = [false; 16];
fn link_up(linkno: u8) -> bool {
unsafe {
LINK_UP[linkno as usize]
}
}
fn set_link_up(linkno: u8, up: bool) {
unsafe {
LINK_UP[linkno as usize] = up
}
}
pub fn link_thread(io: Io) { pub fn link_thread(io: Io) {
loop { loop {
for linkno in 0..csr::DRTIO.len() { for linkno in 0..csr::DRTIO.len() {
@ -164,14 +156,13 @@ pub mod drtio {
/* link was previously down */ /* link was previously down */
if link_rx_up(linkno) { if link_rx_up(linkno) {
info!("[LINK#{}] link RX became up, pinging", linkno); info!("[LINK#{}] link RX became up, pinging", linkno);
link_reset(linkno);
let ping_count = ping_remote(linkno, &io); let ping_count = ping_remote(linkno, &io);
if ping_count > 0 { if ping_count > 0 {
info!("[LINK#{}] remote replied after {} packets", linkno, ping_count); info!("[LINK#{}] remote replied after {} packets", linkno, ping_count);
set_link_up(linkno, true);
init_buffer_space(linkno); init_buffer_space(linkno);
sync_tsc(linkno); sync_tsc(linkno);
info!("[LINK#{}] link initialization completed", linkno); info!("[LINK#{}] link initialization completed", linkno);
set_link_up(linkno, true);
} else { } else {
info!("[LINK#{}] ping failed", linkno); info!("[LINK#{}] ping failed", linkno);
} }

View File

@ -206,9 +206,9 @@ const SI5324_SETTINGS: si5324::FrequencySettings
crystal_ref: true crystal_ref: true
}; };
fn drtio_link_is_up() -> bool { fn drtio_link_rx_up() -> bool {
unsafe { unsafe {
(csr::DRTIO[0].link_status_read)() == 1 (csr::DRTIO[0].rx_up_read)() == 1
} }
} }
@ -231,14 +231,14 @@ fn startup() {
} }
loop { loop {
while !drtio_link_is_up() { while !drtio_link_rx_up() {
process_errors(); process_errors();
} }
info!("link is up, switching to recovered clock"); info!("link is up, switching to recovered clock");
si5324::select_ext_input(true).expect("failed to switch clocks"); si5324::select_ext_input(true).expect("failed to switch clocks");
drtio_reset(false); drtio_reset(false);
drtio_reset_phy(false); drtio_reset_phy(false);
while drtio_link_is_up() { while drtio_link_rx_up() {
process_errors(); process_errors();
process_aux_packets(); process_aux_packets();
} }

View File

@ -224,7 +224,7 @@ class LinkLayerRX(Module):
class LinkLayer(Module, AutoCSR): class LinkLayer(Module, AutoCSR):
def __init__(self, encoder, decoders): def __init__(self, encoder, decoders):
self.link_status = CSRStatus() self.rx_up = CSRStatus()
self.rx_disable = CSRStorage() self.rx_disable = CSRStorage()
self.tx_force_aux_zero = CSRStorage() self.tx_force_aux_zero = CSRStorage()
self.tx_force_rt_zero = CSRStorage() self.tx_force_rt_zero = CSRStorage()
@ -254,14 +254,14 @@ class LinkLayer(Module, AutoCSR):
# # # # # #
ready = Signal() rx_up = Signal()
ready_r = Signal() rx_up_r = Signal()
self.sync.rtio += ready_r.eq(ready) self.sync.rtio += rx_up_r.eq(rx_up)
ready_rx = Signal() rx_up_rx = Signal()
ready_r.attr.add("no_retiming") rx_up_r.attr.add("no_retiming")
self.specials += [ self.specials += [
MultiReg(ready_r, ready_rx, "rtio_rx"), MultiReg(rx_up_r, rx_up_rx, "rtio_rx"),
MultiReg(ready_r, self.link_status.status)] MultiReg(rx_up_r, self.rx_up.status)]
tx_force_aux_zero_rtio = Signal() tx_force_aux_zero_rtio = Signal()
tx_force_rt_zero_rtio = Signal() tx_force_rt_zero_rtio = Signal()
@ -286,11 +286,11 @@ class LinkLayer(Module, AutoCSR):
# to be recaptured by RXSynchronizer. # to be recaptured by RXSynchronizer.
self.sync.rtio_rx += [ self.sync.rtio_rx += [
self.rx_aux_stb.eq(rx.aux_stb), self.rx_aux_stb.eq(rx.aux_stb),
self.rx_aux_frame.eq(rx.aux_frame & ready_rx & ~rx_disable_rx), self.rx_aux_frame.eq(rx.aux_frame & rx_up_rx & ~rx_disable_rx),
self.rx_aux_frame_perm.eq(rx.aux_frame & ready_rx), self.rx_aux_frame_perm.eq(rx.aux_frame & rx_up_rx),
self.rx_aux_data.eq(rx.aux_data), self.rx_aux_data.eq(rx.aux_data),
self.rx_rt_frame.eq(rx.rt_frame & ready_rx & ~rx_disable_rx), self.rx_rt_frame.eq(rx.rt_frame & rx_up_rx & ~rx_disable_rx),
self.rx_rt_frame_perm.eq(rx.rt_frame & ready_rx), self.rx_rt_frame_perm.eq(rx.rt_frame & rx_up_rx),
self.rx_rt_data.eq(rx.rt_data) self.rx_rt_data.eq(rx.rt_data)
] ]
@ -308,7 +308,7 @@ class LinkLayer(Module, AutoCSR):
If(wait_scrambler.done, NextState("READY")) If(wait_scrambler.done, NextState("READY"))
) )
fsm.act("READY", fsm.act("READY",
ready.eq(1), rx_up.eq(1),
If(~self.rx_ready, NextState("WAIT_RX_READY")) If(~self.rx_ready, NextState("WAIT_RX_READY"))
) )

View File

@ -13,7 +13,7 @@ from artiq.gateware.rtio import cri
class _CSRs(AutoCSR): class _CSRs(AutoCSR):
def __init__(self): def __init__(self):
self.reset = CSR() self.link_up = CSRStorage()
self.protocol_error = CSR(3) self.protocol_error = CSR(3)
@ -21,7 +21,6 @@ class _CSRs(AutoCSR):
self.set_time = CSR() self.set_time = CSR()
self.underflow_margin = CSRStorage(16, reset=200) self.underflow_margin = CSRStorage(16, reset=200)
self.o_get_buffer_space = CSR() self.o_get_buffer_space = CSR()
self.o_dbg_buffer_space = CSRStatus(16) self.o_dbg_buffer_space = CSRStatus(16)
self.o_dbg_buffer_space_req_cnt = CSRStatus(32) self.o_dbg_buffer_space_req_cnt = CSRStatus(32)
@ -53,7 +52,7 @@ class RTController(Module):
# reset # reset
local_reset = Signal(reset=1) local_reset = Signal(reset=1)
self.sync += local_reset.eq(self.csrs.reset.re) self.sync += local_reset.eq(~self.csrs.link_up.storage)
local_reset.attr.add("no_retiming") local_reset.attr.add("no_retiming")
self.clock_domains.cd_sys_with_rst = ClockDomain() self.clock_domains.cd_sys_with_rst = ClockDomain()
self.clock_domains.cd_rtio_with_rst = ClockDomain() self.clock_domains.cd_rtio_with_rst = ClockDomain()
@ -64,6 +63,11 @@ class RTController(Module):
self.comb += self.cd_rtio_with_rst.clk.eq(ClockSignal("rtio")) self.comb += self.cd_rtio_with_rst.clk.eq(ClockSignal("rtio"))
self.specials += AsyncResetSynchronizer(self.cd_rtio_with_rst, local_reset) self.specials += AsyncResetSynchronizer(self.cd_rtio_with_rst, local_reset)
self.comb += [
self.cri.o_status[2].eq(~self.csrs.link_up.storage),
self.cri.i_status[3].eq(~self.csrs.link_up.storage)
]
# protocol errors # protocol errors
err_unknown_packet_type = Signal() err_unknown_packet_type = Signal()
err_packet_truncated = Signal() err_packet_truncated = Signal()

View File

@ -25,8 +25,8 @@ layout = [
("o_data", 512, DIR_M_TO_S), ("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S), ("o_address", 16, DIR_M_TO_S),
# o_status bits: # o_status bits:
# <0:wait> <1:underflow> # <0:wait> <1:underflow> <2:link error>
("o_status", 2, DIR_S_TO_M), ("o_status", 3, DIR_S_TO_M),
# targets may optionally report a pessimistic estimate of the number # targets may optionally report a pessimistic estimate of the number
# of outputs events that can be written without waiting. # of outputs events that can be written without waiting.
("o_buffer_space", 16, DIR_S_TO_M), ("o_buffer_space", 16, DIR_S_TO_M),
@ -35,8 +35,9 @@ layout = [
("i_timestamp", 64, DIR_S_TO_M), ("i_timestamp", 64, DIR_S_TO_M),
# i_status bits: # i_status bits:
# <0:wait for event (command timeout)> <1:overflow> <2:wait for status> # <0:wait for event (command timeout)> <1:overflow> <2:wait for status>
# <3:link error>
# <0> and <1> are mutually exclusive. <1> has higher priority. # <0> and <1> are mutually exclusive. <1> has higher priority.
("i_status", 3, DIR_S_TO_M), ("i_status", 4, DIR_S_TO_M),
# value of the timestamp counter transferred into the CRI clock domain. # value of the timestamp counter transferred into the CRI clock domain.
# monotonic, may lag behind the counter in the IO clock domain, but # monotonic, may lag behind the counter in the IO clock domain, but
@ -61,12 +62,12 @@ class KernelInitiator(Module, AutoCSR):
self.o_data = CSRStorage(512, write_from_dev=True) self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16) self.o_address = CSRStorage(16)
self.o_we = CSR() self.o_we = CSR()
self.o_status = CSRStatus(2) self.o_status = CSRStatus(3)
self.i_data = CSRStatus(32) self.i_data = CSRStatus(32)
self.i_timestamp = CSRStatus(64) self.i_timestamp = CSRStatus(64)
self.i_request = CSR() self.i_request = CSR()
self.i_status = CSRStatus(3) self.i_status = CSRStatus(4)
self.i_overflow_reset = CSR() self.i_overflow_reset = CSR()
self.counter = CSRStatus(64) self.counter = CSRStatus(64)

View File

@ -242,7 +242,7 @@ class TimeOffset(Module, AutoCSR):
class CRIMaster(Module, AutoCSR): class CRIMaster(Module, AutoCSR):
def __init__(self): def __init__(self):
self.underflow = CSR() self.error = CSR(2)
self.error_channel = CSRStatus(24) self.error_channel = CSRStatus(24)
self.error_timestamp = CSRStatus(64) self.error_timestamp = CSRStatus(64)
@ -255,14 +255,21 @@ class CRIMaster(Module, AutoCSR):
# # # # # #
underflow_trigger = Signal() underflow_trigger = Signal()
link_error_trigger = Signal()
self.sync += [ self.sync += [
If(underflow_trigger, If(underflow_trigger,
self.underflow.w.eq(1), self.error.w.eq(1),
self.error_channel.status.eq(self.sink.channel), self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp), self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address) self.error_address.status.eq(self.sink.address)
), ),
If(self.underflow.re, self.underflow.w.eq(0)) If(link_error_trigger,
self.error.w.eq(2),
self.error_channel.status.eq(self.sink.channel),
self.error_timestamp.status.eq(self.sink.timestamp),
self.error_address.status.eq(self.sink.address)
),
If(self.error.re, self.error.w.eq(0))
] ]
self.comb += [ self.comb += [
@ -276,7 +283,7 @@ class CRIMaster(Module, AutoCSR):
self.submodules += fsm self.submodules += fsm
fsm.act("IDLE", fsm.act("IDLE",
If(~self.underflow.w, If(self.error.w == 0,
If(self.sink.stb, If(self.sink.stb,
If(self.sink.eop, If(self.sink.eop,
# last packet contains dummy data, discard it # last packet contains dummy data, discard it
@ -301,7 +308,8 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1), self.sink.ack.eq(1),
NextState("IDLE") NextState("IDLE")
), ),
If(self.cri.o_status[1], NextState("UNDERFLOW")) If(self.cri.o_status[1], NextState("UNDERFLOW")),
If(self.cri.o_status[2], NextState("LINK_ERROR"))
) )
fsm.act("UNDERFLOW", fsm.act("UNDERFLOW",
self.busy.eq(1), self.busy.eq(1),
@ -309,6 +317,12 @@ class CRIMaster(Module, AutoCSR):
self.sink.ack.eq(1), self.sink.ack.eq(1),
NextState("IDLE") NextState("IDLE")
) )
fsm.act("LINK_ERROR",
self.busy.eq(1),
link_error_trigger.eq(1),
self.sink.ack.eq(1),
NextState("IDLE")
)
class DMA(Module): class DMA(Module):

View File

@ -55,6 +55,7 @@ class DUT(Module):
self.submodules.master = DRTIOMaster(self.transceivers.alice, self.submodules.master = DRTIOMaster(self.transceivers.alice,
fine_ts_width=0) fine_ts_width=0)
self.submodules.master_ki = rtio.KernelInitiator(self.master.cri) self.submodules.master_ki = rtio.KernelInitiator(self.master.cri)
self.master.rt_controller.csrs.link_up.storage.reset = 1
rx_synchronizer = DummyRXSynchronizer() rx_synchronizer = DummyRXSynchronizer()
self.submodules.phy0 = ttl_simple.Output(self.ttl0) self.submodules.phy0 = ttl_simple.Output(self.ttl0)
@ -81,7 +82,7 @@ class OutputsTestbench:
def init(self): def init(self):
yield from self.dut.master.rt_controller.csrs.underflow_margin.write(100) yield from self.dut.master.rt_controller.csrs.underflow_margin.write(100)
while not (yield from self.dut.master.link_layer.link_status.read()): while not (yield from self.dut.master.link_layer.rx_up.read()):
yield yield
yield from self.get_buffer_space() yield from self.get_buffer_space()
@ -113,8 +114,10 @@ class OutputsTestbench:
wlen = 0 wlen = 0
while status: while status:
status = yield from kcsrs.o_status.read() status = yield from kcsrs.o_status.read()
if status & 2: if status & 0x2:
raise RTIOUnderflow raise RTIOUnderflow
if status & 0x4:
raise RTIOLinkError
yield yield
wlen += 1 wlen += 1
return wlen return wlen
@ -251,11 +254,13 @@ class TestFullStack(unittest.TestCase):
return "timeout" return "timeout"
if status & 0x2: if status & 0x2:
return "overflow" return "overflow"
if status & 0x8:
return "link error"
return ((yield from kcsrs.i_data.read()), return ((yield from kcsrs.i_data.read()),
(yield from kcsrs.i_timestamp.read())) (yield from kcsrs.i_timestamp.read()))
def test(): def test():
while not (yield from dut.master.link_layer.link_status.read()): while not (yield from dut.master.link_layer.rx_up.read()):
yield yield
i1 = yield from get_input(10) i1 = yield from get_input(10)
@ -281,7 +286,7 @@ class TestFullStack(unittest.TestCase):
mgr = dut.master.rt_manager mgr = dut.master.rt_manager
def test(): def test():
while not (yield from dut.master.link_layer.link_status.read()): while not (yield from dut.master.link_layer.rx_up.read()):
yield yield
yield from mgr.update_packet_cnt.write(1) yield from mgr.update_packet_cnt.write(1)

View File

@ -5,7 +5,7 @@ import itertools
from migen import * from migen import *
from misoc.interconnect import wishbone from misoc.interconnect import wishbone
from artiq.coredevice.exceptions import RTIOUnderflow from artiq.coredevice.exceptions import RTIOUnderflow, RTIOLinkError
from artiq.gateware import rtio from artiq.gateware import rtio
from artiq.gateware.rtio import dma, cri from artiq.gateware.rtio import dma, cri
from artiq.gateware.rtio.phy import ttl_simple from artiq.gateware.rtio.phy import ttl_simple
@ -57,8 +57,11 @@ def do_dma(dut, address):
yield yield
while ((yield from dut.enable.read())): while ((yield from dut.enable.read())):
yield yield
if (yield from dut.cri_master.underflow.read()): error = yield from dut.cri_master.underflow.read()
if error & 1:
raise RTIOUnderflow raise RTIOUnderflow
if error & 2:
raise RTIOLinkError
test_writes1 = [ test_writes1 = [