gateware,runtime: optimize RTIO output interface

* reduce the output address (o_address) from 16 bits to 8 bits
* merge core, channel and address into a single 32-bit pre-computable "target" (address in the low 8 bits, channel selection in the upper 24 bits)
* merge the o_we register into the o_data register: writing the LSB of o_data now triggers the RTIO write
This commit is contained in:
Sebastien Bourdeauducq 2018-11-07 23:39:55 +08:00
parent ad0254c17b
commit 3d0c3cc1cf
5 changed files with 28 additions and 37 deletions

View File

@ -3,13 +3,12 @@ from artiq.language.types import TInt64, TInt32, TNone, TList
@syscall(flags={"nowrite"})
def rtio_output(time_mu: TInt64, channel: TInt32, addr: TInt32, data: TInt32
) -> TNone:
def rtio_output(time_mu: TInt64, target: TInt32, data: TInt32) -> TNone:
raise NotImplementedError("syscall not simulated")
@syscall(flags={"nowrite"})
def rtio_output_wide(time_mu: TInt64, channel: TInt32, addr: TInt32,
def rtio_output_wide(time_mu: TInt64, target: TInt32,
data: TList(TInt32)) -> TNone:
raise NotImplementedError("syscall not simulated")

View File

@ -301,10 +301,10 @@ extern fn dma_record_stop(duration: i64) {
#[unwind(aborts)]
#[inline(always)]
unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
unsafe fn dma_record_output_prepare(timestamp: i64, target: i32,
words: usize) -> &'static mut [u8] {
// See gateware/rtio/dma.py.
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/1;
let length = HEADER_LENGTH + /*data*/words * 4;
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
@ -319,9 +319,9 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
header.copy_from_slice(&[
(length >> 0) as u8,
(channel >> 0) as u8,
(channel >> 8) as u8,
(channel >> 16) as u8,
(target >> 8) as u8,
(target >> 16) as u8,
(target >> 24) as u8,
(timestamp >> 0) as u8,
(timestamp >> 8) as u8,
(timestamp >> 16) as u8,
@ -330,17 +330,16 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
(timestamp >> 40) as u8,
(timestamp >> 48) as u8,
(timestamp >> 56) as u8,
(address >> 0) as u8,
(address >> 8) as u8,
(target >> 0) as u8,
]);
data
}
#[unwind(aborts)]
extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
extern fn dma_record_output(timestamp: i64, target: i32, word: i32) {
unsafe {
let data = dma_record_output_prepare(timestamp, channel, address, 1);
let data = dma_record_output_prepare(timestamp, target, 1);
data.copy_from_slice(&[
(word >> 0) as u8,
(word >> 8) as u8,
@ -351,11 +350,11 @@ extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i3
}
#[unwind(aborts)]
extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice<i32>) {
extern fn dma_record_output_wide(timestamp: i64, target: i32, words: CSlice<i32>) {
assert!(words.len() <= 16); // enforce the hardware limit
unsafe {
let mut data = dma_record_output_prepare(timestamp, channel, address, 1);
let mut data = dma_record_output_prepare(timestamp, target, 1);
for word in words.as_ref().iter() {
data[..4].copy_from_slice(&[
(word >> 0) as u8,

View File

@ -36,6 +36,7 @@ mod imp {
}
}
// writing the LSB of o_data (offset=0) triggers the RTIO write
#[inline(always)]
pub unsafe fn rtio_o_data_write(offset: usize, data: u32) {
write_volatile(
@ -66,41 +67,37 @@ mod imp {
}
}
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {
pub extern fn output(timestamp: i64, target: i32, data: i32) {
unsafe {
csr::rtio::chan_sel_write(channel as _);
csr::rtio::target_write(target as u32);
// writing timestamp clears o_data
csr::rtio::timestamp_write(timestamp as u64);
csr::rtio::o_address_write(addr as _);
rtio_o_data_write(0, data as _);
csr::rtio::o_we_write(1);
let status = csr::rtio::o_status_read();
if status != 0 {
process_exceptional_status(timestamp, channel, status);
process_exceptional_status(timestamp, target >> 8, status);
}
}
}
pub extern fn output_wide(timestamp: i64, channel: i32, addr: i32, data: CSlice<i32>) {
pub extern fn output_wide(timestamp: i64, target: i32, data: CSlice<i32>) {
unsafe {
csr::rtio::chan_sel_write(channel as _);
csr::rtio::target_write(target as u32);
// writing timestamp clears o_data
csr::rtio::timestamp_write(timestamp as u64);
csr::rtio::o_address_write(addr as _);
for i in 0..data.len() {
rtio_o_data_write(i, data[i] as _)
}
csr::rtio::o_we_write(1);
let status = csr::rtio::o_status_read();
if status != 0 {
process_exceptional_status(timestamp, channel, status);
process_exceptional_status(timestamp, target >> 8, status);
}
}
}
pub extern fn input_timestamp(timeout: i64, channel: i32) -> u64 {
unsafe {
csr::rtio::chan_sel_write(channel as _);
csr::rtio::target_write((channel as u32) << 8);
csr::rtio::timestamp_write(timeout as u64);
csr::rtio::i_request_write(1);
@ -130,7 +127,7 @@ mod imp {
pub extern fn input_data(channel: i32) -> i32 {
unsafe {
csr::rtio::chan_sel_write(channel as _);
csr::rtio::target_write((channel as u32) << 8);
csr::rtio::timestamp_write(0xffffffff_ffffffff);
csr::rtio::i_request_write(1);
@ -158,7 +155,7 @@ mod imp {
#[cfg(has_rtio_log)]
pub fn log(timestamp: i64, data: &[u8]) {
unsafe {
csr::rtio::chan_sel_write(csr::CONFIG_RTIO_LOG_CHANNEL);
csr::rtio::target_write(csr::CONFIG_RTIO_LOG_CHANNEL << 8);
csr::rtio::timestamp_write(timestamp as u64);
let mut word: u32 = 0;
@ -167,14 +164,12 @@ mod imp {
word |= data[i] as u32;
if i % 4 == 3 {
rtio_o_data_write(0, word);
csr::rtio::o_we_write(1);
word = 0;
}
}
if word != 0 {
rtio_o_data_write(0, word);
csr::rtio::o_we_write(1);
}
}
}

View File

@ -32,7 +32,7 @@ layout = [
("timestamp", 64, DIR_M_TO_S),
("o_data", 512, DIR_M_TO_S),
("o_address", 16, DIR_M_TO_S),
("o_address", 8, DIR_M_TO_S),
# o_status bits:
# <0:wait> <1:underflow> <2:destination unreachable>
("o_status", 3, DIR_S_TO_M),
@ -60,7 +60,7 @@ class Interface(Record):
class KernelInitiator(Module, AutoCSR):
def __init__(self, tsc, cri=None):
self.chan_sel = CSRStorage(24)
self.target = CSRStorage(32)
# monotonic, may lag behind the counter in the IO clock domain, but
# not be ahead of it.
self.timestamp = CSRStorage(64)
@ -69,8 +69,6 @@ class KernelInitiator(Module, AutoCSR):
# zero-extension of output event data by the gateware. When staging an
# output event, always write timestamp before o_data.
self.o_data = CSRStorage(512, write_from_dev=True)
self.o_address = CSRStorage(16)
self.o_we = CSR()
self.o_status = CSRStatus(3)
self.i_data = CSRStatus(32)
@ -90,14 +88,14 @@ class KernelInitiator(Module, AutoCSR):
self.comb += [
self.cri.cmd.eq(commands["nop"]),
If(self.o_we.re, self.cri.cmd.eq(commands["write"])),
If(self.o_data.re, self.cri.cmd.eq(commands["write"])),
If(self.i_request.re, self.cri.cmd.eq(commands["read"])),
self.cri.chan_sel.eq(self.chan_sel.storage),
self.cri.chan_sel.eq(self.target.storage[8:]),
self.cri.timestamp.eq(self.timestamp.storage),
self.cri.o_data.eq(self.o_data.storage),
self.cri.o_address.eq(self.o_address.storage),
self.cri.o_address.eq(self.target.storage[:8]),
self.o_status.status.eq(self.cri.o_status),
self.i_data.status.eq(self.cri.i_data),

View File

@ -148,7 +148,7 @@ record_layout = [
("length", 8), # of whole record (header+data)
("channel", 24),
("timestamp", 64),
("address", 16),
("address", 8),
("data", 512) # variable length
]