From 3d0c3cc1cf84a1b6688afa4b41ba0c7789717915 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Wed, 7 Nov 2018 23:39:55 +0800 Subject: [PATCH] gateware,runtime: optimize RTIO output interface * reduce address to 8 bits * merge core, channel and address into 32-bit pre-computable "target" * merge we register into data register --- artiq/coredevice/rtio.py | 5 ++--- artiq/firmware/ksupport/lib.rs | 21 ++++++++++----------- artiq/firmware/ksupport/rtio.rs | 25 ++++++++++--------------- artiq/gateware/rtio/cri.py | 12 +++++------- artiq/gateware/rtio/dma.py | 2 +- 5 files changed, 28 insertions(+), 37 deletions(-) diff --git a/artiq/coredevice/rtio.py b/artiq/coredevice/rtio.py index 471a48c31..3445b6baf 100644 --- a/artiq/coredevice/rtio.py +++ b/artiq/coredevice/rtio.py @@ -3,13 +3,12 @@ from artiq.language.types import TInt64, TInt32, TNone, TList @syscall(flags={"nowrite"}) -def rtio_output(time_mu: TInt64, channel: TInt32, addr: TInt32, data: TInt32 - ) -> TNone: +def rtio_output(time_mu: TInt64, target: TInt32, data: TInt32) -> TNone: raise NotImplementedError("syscall not simulated") @syscall(flags={"nowrite"}) -def rtio_output_wide(time_mu: TInt64, channel: TInt32, addr: TInt32, +def rtio_output_wide(time_mu: TInt64, target: TInt32, data: TList(TInt32)) -> TNone: raise NotImplementedError("syscall not simulated") diff --git a/artiq/firmware/ksupport/lib.rs b/artiq/firmware/ksupport/lib.rs index 35ac00eb4..946f6fe94 100644 --- a/artiq/firmware/ksupport/lib.rs +++ b/artiq/firmware/ksupport/lib.rs @@ -301,10 +301,10 @@ extern fn dma_record_stop(duration: i64) { #[unwind(aborts)] #[inline(always)] -unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32, +unsafe fn dma_record_output_prepare(timestamp: i64, target: i32, words: usize) -> &'static mut [u8] { // See gateware/rtio/dma.py. - const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2; + const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/1; let length = HEADER_LENGTH + /*data*/words * 4; if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length { @@ -319,9 +319,9 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32, header.copy_from_slice(&[ (length >> 0) as u8, - (channel >> 0) as u8, - (channel >> 8) as u8, - (channel >> 16) as u8, + (target >> 8) as u8, + (target >> 16) as u8, + (target >> 24) as u8, (timestamp >> 0) as u8, (timestamp >> 8) as u8, (timestamp >> 16) as u8, @@ -330,17 +330,16 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32, (timestamp >> 40) as u8, (timestamp >> 48) as u8, (timestamp >> 56) as u8, - (address >> 0) as u8, - (address >> 8) as u8, + (target >> 0) as u8, ]); data } #[unwind(aborts)] -extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) { +extern fn dma_record_output(timestamp: i64, target: i32, word: i32) { unsafe { - let data = dma_record_output_prepare(timestamp, channel, address, 1); + let data = dma_record_output_prepare(timestamp, target, 1); data.copy_from_slice(&[ (word >> 0) as u8, (word >> 8) as u8, @@ -351,11 +350,11 @@ extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i3 } #[unwind(aborts)] -extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice) { +extern fn dma_record_output_wide(timestamp: i64, target: i32, words: CSlice) { assert!(words.len() <= 16); // enforce the hardware limit unsafe { - let mut data = dma_record_output_prepare(timestamp, channel, address, 1); + let mut data = dma_record_output_prepare(timestamp, target, 1); for word in words.as_ref().iter() { data[..4].copy_from_slice(&[ (word >> 0) as u8, diff --git a/artiq/firmware/ksupport/rtio.rs b/artiq/firmware/ksupport/rtio.rs index 1324c0353..04cfd0fce 100644 --- a/artiq/firmware/ksupport/rtio.rs +++ b/artiq/firmware/ksupport/rtio.rs @@ -36,6 +36,7 @@ mod imp { } } + // writing the LSB of o_data (offset=0) triggers the RTIO write #[inline(always)] pub unsafe fn rtio_o_data_write(offset: usize, data: u32) { write_volatile( @@ -66,41 +67,37 @@ mod imp { } } - pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) { + pub extern fn output(timestamp: i64, target: i32, data: i32) { unsafe { - csr::rtio::chan_sel_write(channel as _); + csr::rtio::target_write(target as u32); // writing timestamp clears o_data csr::rtio::timestamp_write(timestamp as u64); - csr::rtio::o_address_write(addr as _); rtio_o_data_write(0, data as _); - csr::rtio::o_we_write(1); let status = csr::rtio::o_status_read(); if status != 0 { - process_exceptional_status(timestamp, channel, status); + process_exceptional_status(timestamp, target >> 8, status); } } } - pub extern fn output_wide(timestamp: i64, channel: i32, addr: i32, data: CSlice) { + pub extern fn output_wide(timestamp: i64, target: i32, data: CSlice) { unsafe { - csr::rtio::chan_sel_write(channel as _); + csr::rtio::target_write(target as u32); // writing timestamp clears o_data csr::rtio::timestamp_write(timestamp as u64); - csr::rtio::o_address_write(addr as _); for i in 0..data.len() { rtio_o_data_write(i, data[i] as _) } - csr::rtio::o_we_write(1); let status = csr::rtio::o_status_read(); if status != 0 { - process_exceptional_status(timestamp, channel, status); + process_exceptional_status(timestamp, target >> 8, status); } } } pub extern fn input_timestamp(timeout: i64, channel: i32) -> u64 { unsafe { - csr::rtio::chan_sel_write(channel as _); + csr::rtio::target_write((channel as u32) << 8); csr::rtio::timestamp_write(timeout as u64); csr::rtio::i_request_write(1); @@ -130,7 +127,7 @@ mod imp { pub extern fn input_data(channel: i32) -> i32 { unsafe { - csr::rtio::chan_sel_write(channel as _); + csr::rtio::target_write((channel as u32) << 8); csr::rtio::timestamp_write(0xffffffff_ffffffff); csr::rtio::i_request_write(1); @@ -158,7 +155,7 @@ mod imp { #[cfg(has_rtio_log)] pub fn log(timestamp: i64, data: &[u8]) { unsafe { - csr::rtio::chan_sel_write(csr::CONFIG_RTIO_LOG_CHANNEL); + csr::rtio::target_write(csr::CONFIG_RTIO_LOG_CHANNEL << 8); csr::rtio::timestamp_write(timestamp as u64); let mut word: u32 = 0; @@ -167,14 +164,12 @@ mod imp { word |= data[i] as u32; if i % 4 == 3 { rtio_o_data_write(0, word); - csr::rtio::o_we_write(1); word = 0; } } if word != 0 { rtio_o_data_write(0, word); - csr::rtio::o_we_write(1); } } } diff --git a/artiq/gateware/rtio/cri.py b/artiq/gateware/rtio/cri.py index adafa8d29..bd1e336b3 100644 --- a/artiq/gateware/rtio/cri.py +++ b/artiq/gateware/rtio/cri.py @@ -32,7 +32,7 @@ layout = [ ("timestamp", 64, DIR_M_TO_S), ("o_data", 512, DIR_M_TO_S), - ("o_address", 16, DIR_M_TO_S), + ("o_address", 8, DIR_M_TO_S), # o_status bits: # <0:wait> <1:underflow> <2:destination unreachable> ("o_status", 3, DIR_S_TO_M), @@ -60,7 +60,7 @@ class Interface(Record): class KernelInitiator(Module, AutoCSR): def __init__(self, tsc, cri=None): - self.chan_sel = CSRStorage(24) + self.target = CSRStorage(32) # monotonic, may lag behind the counter in the IO clock domain, but # not be ahead of it. self.timestamp = CSRStorage(64) @@ -69,8 +69,6 @@ class KernelInitiator(Module, AutoCSR): # zero-extension of output event data by the gateware. When staging an # output event, always write timestamp before o_data. self.o_data = CSRStorage(512, write_from_dev=True) - self.o_address = CSRStorage(16) - self.o_we = CSR() self.o_status = CSRStatus(3) self.i_data = CSRStatus(32) @@ -90,14 +88,14 @@ class KernelInitiator(Module, AutoCSR): self.comb += [ self.cri.cmd.eq(commands["nop"]), - If(self.o_we.re, self.cri.cmd.eq(commands["write"])), + If(self.o_data.re, self.cri.cmd.eq(commands["write"])), If(self.i_request.re, self.cri.cmd.eq(commands["read"])), - self.cri.chan_sel.eq(self.chan_sel.storage), + self.cri.chan_sel.eq(self.target.storage[8:]), self.cri.timestamp.eq(self.timestamp.storage), self.cri.o_data.eq(self.o_data.storage), - self.cri.o_address.eq(self.o_address.storage), + self.cri.o_address.eq(self.target.storage[:8]), self.o_status.status.eq(self.cri.o_status), self.i_data.status.eq(self.cri.i_data), diff --git a/artiq/gateware/rtio/dma.py b/artiq/gateware/rtio/dma.py index 735d52f54..a538bb978 100644 --- a/artiq/gateware/rtio/dma.py +++ b/artiq/gateware/rtio/dma.py @@ -148,7 +148,7 @@ record_layout = [ ("length", 8), # of whole record (header+data) ("channel", 24), ("timestamp", 64), - ("address", 16), + ("address", 8), ("data", 512) # variable length ]