forked from M-Labs/artiq
gateware,runtime: optimize RTIO output interface
* reduce address to 8 bits
* merge core, channel and address into a 32-bit pre-computable "target"
* merge the o_we register into the o_data register
This commit is contained in:
parent
ad0254c17b
commit
3d0c3cc1cf
|
@ -3,13 +3,12 @@ from artiq.language.types import TInt64, TInt32, TNone, TList
|
||||||
|
|
||||||
|
|
||||||
@syscall(flags={"nowrite"})
|
@syscall(flags={"nowrite"})
|
||||||
def rtio_output(time_mu: TInt64, channel: TInt32, addr: TInt32, data: TInt32
|
def rtio_output(time_mu: TInt64, target: TInt32, data: TInt32) -> TNone:
|
||||||
) -> TNone:
|
|
||||||
raise NotImplementedError("syscall not simulated")
|
raise NotImplementedError("syscall not simulated")
|
||||||
|
|
||||||
|
|
||||||
@syscall(flags={"nowrite"})
|
@syscall(flags={"nowrite"})
|
||||||
def rtio_output_wide(time_mu: TInt64, channel: TInt32, addr: TInt32,
|
def rtio_output_wide(time_mu: TInt64, target: TInt32,
|
||||||
data: TList(TInt32)) -> TNone:
|
data: TList(TInt32)) -> TNone:
|
||||||
raise NotImplementedError("syscall not simulated")
|
raise NotImplementedError("syscall not simulated")
|
||||||
|
|
||||||
|
|
|
@ -301,10 +301,10 @@ extern fn dma_record_stop(duration: i64) {
|
||||||
|
|
||||||
#[unwind(aborts)]
|
#[unwind(aborts)]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
|
unsafe fn dma_record_output_prepare(timestamp: i64, target: i32,
|
||||||
words: usize) -> &'static mut [u8] {
|
words: usize) -> &'static mut [u8] {
|
||||||
// See gateware/rtio/dma.py.
|
// See gateware/rtio/dma.py.
|
||||||
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
|
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/1;
|
||||||
let length = HEADER_LENGTH + /*data*/words * 4;
|
let length = HEADER_LENGTH + /*data*/words * 4;
|
||||||
|
|
||||||
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
|
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
|
||||||
|
@ -319,9 +319,9 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
|
||||||
|
|
||||||
header.copy_from_slice(&[
|
header.copy_from_slice(&[
|
||||||
(length >> 0) as u8,
|
(length >> 0) as u8,
|
||||||
(channel >> 0) as u8,
|
(target >> 8) as u8,
|
||||||
(channel >> 8) as u8,
|
(target >> 16) as u8,
|
||||||
(channel >> 16) as u8,
|
(target >> 24) as u8,
|
||||||
(timestamp >> 0) as u8,
|
(timestamp >> 0) as u8,
|
||||||
(timestamp >> 8) as u8,
|
(timestamp >> 8) as u8,
|
||||||
(timestamp >> 16) as u8,
|
(timestamp >> 16) as u8,
|
||||||
|
@ -330,17 +330,16 @@ unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
|
||||||
(timestamp >> 40) as u8,
|
(timestamp >> 40) as u8,
|
||||||
(timestamp >> 48) as u8,
|
(timestamp >> 48) as u8,
|
||||||
(timestamp >> 56) as u8,
|
(timestamp >> 56) as u8,
|
||||||
(address >> 0) as u8,
|
(target >> 0) as u8,
|
||||||
(address >> 8) as u8,
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
data
|
data
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unwind(aborts)]
|
#[unwind(aborts)]
|
||||||
extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
|
extern fn dma_record_output(timestamp: i64, target: i32, word: i32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
let data = dma_record_output_prepare(timestamp, channel, address, 1);
|
let data = dma_record_output_prepare(timestamp, target, 1);
|
||||||
data.copy_from_slice(&[
|
data.copy_from_slice(&[
|
||||||
(word >> 0) as u8,
|
(word >> 0) as u8,
|
||||||
(word >> 8) as u8,
|
(word >> 8) as u8,
|
||||||
|
@ -351,11 +350,11 @@ extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i3
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unwind(aborts)]
|
#[unwind(aborts)]
|
||||||
extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice<i32>) {
|
extern fn dma_record_output_wide(timestamp: i64, target: i32, words: CSlice<i32>) {
|
||||||
assert!(words.len() <= 16); // enforce the hardware limit
|
assert!(words.len() <= 16); // enforce the hardware limit
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
let mut data = dma_record_output_prepare(timestamp, channel, address, 1);
|
let mut data = dma_record_output_prepare(timestamp, target, 1);
|
||||||
for word in words.as_ref().iter() {
|
for word in words.as_ref().iter() {
|
||||||
data[..4].copy_from_slice(&[
|
data[..4].copy_from_slice(&[
|
||||||
(word >> 0) as u8,
|
(word >> 0) as u8,
|
||||||
|
|
|
@ -36,6 +36,7 @@ mod imp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writing the LSB of o_data (offset=0) triggers the RTIO write
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub unsafe fn rtio_o_data_write(offset: usize, data: u32) {
|
pub unsafe fn rtio_o_data_write(offset: usize, data: u32) {
|
||||||
write_volatile(
|
write_volatile(
|
||||||
|
@ -66,41 +67,37 @@ mod imp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub extern fn output(timestamp: i64, channel: i32, addr: i32, data: i32) {
|
pub extern fn output(timestamp: i64, target: i32, data: i32) {
|
||||||
unsafe {
|
unsafe {
|
||||||
csr::rtio::chan_sel_write(channel as _);
|
csr::rtio::target_write(target as u32);
|
||||||
// writing timestamp clears o_data
|
// writing timestamp clears o_data
|
||||||
csr::rtio::timestamp_write(timestamp as u64);
|
csr::rtio::timestamp_write(timestamp as u64);
|
||||||
csr::rtio::o_address_write(addr as _);
|
|
||||||
rtio_o_data_write(0, data as _);
|
rtio_o_data_write(0, data as _);
|
||||||
csr::rtio::o_we_write(1);
|
|
||||||
let status = csr::rtio::o_status_read();
|
let status = csr::rtio::o_status_read();
|
||||||
if status != 0 {
|
if status != 0 {
|
||||||
process_exceptional_status(timestamp, channel, status);
|
process_exceptional_status(timestamp, target >> 8, status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub extern fn output_wide(timestamp: i64, channel: i32, addr: i32, data: CSlice<i32>) {
|
pub extern fn output_wide(timestamp: i64, target: i32, data: CSlice<i32>) {
|
||||||
unsafe {
|
unsafe {
|
||||||
csr::rtio::chan_sel_write(channel as _);
|
csr::rtio::target_write(target as u32);
|
||||||
// writing timestamp clears o_data
|
// writing timestamp clears o_data
|
||||||
csr::rtio::timestamp_write(timestamp as u64);
|
csr::rtio::timestamp_write(timestamp as u64);
|
||||||
csr::rtio::o_address_write(addr as _);
|
|
||||||
for i in 0..data.len() {
|
for i in 0..data.len() {
|
||||||
rtio_o_data_write(i, data[i] as _)
|
rtio_o_data_write(i, data[i] as _)
|
||||||
}
|
}
|
||||||
csr::rtio::o_we_write(1);
|
|
||||||
let status = csr::rtio::o_status_read();
|
let status = csr::rtio::o_status_read();
|
||||||
if status != 0 {
|
if status != 0 {
|
||||||
process_exceptional_status(timestamp, channel, status);
|
process_exceptional_status(timestamp, target >> 8, status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub extern fn input_timestamp(timeout: i64, channel: i32) -> u64 {
|
pub extern fn input_timestamp(timeout: i64, channel: i32) -> u64 {
|
||||||
unsafe {
|
unsafe {
|
||||||
csr::rtio::chan_sel_write(channel as _);
|
csr::rtio::target_write((channel as u32) << 8);
|
||||||
csr::rtio::timestamp_write(timeout as u64);
|
csr::rtio::timestamp_write(timeout as u64);
|
||||||
csr::rtio::i_request_write(1);
|
csr::rtio::i_request_write(1);
|
||||||
|
|
||||||
|
@ -130,7 +127,7 @@ mod imp {
|
||||||
|
|
||||||
pub extern fn input_data(channel: i32) -> i32 {
|
pub extern fn input_data(channel: i32) -> i32 {
|
||||||
unsafe {
|
unsafe {
|
||||||
csr::rtio::chan_sel_write(channel as _);
|
csr::rtio::target_write((channel as u32) << 8);
|
||||||
csr::rtio::timestamp_write(0xffffffff_ffffffff);
|
csr::rtio::timestamp_write(0xffffffff_ffffffff);
|
||||||
csr::rtio::i_request_write(1);
|
csr::rtio::i_request_write(1);
|
||||||
|
|
||||||
|
@ -158,7 +155,7 @@ mod imp {
|
||||||
#[cfg(has_rtio_log)]
|
#[cfg(has_rtio_log)]
|
||||||
pub fn log(timestamp: i64, data: &[u8]) {
|
pub fn log(timestamp: i64, data: &[u8]) {
|
||||||
unsafe {
|
unsafe {
|
||||||
csr::rtio::chan_sel_write(csr::CONFIG_RTIO_LOG_CHANNEL);
|
csr::rtio::target_write(csr::CONFIG_RTIO_LOG_CHANNEL << 8);
|
||||||
csr::rtio::timestamp_write(timestamp as u64);
|
csr::rtio::timestamp_write(timestamp as u64);
|
||||||
|
|
||||||
let mut word: u32 = 0;
|
let mut word: u32 = 0;
|
||||||
|
@ -167,14 +164,12 @@ mod imp {
|
||||||
word |= data[i] as u32;
|
word |= data[i] as u32;
|
||||||
if i % 4 == 3 {
|
if i % 4 == 3 {
|
||||||
rtio_o_data_write(0, word);
|
rtio_o_data_write(0, word);
|
||||||
csr::rtio::o_we_write(1);
|
|
||||||
word = 0;
|
word = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if word != 0 {
|
if word != 0 {
|
||||||
rtio_o_data_write(0, word);
|
rtio_o_data_write(0, word);
|
||||||
csr::rtio::o_we_write(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,7 @@ layout = [
|
||||||
("timestamp", 64, DIR_M_TO_S),
|
("timestamp", 64, DIR_M_TO_S),
|
||||||
|
|
||||||
("o_data", 512, DIR_M_TO_S),
|
("o_data", 512, DIR_M_TO_S),
|
||||||
("o_address", 16, DIR_M_TO_S),
|
("o_address", 8, DIR_M_TO_S),
|
||||||
# o_status bits:
|
# o_status bits:
|
||||||
# <0:wait> <1:underflow> <2:destination unreachable>
|
# <0:wait> <1:underflow> <2:destination unreachable>
|
||||||
("o_status", 3, DIR_S_TO_M),
|
("o_status", 3, DIR_S_TO_M),
|
||||||
|
@ -60,7 +60,7 @@ class Interface(Record):
|
||||||
|
|
||||||
class KernelInitiator(Module, AutoCSR):
|
class KernelInitiator(Module, AutoCSR):
|
||||||
def __init__(self, tsc, cri=None):
|
def __init__(self, tsc, cri=None):
|
||||||
self.chan_sel = CSRStorage(24)
|
self.target = CSRStorage(32)
|
||||||
# monotonic, may lag behind the counter in the IO clock domain, but
|
# monotonic, may lag behind the counter in the IO clock domain, but
|
||||||
# not be ahead of it.
|
# not be ahead of it.
|
||||||
self.timestamp = CSRStorage(64)
|
self.timestamp = CSRStorage(64)
|
||||||
|
@ -69,8 +69,6 @@ class KernelInitiator(Module, AutoCSR):
|
||||||
# zero-extension of output event data by the gateware. When staging an
|
# zero-extension of output event data by the gateware. When staging an
|
||||||
# output event, always write timestamp before o_data.
|
# output event, always write timestamp before o_data.
|
||||||
self.o_data = CSRStorage(512, write_from_dev=True)
|
self.o_data = CSRStorage(512, write_from_dev=True)
|
||||||
self.o_address = CSRStorage(16)
|
|
||||||
self.o_we = CSR()
|
|
||||||
self.o_status = CSRStatus(3)
|
self.o_status = CSRStatus(3)
|
||||||
|
|
||||||
self.i_data = CSRStatus(32)
|
self.i_data = CSRStatus(32)
|
||||||
|
@ -90,14 +88,14 @@ class KernelInitiator(Module, AutoCSR):
|
||||||
|
|
||||||
self.comb += [
|
self.comb += [
|
||||||
self.cri.cmd.eq(commands["nop"]),
|
self.cri.cmd.eq(commands["nop"]),
|
||||||
If(self.o_we.re, self.cri.cmd.eq(commands["write"])),
|
If(self.o_data.re, self.cri.cmd.eq(commands["write"])),
|
||||||
If(self.i_request.re, self.cri.cmd.eq(commands["read"])),
|
If(self.i_request.re, self.cri.cmd.eq(commands["read"])),
|
||||||
|
|
||||||
self.cri.chan_sel.eq(self.chan_sel.storage),
|
self.cri.chan_sel.eq(self.target.storage[8:]),
|
||||||
self.cri.timestamp.eq(self.timestamp.storage),
|
self.cri.timestamp.eq(self.timestamp.storage),
|
||||||
|
|
||||||
self.cri.o_data.eq(self.o_data.storage),
|
self.cri.o_data.eq(self.o_data.storage),
|
||||||
self.cri.o_address.eq(self.o_address.storage),
|
self.cri.o_address.eq(self.target.storage[:8]),
|
||||||
self.o_status.status.eq(self.cri.o_status),
|
self.o_status.status.eq(self.cri.o_status),
|
||||||
|
|
||||||
self.i_data.status.eq(self.cri.i_data),
|
self.i_data.status.eq(self.cri.i_data),
|
||||||
|
|
|
@ -148,7 +148,7 @@ record_layout = [
|
||||||
("length", 8), # of whole record (header+data)
|
("length", 8), # of whole record (header+data)
|
||||||
("channel", 24),
|
("channel", 24),
|
||||||
("timestamp", 64),
|
("timestamp", 64),
|
||||||
("address", 16),
|
("address", 8),
|
||||||
("data", 512) # variable length
|
("data", 512) # variable length
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue