diff --git a/src/gateware/acpki.py b/src/gateware/acpki.py index 4f66523..df8436e 100644 --- a/src/gateware/acpki.py +++ b/src/gateware/acpki.py @@ -7,8 +7,7 @@ from misoc.interconnect.csr import * from artiq.gateware import rtio - -OUT_BURST_LEN = 4 +OUT_BURST_LEN = 10 IN_BURST_LEN = 4 @@ -98,7 +97,7 @@ class Engine(Module, AutoCSR): ### Write self.comb += [ w.data.eq(self.din), - aw.addr.eq(self.addr_base.storage+32), # Write to next cache line + aw.addr.eq(self.addr_base.storage+96), w.strb.eq(0xff), aw.burst.eq(axi.Burst.incr.value), aw.len.eq(IN_BURST_LEN-1), # Number of transfers in burst minus 1 @@ -178,7 +177,7 @@ class KernelInitiator(Module, AutoCSR): evento_latched = Signal() evento_latched_d = Signal() self.specials += MultiReg(evento, evento_latched) - self.sync += evento_latched_d.eq(evento_latched) + self.sync += [evento_latched_d.eq(evento_latched)] self.comb += self.engine.trigger_stb.eq(self.enable.storage & (evento_latched != evento_latched_d)) cri = self.cri @@ -191,16 +190,24 @@ class KernelInitiator(Module, AutoCSR): cmd_read.eq(cmd == 1) ] + out_len = Signal(8) dout_cases = {} dout_cases[0] = [ cmd.eq(self.engine.dout[:8]), + out_len.eq(self.engine.dout[8:16]), cri.chan_sel.eq(self.engine.dout[40:]), cri.o_address.eq(self.engine.dout[32:40]) ] + for i in range(8): + target = cri.o_data[i*64:(i+1)*64] + dout_cases[0] += [If(i >= self.engine.dout[8:16], target.eq(0))] + dout_cases[1] = [ cri.o_timestamp.eq(self.engine.dout) ] - dout_cases[2] = [cri.o_data.eq(self.engine.dout)] # only lowest 64 bits + for i in range(8): + target = cri.o_data[i*64:(i+1)*64] + dout_cases[i+2] = [target.eq(self.engine.dout)] self.sync += [ cri.cmd.eq(rtio.cri.commands["nop"]), @@ -226,7 +233,7 @@ class KernelInitiator(Module, AutoCSR): ) fsm.act("WAIT_OUT_CYCLE", self.engine.din_ready.eq(0), - If(self.engine.dout_stb & (self.engine.dout_index == 3), + If(self.engine.dout_stb & (self.engine.dout_index == out_len + 2), NextState("WAIT_READY") ) ) diff --git a/src/runtime/src/kernel/core1.rs b/src/runtime/src/kernel/core1.rs index 15cf156..7985bb4 100644 --- a/src/runtime/src/kernel/core1.rs +++ b/src/runtime/src/kernel/core1.rs @@ -14,7 +14,7 @@ use libcortex_a9::{ use libboard_zynq::{mpcore, gic}; use libsupport_zynq::ram; use dyld::{self, Library}; -use crate::eh_artiq; +use crate::{eh_artiq, rtio}; use super::{ api::resolve, rpc::rpc_send_async, @@ -151,6 +151,12 @@ pub fn main_core1() { INIT_LOCK.lock(); core0_tx.reset(); core1_tx.reset(); + if !KERNEL_IMAGE.is_null() { + // indicates forceful termination of previous kernel + KERNEL_IMAGE = core::ptr::null(); + debug!("rtio init"); + rtio::init(); + } dma::init_dma_recorder(); } *CHANNEL_0TO1.lock() = Some(core0_tx); diff --git a/src/runtime/src/rtio_acp.rs b/src/runtime/src/rtio_acp.rs index 7cc0b6e..727933d 100644 --- a/src/runtime/src/rtio_acp.rs +++ b/src/runtime/src/rtio_acp.rs @@ -1,8 +1,9 @@ -use cslice::CSlice; +use cslice::{CSlice, AsCSlice}; use vcell::VolatileCell; -use libcortex_a9::asm; - +use libcortex_a9::{asm, cache::dcci}; +use log::debug; use crate::artiq_raise; +use core::sync::atomic::{fence, Ordering}; use crate::pl::csr; @@ -20,33 +21,33 @@ pub struct TimestampedData { data: i32, } -#[repr(C, align(32))] +#[repr(C, align(64))] struct Transaction { request_cmd: i8, - padding0: i8, - padding1: i8, - padding2: i8, + data_width: i8, + padding0: [i8; 2], request_target: i32, request_timestamp: i64, - request_data: i64, - padding: i64, + request_data: [i32; 16], + padding1: [i64; 2], reply_status: VolatileCell, reply_data: VolatileCell, - reply_timestamp: VolatileCell + reply_timestamp: VolatileCell, + padding2: [i64; 2], } static mut TRANSACTION_BUFFER: Transaction = Transaction { request_cmd: 0, - padding0: 0, - padding1: 0, - padding2: 0, + data_width: 0, request_target: 0, request_timestamp: 0, - request_data: 0, - padding: 0, + request_data: [0; 16], reply_status: VolatileCell::new(0), reply_data: VolatileCell::new(0), - reply_timestamp: VolatileCell::new(0) + reply_timestamp: VolatileCell::new(0), + padding0: [0; 2], + padding1: [0; 2], + padding2: [0; 2] }; pub extern fn init() { @@ -54,6 +55,8 @@ pub extern fn init() { csr::rtio_core::reset_write(1); csr::rtio::engine_addr_base_write(&TRANSACTION_BUFFER as *const Transaction as u32); csr::rtio::enable_write(1); + debug!("Set reply status"); + TRANSACTION_BUFFER.reply_status.set(0x1000); } } @@ -104,17 +107,40 @@ unsafe fn process_exceptional_status(channel: i32, status: i32) { pub extern fn output(target: i32, data: i32) { unsafe { + let mut status; + loop { + status = TRANSACTION_BUFFER.reply_status.get(); + if status != 0 { + break; + } + } + + let status = status & !0x10000; + if status != 0 { + process_exceptional_status(target >> 8, status); + } // Clear status so we can observe response TRANSACTION_BUFFER.reply_status.set(0); + // volatile are not used temporarily to allow the compiler to optimize better... + // probably would use it back later. TRANSACTION_BUFFER.request_cmd = 0; + TRANSACTION_BUFFER.data_width = 1; TRANSACTION_BUFFER.request_target = target; TRANSACTION_BUFFER.request_timestamp = NOW; - TRANSACTION_BUFFER.request_data = data as i64; + TRANSACTION_BUFFER.request_data[0] = data; - asm::dmb(); + fence(Ordering::SeqCst); asm::sev(); + dcci(&TRANSACTION_BUFFER.reply_status); + // asm::wfe(); + // optimize cache... + // asm::wfe(); + } +} +pub extern fn output_wide(target: i32, data: CSlice) { + unsafe { let mut status; loop { status = TRANSACTION_BUFFER.reply_status.get(); @@ -127,12 +153,19 @@ pub extern fn output(target: i32, data: i32) { if status != 0 { process_exceptional_status(target >> 8, status); } - } -} + // Clear status so we can observe response + TRANSACTION_BUFFER.reply_status.set(0); -pub extern fn output_wide(target: i32, data: CSlice) { - // TODO - unimplemented!(); + TRANSACTION_BUFFER.request_cmd = 0; + TRANSACTION_BUFFER.data_width = data.len() as i8; + TRANSACTION_BUFFER.request_target = target; + TRANSACTION_BUFFER.request_timestamp = NOW; + TRANSACTION_BUFFER.request_data[..data.len()].copy_from_slice(data.as_ref()); + + fence(Ordering::SeqCst); + asm::sev(); + dcci(&TRANSACTION_BUFFER.reply_status); + } } pub extern fn input_timestamp(timeout: i64, channel: i32) -> i64 { @@ -144,8 +177,9 @@ pub extern fn input_timestamp(timeout: i64, channel: i32) -> i64 { TRANSACTION_BUFFER.request_timestamp = NOW; TRANSACTION_BUFFER.request_target = channel << 8; - asm::dmb(); + fence(Ordering::SeqCst); asm::sev(); + dcci(&TRANSACTION_BUFFER.reply_status); let mut status; loop { @@ -181,8 +215,9 @@ pub extern fn input_data(channel: i32) -> i32 { TRANSACTION_BUFFER.request_timestamp = -1; TRANSACTION_BUFFER.request_target = channel << 8; - asm::dmb(); + fence(Ordering::SeqCst); asm::sev(); + dcci(&TRANSACTION_BUFFER.reply_status); let mut status; loop { @@ -215,8 +250,9 @@ pub extern fn input_timestamped_data(timeout: i64, channel: i32) -> TimestampedD TRANSACTION_BUFFER.request_timestamp = timeout; TRANSACTION_BUFFER.request_target = channel << 8; - asm::dmb(); + fence(Ordering::SeqCst); asm::sev(); + dcci(&TRANSACTION_BUFFER.reply_status); let mut status; loop {