diff --git a/artiq/firmware/ksupport/lib.rs b/artiq/firmware/ksupport/lib.rs index bddbe3651..fd2027185 100644 --- a/artiq/firmware/ksupport/lib.rs +++ b/artiq/firmware/ksupport/lib.rs @@ -238,11 +238,33 @@ extern fn i2c_read(busno: i32, ack: bool) -> i32 { recv!(&I2cReadReply { data } => data) as i32 } -static mut DMA_RECORDING: bool = false; +const DMA_BUFFER_SIZE: usize = 64 * 1024; + +struct DmaRecorder { + active: bool, + #[allow(dead_code)] + padding: [u8; 3], //https://github.com/rust-lang/rust/issues/41315 + data_len: usize, + buffer: [u8; DMA_BUFFER_SIZE], +} + +static mut DMA_RECORDER: DmaRecorder = DmaRecorder { + active: false, + padding: [0; 3], + data_len: 0, + buffer: [0; DMA_BUFFER_SIZE], +}; + +fn dma_record_flush() { + unsafe { + send(&DmaRecordAppend(&DMA_RECORDER.buffer[..DMA_RECORDER.data_len])); + DMA_RECORDER.data_len = 0; + } +} extern fn dma_record_start() { unsafe { - if DMA_RECORDING { + if DMA_RECORDER.active { raise!("DMAError", "DMA is already recording") } @@ -252,7 +274,7 @@ extern fn dma_record_start() { library.rebind(b"rtio_output_wide", dma_record_output_wide as *const () as u32).unwrap(); - DMA_RECORDING = true; + DMA_RECORDER.active = true; send(&DmaRecordStart); } } @@ -261,7 +283,9 @@ extern fn dma_record_stop(name: CSlice, duration: i64) { let name = str::from_utf8(name.as_ref()).unwrap(); unsafe { - if !DMA_RECORDING { + dma_record_flush(); + + if !DMA_RECORDER.active { raise!("DMAError", "DMA is not recording") } @@ -271,7 +295,7 @@ extern fn dma_record_stop(name: CSlice, duration: i64) { library.rebind(b"rtio_output_wide", rtio::output_wide as *const () as u32).unwrap(); - DMA_RECORDING = false; + DMA_RECORDER.active = false; send(&DmaRecordStop { name: name, duration: duration as u64 @@ -279,23 +303,56 @@ extern fn dma_record_stop(name: CSlice, duration: i64) { } } -extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, data: i32) { - send(&DmaRecordAppend { - timestamp: timestamp as u64, - channel: channel as u32, - address: address as u32, - data: &[data as u32] - }) +extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) { + dma_record_output_wide(timestamp, channel, address, [word].as_c_slice()) } -extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, data: CSlice) { - assert!(data.len() <= 16); // enforce the hardware limit - send(&DmaRecordAppend { - timestamp: timestamp as u64, - channel: channel as u32, - address: address as u32, - data: unsafe { mem::transmute::<&[i32], &[u32]>(data.as_ref()) } - }) +extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice) { + assert!(words.len() <= 16); // enforce the hardware limit + + // See gateware/rtio/dma.py. + let header_length = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2; + let length = header_length + /*data*/words.len() * 4; + + let header = [ + (length >> 0) as u8, + (channel >> 0) as u8, + (channel >> 8) as u8, + (channel >> 16) as u8, + (timestamp >> 0) as u8, + (timestamp >> 8) as u8, + (timestamp >> 16) as u8, + (timestamp >> 24) as u8, + (timestamp >> 32) as u8, + (timestamp >> 40) as u8, + (timestamp >> 48) as u8, + (timestamp >> 56) as u8, + (address >> 0) as u8, + (address >> 8) as u8, + ]; + + let mut data = [0; 16 * 4]; + for (i, &word) in words.as_ref().iter().enumerate() { + let part = [ + (word >> 0) as u8, + (word >> 8) as u8, + (word >> 16) as u8, + (word >> 24) as u8, + ]; + data[i * 4..(i + 1) * 4].copy_from_slice(&part[..]); + } + let data = &data[..words.len() * 4]; + + unsafe { + if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length { + dma_record_flush() + } + let mut dst = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len.. + DMA_RECORDER.data_len + length]; + dst[..header_length].copy_from_slice(&header[..]); + dst[header_length..].copy_from_slice(&data[..]); + DMA_RECORDER.data_len += length; + } } extern fn dma_erase(name: CSlice) { diff --git a/artiq/firmware/libproto/kernel_proto.rs b/artiq/firmware/libproto/kernel_proto.rs index 36adc462d..cc8ca12ba 100644 --- a/artiq/firmware/libproto/kernel_proto.rs +++ b/artiq/firmware/libproto/kernel_proto.rs @@ -6,7 +6,6 @@ pub const KERNELCPU_PAYLOAD_ADDRESS: usize = 0x40840000; pub const KERNELCPU_LAST_ADDRESS: usize = 0x4fffffff; pub const KSUPPORT_HEADER_SIZE: usize = 0x80; -#[repr(C)] #[derive(Debug, Clone)] pub struct Exception<'a> { pub name: &'a str, @@ -30,12 +29,7 @@ pub enum Message<'a> { RtioInitRequest, DmaRecordStart, - DmaRecordAppend { - timestamp: u64, - channel: u32, - address: u32, - data: &'a [u32] - }, + DmaRecordAppend(&'a [u8]), DmaRecordStop { name: &'a str, duration: u64 diff --git a/artiq/firmware/runtime/rtio_dma.rs b/artiq/firmware/runtime/rtio_dma.rs index 118048ec2..e0dc86e41 100644 --- a/artiq/firmware/runtime/rtio_dma.rs +++ b/artiq/firmware/runtime/rtio_dma.rs @@ -31,36 +31,8 @@ impl Manager { self.recording = Vec::new(); } - pub fn record_append(&mut self, timestamp: u64, channel: u32, - address: u32, data: &[u32]) { - let writer = &mut self.recording; - // See gateware/rtio/dma.py. - let length = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2 + - /*data*/data.len() * 4; - writer.write_all(&[ - (length >> 0) as u8, - (channel >> 0) as u8, - (channel >> 8) as u8, - (channel >> 16) as u8, - (timestamp >> 0) as u8, - (timestamp >> 8) as u8, - (timestamp >> 16) as u8, - (timestamp >> 24) as u8, - (timestamp >> 32) as u8, - (timestamp >> 40) as u8, - (timestamp >> 48) as u8, - (timestamp >> 56) as u8, - (address >> 0) as u8, - (address >> 8) as u8, - ]).unwrap(); - for &word in data { - writer.write_all(&[ - (word >> 0) as u8, - (word >> 8) as u8, - (word >> 16) as u8, - (word >> 24) as u8, - ]).unwrap(); - } + pub fn record_append(&mut self, data: &[u8]) { + self.recording.write_all(data).unwrap(); } pub fn record_stop(&mut self, name: &str, duration: u64) { diff --git a/artiq/firmware/runtime/session.rs b/artiq/firmware/runtime/session.rs index 871cd15e4..db58ed655 100644 --- a/artiq/firmware/runtime/session.rs +++ b/artiq/firmware/runtime/session.rs @@ -129,6 +129,13 @@ fn host_write(stream: &mut Write, reply: host::Reply) -> io::Result<()> { fn kern_send(io: &Io, request: &kern::Message) -> io::Result<()> { match request { &kern::LoadRequest(_) => debug!("comm->kern LoadRequest(...)"), + &kern::DmaPlaybackReply { trace, duration } => { + if trace.map(|data| data.len() > 100).unwrap_or(false) { + debug!("comm->kern DmaPlaybackReply {{ trace: ..., duration: {:?} }}", duration) + } else { + debug!("comm->kern {:?}", request) + } + } _ => debug!("comm->kern {:?}", request) } unsafe { mailbox::send(request as *const _ as usize) } @@ -150,6 +157,13 @@ fn kern_recv_dotrace(reply: &kern::Message) { match reply { &kern::Log(_) => debug!("comm<-kern Log(...)"), &kern::LogSlice(_) => debug!("comm<-kern LogSlice(...)"), + &kern::DmaRecordAppend(data) => { + if data.len() > 100 { + debug!("comm<-kern DmaRecordAppend(...)") + } else { + debug!("comm<-kern {:?}", reply) + } + } _ => debug!("comm<-kern {:?}", reply) } } @@ -384,8 +398,8 @@ fn process_kern_message(io: &Io, mut stream: Option<&mut TcpStream>, session.congress.dma_manager.record_start(); kern_acknowledge() } - &kern::DmaRecordAppend { timestamp, channel, address, data } => { - session.congress.dma_manager.record_append(timestamp, channel, address, data); + &kern::DmaRecordAppend(data) => { + session.congress.dma_manager.record_append(data); kern_acknowledge() } &kern::DmaRecordStop { name, duration } => { diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index 8f954e575..d0a1bb09b 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -488,6 +488,18 @@ class _DMA(EnvExperiment): delay(100*ns) self.ttl1.off() + @kernel + def record_many(self, n): + t1 = self.core.get_rtio_counter_mu() + with self.core_dma.record(self.trace_name): + for i in range(n//2): + delay(100*ns) + self.ttl1.on() + delay(100*ns) + self.ttl1.off() + t2 = self.core.get_rtio_counter_mu() + self.set_dataset("dma_record_time", self.core.mu_to_seconds(t2 - t1)) + @kernel def replay(self): self.core.break_realtime() @@ -555,3 +567,11 @@ class DMATest(ExperimentCase): exp.record() exp.replay_delta() self.assertEqual(exp.delta, 200) + + def test_dma_record_time(self): + exp = self.create(_DMA) + count = 20000 + exp.record_many(count) + dt = self.dataset_mgr.get("dma_record_time") + print("dt={}, dt/count={}".format(dt, dt/count)) + self.assertLess(dt/count, 15*us)