From 38d60100ffa14c1a8ac0818aabd13288d02ca459 Mon Sep 17 00:00:00 2001
From: whitequark
Date: Sun, 12 Aug 2018 19:12:36 +0000
Subject: [PATCH] firmware: optimize dma_record_output.

This removes a number of bounds checks and adds a fast path for
outputting exactly one word to DMA, which is the most common operation.
---
Review note: dma_record_output_wide has to reserve words.len() words in
dma_record_output_prepare. Reserving a single word hands back a 4-byte
data slice, so the copy loop slices past its end (and aborts) on the
second word of any multi-word write, and a zero-word write would record
4 stale bytes. Only dma_record_output, the single-word fast path, may
pass the constant 1.

 artiq/firmware/ksupport/lib.rs     | 67 ++++++++++++++++++------------
 artiq/test/coredevice/test_rtio.py |  2 +-
 2 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/artiq/firmware/ksupport/lib.rs b/artiq/firmware/ksupport/lib.rs
index bd4552dd2..d4a6615d2 100644
--- a/artiq/firmware/ksupport/lib.rs
+++ b/artiq/firmware/ksupport/lib.rs
@@ -1,5 +1,5 @@
 #![feature(lang_items, asm, panic_unwind, libc, unwind_attributes,
-           panic_implementation, panic_info_message)]
+           panic_implementation, panic_info_message, nll)]
 #![no_std]
 
 extern crate libc;
@@ -300,19 +300,24 @@ extern fn dma_record_stop(duration: i64) {
 }
 
 #[unwind(aborts)]
-extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
-    dma_record_output_wide(timestamp, channel, address, [word].as_c_slice())
-}
-
-#[unwind(aborts)]
-extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice) {
-    assert!(words.len() <= 16); // enforce the hardware limit
-
+#[inline(always)]
+unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
+                                    words: usize) -> &'static mut [u8] {
     // See gateware/rtio/dma.py.
-    let header_length = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
-    let length = header_length + /*data*/words.len() * 4;
+    const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
+    let length = HEADER_LENGTH + /*data*/words * 4;
 
-    let header = [
+    if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
+        dma_record_flush()
+    }
+
+    let record = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len..
+                                          DMA_RECORDER.data_len + length];
+    DMA_RECORDER.data_len += length;
+
+    let (header, data) = record.split_at_mut(HEADER_LENGTH);
+
+    header.copy_from_slice(&[
         (length >> 0) as u8,
         (channel >> 0) as u8,
         (channel >> 8) as u8,
@@ -327,29 +332,39 @@ extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, wor
         (timestamp >> 56) as u8,
         (address >> 0) as u8,
         (address >> 8) as u8,
-    ];
+    ]);
 
-    let mut data = [0; 16 * 4];
-    for (i, &word) in words.as_ref().iter().enumerate() {
-        let part = [
+    data
+}
+
+#[unwind(aborts)]
+extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
+    unsafe {
+        let data = dma_record_output_prepare(timestamp, channel, address, 1);
+        data.copy_from_slice(&[
             (word >> 0) as u8,
             (word >> 8) as u8,
             (word >> 16) as u8,
             (word >> 24) as u8,
-        ];
-        data[i * 4..(i + 1) * 4].copy_from_slice(&part[..]);
+        ]);
     }
-    let data = &data[..words.len() * 4];
+}
+
+#[unwind(aborts)]
+extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice) {
+    assert!(words.len() <= 16); // enforce the hardware limit
 
     unsafe {
-        if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
-            dma_record_flush()
+        let mut data = dma_record_output_prepare(timestamp, channel, address, words.len());
+        for word in words.as_ref().iter() {
+            data[..4].copy_from_slice(&[
+                (word >> 0) as u8,
+                (word >> 8) as u8,
+                (word >> 16) as u8,
+                (word >> 24) as u8,
+            ]);
+            data = &mut data[4..];
         }
-        let dst = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len..
-                                           DMA_RECORDER.data_len + length];
-        dst[..header_length].copy_from_slice(&header[..]);
-        dst[header_length..].copy_from_slice(&data[..]);
-        DMA_RECORDER.data_len += length;
     }
 }
 
diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py
index 8a2a7f6e6..7556302be 100644
--- a/artiq/test/coredevice/test_rtio.py
+++ b/artiq/test/coredevice/test_rtio.py
@@ -641,7 +641,7 @@ class DMATest(ExperimentCase):
         exp.record_many(count)
         dt = self.dataset_mgr.get("dma_record_time")
         print("dt={}, dt/count={}".format(dt, dt/count))
-        self.assertLess(dt/count, 20*us)
+        self.assertLess(dt/count, 11*us)
 
     def test_dma_playback_time(self):
         # Skip on Kasli until #946 is resolved.