From eb350c3459a97c0276b2d0f138302ead5c098fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Kulik?= Date: Mon, 24 Aug 2020 15:32:04 +0200 Subject: [PATCH 01/26] Drive SFP0 TX_DISABLE low during startup (as was in Kasli v1.1). Fixes Ethernet on SFP modules with pullup on this line. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Kulik --- artiq/firmware/runtime/main.rs | 3 +++ artiq/firmware/satman/main.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/artiq/firmware/runtime/main.rs b/artiq/firmware/runtime/main.rs index fda9d37ba..73660b611 100644 --- a/artiq/firmware/runtime/main.rs +++ b/artiq/firmware/runtime/main.rs @@ -107,6 +107,9 @@ fn startup() { io_expander1 = board_misoc::io_expander::IoExpander::new(1); io_expander0.init().expect("I2C I/O expander #0 initialization failed"); io_expander1.init().expect("I2C I/O expander #1 initialization failed"); + io_expander0.set_oe(0, 1 << 1).unwrap(); + io_expander0.set(0, 1, false); + io_expander0.service().unwrap(); } rtio_clocking::init(); diff --git a/artiq/firmware/satman/main.rs b/artiq/firmware/satman/main.rs index 9e2144370..60f0f4c0a 100644 --- a/artiq/firmware/satman/main.rs +++ b/artiq/firmware/satman/main.rs @@ -475,6 +475,9 @@ pub extern fn main() -> i32 { io_expander1.set(1, 7, true); io_expander1.service().unwrap(); } + io_expander0.set_oe(0, 1 << 1).unwrap(); + io_expander0.set(0, 1, false); + io_expander0.service().unwrap(); } #[cfg(has_si5324)] From cfddc1329410b135f480b0a40e8108e43da060d1 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 18 Aug 2020 16:29:28 +0800 Subject: [PATCH 02/26] test: fixed test_performance Added more tests and use normal rpc instead of async rpc. Async RPC does not represent the real throughput which is limited by the hardware and the network. Normal RPC which requires a response from the remote is closer to real usecases. 
--- artiq/test/coredevice/test_performance.py | 87 ++++++++++++++++++++--- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index 071c5d27c..c9ab98b01 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -1,6 +1,7 @@ import os import time import unittest +import numpy from artiq.experiment import * from artiq.test.hardware_testbench import ExperimentCase @@ -15,13 +16,29 @@ class _Transfer(EnvExperiment): def source(self) -> TBytes: return self.data - @rpc(flags={"async"}) - def sink(self, data): - assert data == self.data + @rpc + def source_byte_list(self) -> TList(TBool): + return [True] * (1 << 15) - @rpc(flags={"async"}) + @rpc + def source_list(self) -> TList(TInt32): + return [123] * (1 << 15) + + @rpc + def source_array(self) -> TArray(TInt32): + return numpy.array([0] * (1 << 15), numpy.int32) + + @rpc + def sink(self, data): + pass + + @rpc + def sink_list(self, data): + pass + + @rpc def sink_array(self, data): - assert data == [0]*(1 << 15) + pass @kernel def host_to_device(self): @@ -30,6 +47,27 @@ class _Transfer(EnvExperiment): t1 = self.core.get_rtio_counter_mu() return len(data)/self.core.mu_to_seconds(t1-t0) + @kernel + def host_to_device_list(self): + t0 = self.core.get_rtio_counter_mu() + data = self.source_list() + t1 = self.core.get_rtio_counter_mu() + return 4 * len(data)/self.core.mu_to_seconds(t1-t0) + + @kernel + def host_to_device_array(self): + t0 = self.core.get_rtio_counter_mu() + data = self.source_array() + t1 = self.core.get_rtio_counter_mu() + return 4 * len(data)/self.core.mu_to_seconds(t1-t0) + + @kernel + def host_to_device_byte_list(self): + t0 = self.core.get_rtio_counter_mu() + data = self.source_byte_list() + t1 = self.core.get_rtio_counter_mu() + return len(data)/self.core.mu_to_seconds(t1-t0) + @kernel def device_to_host(self): t0 = self.core.get_rtio_counter_mu() @@ -38,14 
+76,23 @@ class _Transfer(EnvExperiment): return len(self.data)/self.core.mu_to_seconds(t1-t0) @kernel - def device_to_host_array(self): + def device_to_host_list(self): #data = [[0]*8 for _ in range(1 << 12)] data = [0]*(1 << 15) t0 = self.core.get_rtio_counter_mu() + self.sink_list(data) + t1 = self.core.get_rtio_counter_mu() + return ((len(data)*4) / + self.core.mu_to_seconds(t1-t0)) + + @kernel + def device_to_host_array(self): + data = self.source_array() + t0 = self.core.get_rtio_counter_mu() self.sink_array(data) t1 = self.core.get_rtio_counter_mu() - return ((len(data)*4)/ - self.core.mu_to_seconds(t1-t0)) + return ((len(data)*4) / + self.core.mu_to_seconds(t1-t0)) class TransferTest(ExperimentCase): @@ -55,12 +102,36 @@ class TransferTest(ExperimentCase): print(host_to_device_rate/(1024*1024), "MiB/s") self.assertGreater(host_to_device_rate, 2.0e6) + def test_host_to_device_byte_list(self): + exp = self.create(_Transfer) + host_to_device_rate = exp.host_to_device_byte_list() + print(host_to_device_rate/(1024*1024), "MiB/s") + self.assertGreater(host_to_device_rate, 2.0e6) + + def test_host_to_device_list(self): + exp = self.create(_Transfer) + host_to_device_rate = exp.host_to_device_list() + print(host_to_device_rate/(1024*1024), "MiB/s") + self.assertGreater(host_to_device_rate, 2.0e6) + + def test_host_to_device_array(self): + exp = self.create(_Transfer) + host_to_device_rate = exp.host_to_device_array() + print(host_to_device_rate/(1024*1024), "MiB/s") + self.assertGreater(host_to_device_rate, 2.0e6) + def test_device_to_host(self): exp = self.create(_Transfer) device_to_host_rate = exp.device_to_host() print(device_to_host_rate/(1024*1024), "MiB/s") self.assertGreater(device_to_host_rate, 2.2e6) + def test_device_to_host_list(self): + exp = self.create(_Transfer) + rate = exp.device_to_host_list() + print(rate/(1024*1024), "MiB/s") + self.assertGreater(rate, .15e6) + def test_device_to_host_array(self): exp = self.create(_Transfer) rate = 
exp.device_to_host_array() From 7181ff66a6fd7b3aecddf752b6c3eff5fddf5a7d Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 18 Aug 2020 17:01:28 +0800 Subject: [PATCH 03/26] compiler: improved rpc performance for list and array 1. Removed duplicated tags before each elements. 2. Use numpy functions to speedup parsing. --- artiq/coredevice/comm_kernel.py | 228 ++++++++++++++++++++++---------- 1 file changed, 158 insertions(+), 70 deletions(-) diff --git a/artiq/coredevice/comm_kernel.py b/artiq/coredevice/comm_kernel.py index 41bddd553..b28f79272 100644 --- a/artiq/coredevice/comm_kernel.py +++ b/artiq/coredevice/comm_kernel.py @@ -43,9 +43,11 @@ class Reply(Enum): class UnsupportedDevice(Exception): pass + class LoadError(Exception): pass + class RPCReturnValueError(ValueError): pass @@ -53,6 +55,105 @@ class RPCReturnValueError(ValueError): RPCKeyword = namedtuple('RPCKeyword', ['name', 'value']) +def _receive_fraction(kernel, embedding_map): + numerator = kernel._read_int64() + denominator = kernel._read_int64() + return Fraction(numerator, denominator) + + +def _receive_list(kernel, embedding_map): + length = kernel._read_int32() + tag = chr(kernel._read_int8()) + if tag == "b": + buffer = kernel._read(length) + return numpy.ndarray((length, ), 'B', buffer).tolist() + elif tag == "i": + buffer = kernel._read(4 * length) + return numpy.ndarray((length, ), '>i4', buffer).tolist() + elif tag == "I": + buffer = kernel._read(8 * length) + return numpy.ndarray((length, ), '>i8', buffer).tolist() + elif tag == "f": + buffer = kernel._read(8 * length) + return numpy.ndarray((length, ), '>d', buffer).tolist() + else: + fn = receivers[tag] + elems = [] + for _ in range(length): + # discard tag, as our device would still send the tag for each + # non-primitive elements. 
+ kernel._read_int8() + item = fn(kernel, embedding_map) + elems.append(item) + return elems + + +def _receive_array(kernel, embedding_map): + num_dims = kernel._read_int8() + shape = tuple(kernel._read_int32() for _ in range(num_dims)) + tag = chr(kernel._read_int8()) + fn = receivers[tag] + length = numpy.prod(shape) + if tag == "b": + buffer = kernel._read(length) + elems = numpy.ndarray((length, ), 'B', buffer) + elif tag == "i": + buffer = kernel._read(4 * length) + elems = numpy.ndarray((length, ), '>i4', buffer) + elif tag == "I": + buffer = kernel._read(8 * length) + elems = numpy.ndarray((length, ), '>i8', buffer) + elif tag == "f": + buffer = kernel._read(8 * length) + elems = numpy.ndarray((length, ), '>d', buffer) + else: + fn = receivers[tag] + elems = [] + for _ in range(numpy.prod(shape)): + # discard the tag + kernel._read_int8() + item = fn(kernel, embedding_map) + elems.append(item) + elems = numpy.array(elems) + return elems.reshape(shape) + + +def _receive_range(kernel, embedding_map): + start = kernel._receive_rpc_value(embedding_map) + stop = kernel._receive_rpc_value(embedding_map) + step = kernel._receive_rpc_value(embedding_map) + return range(start, stop, step) + + +def _receive_keyword(kernel, embedding_map): + name = kernel._read_string() + value = kernel._receive_rpc_value(embedding_map) + return RPCKeyword(name, value) + + +receivers = { + "\x00": lambda kernel, embedding_map: kernel._rpc_sentinel, + "t": lambda kernel, embedding_map: + tuple(kernel._receive_rpc_value(embedding_map) + for _ in range(kernel._read_int8())), + "n": lambda kernel, embedding_map: None, + "b": lambda kernel, embedding_map: bool(kernel._read_int8()), + "i": lambda kernel, embedding_map: numpy.int32(kernel._read_int32()), + "I": lambda kernel, embedding_map: numpy.int32(kernel._read_int64()), + "f": lambda kernel, embedding_map: kernel._read_float64(), + "s": lambda kernel, embedding_map: kernel._read_string(), + "B": lambda kernel, embedding_map: 
kernel._read_bytes(), + "A": lambda kernel, embedding_map: kernel._read_bytes(), + "O": lambda kernel, embedding_map: + embedding_map.retrieve_object(kernel._read_int32()), + "F": _receive_fraction, + "l": _receive_list, + "a": _receive_array, + "r": _receive_range, + "k": _receive_keyword +} + + class CommKernelDummy: def __init__(self): pass @@ -247,50 +348,8 @@ class CommKernel: # See rpc_proto.rs and compiler/ir.py:rpc_tag. def _receive_rpc_value(self, embedding_map): tag = chr(self._read_int8()) - if tag == "\x00": - return self._rpc_sentinel - elif tag == "t": - length = self._read_int8() - return tuple(self._receive_rpc_value(embedding_map) for _ in range(length)) - elif tag == "n": - return None - elif tag == "b": - return bool(self._read_int8()) - elif tag == "i": - return numpy.int32(self._read_int32()) - elif tag == "I": - return numpy.int64(self._read_int64()) - elif tag == "f": - return self._read_float64() - elif tag == "F": - numerator = self._read_int64() - denominator = self._read_int64() - return Fraction(numerator, denominator) - elif tag == "s": - return self._read_string() - elif tag == "B": - return self._read_bytes() - elif tag == "A": - return self._read_bytes() - elif tag == "l": - length = self._read_int32() - return [self._receive_rpc_value(embedding_map) for _ in range(length)] - elif tag == "a": - num_dims = self._read_int8() - shape = tuple(self._read_int32() for _ in range(num_dims)) - elems = [self._receive_rpc_value(embedding_map) for _ in range(numpy.prod(shape))] - return numpy.array(elems).reshape(shape) - elif tag == "r": - start = self._receive_rpc_value(embedding_map) - stop = self._receive_rpc_value(embedding_map) - step = self._receive_rpc_value(embedding_map) - return range(start, stop, step) - elif tag == "k": - name = self._read_string() - value = self._receive_rpc_value(embedding_map) - return RPCKeyword(name, value) - elif tag == "O": - return embedding_map.retrieve_object(self._read_int32()) + if tag in receivers: + 
return receivers.get(tag)(self, embedding_map) else: raise IOError("Unknown RPC value tag: {}".format(repr(tag))) @@ -357,8 +416,8 @@ class CommKernel: self._write_float64(value) elif tag == "F": check(isinstance(value, Fraction) and - (-2**63 < value.numerator < 2**63-1) and - (-2**63 < value.denominator < 2**63-1), + (-2**63 < value.numerator < 2**63-1) and + (-2**63 < value.denominator < 2**63-1), lambda: "64-bit Fraction") self._write_int64(value.numerator) self._write_int64(value.denominator) @@ -378,21 +437,47 @@ class CommKernel: check(isinstance(value, list), lambda: "list") self._write_int32(len(value)) - for elt in value: - tags_copy = bytearray(tags) - self._send_rpc_value(tags_copy, elt, root, function) + tag_element = chr(tags[0]) + if tag_element == "b": + self._write(bytes(value)) + elif tag_element == "i": + array = numpy.array(value, '>i4') + self._write(array.tobytes()) + elif tag_element == "I": + array = numpy.array(value, '>i8') + self._write(array.tobytes()) + elif tag_element == "f": + array = numpy.array(value, '>d') + self._write(array.tobytes()) + else: + for elt in value: + tags_copy = bytearray(tags) + self._send_rpc_value(tags_copy, elt, root, function) self._skip_rpc_value(tags) elif tag == "a": check(isinstance(value, numpy.ndarray), lambda: "numpy.ndarray") num_dims = tags.pop(0) check(num_dims == len(value.shape), - lambda: "{}-dimensional numpy.ndarray".format(num_dims)) + lambda: "{}-dimensional numpy.ndarray".format(num_dims)) for s in value.shape: self._write_int32(s) - for elt in value.reshape((-1,), order="C"): - tags_copy = bytearray(tags) - self._send_rpc_value(tags_copy, elt, root, function) + tag_element = chr(tags[0]) + if tag_element == "b": + self._write(value.reshape((-1,), order="C").tobytes()) + elif tag_element == "i": + array = value.reshape((-1,), order="C").astype('>i4') + self._write(array.tobytes()) + elif tag_element == "I": + array = value.reshape((-1,), order="C").astype('>i8') + self._write(array.tobytes()) 
+ elif tag_element == "f": + array = value.reshape((-1,), order="C").astype('>d') + self._write(array.tobytes()) + else: + for elt in value.reshape((-1,), order="C"): + tags_copy = bytearray(tags) + self._send_rpc_value(tags_copy, elt, root, function) self._skip_rpc_value(tags) elif tag == "r": check(isinstance(value, range), @@ -414,15 +499,15 @@ class CommKernel: return msg def _serve_rpc(self, embedding_map): - is_async = self._read_bool() - service_id = self._read_int32() + is_async = self._read_bool() + service_id = self._read_int32() args, kwargs = self._receive_rpc_args(embedding_map) - return_tags = self._read_bytes() + return_tags = self._read_bytes() if service_id == 0: - service = lambda obj, attr, value: setattr(obj, attr, value) + def service(obj, attr, value): return setattr(obj, attr, value) else: - service = embedding_map.retrieve_object(service_id) + service = embedding_map.retrieve_object(service_id) logger.debug("rpc service: [%d]%r%s %r %r -> %s", service_id, service, (" (async)" if is_async else ""), args, kwargs, return_tags) @@ -432,15 +517,18 @@ class CommKernel: try: result = service(*args, **kwargs) - logger.debug("rpc service: %d %r %r = %r", service_id, args, kwargs, result) + logger.debug("rpc service: %d %r %r = %r", + service_id, args, kwargs, result) self._write_header(Request.RPCReply) self._write_bytes(return_tags) - self._send_rpc_value(bytearray(return_tags), result, result, service) + self._send_rpc_value(bytearray(return_tags), + result, result, service) except RPCReturnValueError as exn: raise except Exception as exn: - logger.debug("rpc service: %d %r %r ! %r", service_id, args, kwargs, exn) + logger.debug("rpc service: %d %r %r ! 
%r", + service_id, args, kwargs, exn) self._write_header(Request.RPCException) @@ -479,23 +567,23 @@ class CommKernel: assert False self._write_string(filename) self._write_int32(line) - self._write_int32(-1) # column not known + self._write_int32(-1) # column not known self._write_string(function) def _serve_exception(self, embedding_map, symbolizer, demangler): - name = self._read_string() - message = self._read_string() - params = [self._read_int64() for _ in range(3)] + name = self._read_string() + message = self._read_string() + params = [self._read_int64() for _ in range(3)] - filename = self._read_string() - line = self._read_int32() - column = self._read_int32() - function = self._read_string() + filename = self._read_string() + line = self._read_int32() + column = self._read_int32() + function = self._read_string() backtrace = [self._read_int32() for _ in range(self._read_int32())] traceback = list(reversed(symbolizer(backtrace))) + \ - [(filename, line, column, *demangler([function]), None)] + [(filename, line, column, *demangler([function]), None)] core_exn = exceptions.CoreException(name, message, params, traceback) if core_exn.id == 0: From aac2194759892b8f384be4bb606ba2f21251b7ba Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 26 Aug 2020 11:32:11 +0800 Subject: [PATCH 04/26] Ported rpc changes to or1k --- artiq/firmware/libproto_artiq/lib.rs | 1 + artiq/firmware/libproto_artiq/rpc_proto.rs | 105 +++++++++++++++++++-- 2 files changed, 97 insertions(+), 9 deletions(-) diff --git a/artiq/firmware/libproto_artiq/lib.rs b/artiq/firmware/libproto_artiq/lib.rs index d8cbfe607..66c04d5e6 100644 --- a/artiq/firmware/libproto_artiq/lib.rs +++ b/artiq/firmware/libproto_artiq/lib.rs @@ -11,6 +11,7 @@ extern crate cslice; #[macro_use] extern crate log; +extern crate byteorder; extern crate io; extern crate dyld; diff --git a/artiq/firmware/libproto_artiq/rpc_proto.rs b/artiq/firmware/libproto_artiq/rpc_proto.rs index b35e6b905..84296a62c 100644 --- 
a/artiq/firmware/libproto_artiq/rpc_proto.rs +++ b/artiq/firmware/libproto_artiq/rpc_proto.rs @@ -1,6 +1,7 @@ use core::str; +use core::slice; use cslice::{CSlice, CMutSlice}; - +use byteorder::{NetworkEndian, ByteOrder}; use io::{ProtoRead, Read, Write, ProtoWrite, Error}; use self::tag::{Tag, TagIterator, split_tag}; @@ -53,13 +54,34 @@ unsafe fn recv_value(reader: &mut R, tag: Tag, data: &mut *mut (), struct List { elements: *mut (), length: u32 }; consume_value!(List, |ptr| { (*ptr).length = reader.read_u32()?; + let length = (*ptr).length as usize; let tag = it.clone().next().expect("truncated tag"); (*ptr).elements = alloc(tag.size() * (*ptr).length as usize)?; let mut data = (*ptr).elements; - for _ in 0..(*ptr).length as usize { - recv_value(reader, tag, &mut data, alloc)? + match tag { + Tag::Bool => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length); + reader.read_exact(dest)?; + }, + Tag::Int32 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i32, length); + NetworkEndian::from_slice_i32(dest); + }, + Tag::Int64 | Tag::Float64 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i64, length); + NetworkEndian::from_slice_i64(dest); + }, + _ => { + for _ in 0..length { + recv_value(reader, tag, &mut data, alloc)? + } + } } Ok(()) }) @@ -72,13 +94,34 @@ unsafe fn recv_value(reader: &mut R, tag: Tag, data: &mut *mut (), total_len *= len; consume_value!(u32, |ptr| *ptr = len ) } + let length = total_len as usize; let elt_tag = it.clone().next().expect("truncated tag"); *buffer = alloc(elt_tag.size() * total_len as usize)?; let mut data = *buffer; - for _ in 0..total_len { - recv_value(reader, elt_tag, &mut data, alloc)? 
+ match elt_tag { + Tag::Bool => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length); + reader.read_exact(dest)?; + }, + Tag::Int32 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i32, length); + NetworkEndian::from_slice_i32(dest); + }, + Tag::Int64 | Tag::Float64 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i64, length); + NetworkEndian::from_slice_i64(dest); + }, + _ => { + for _ in 0..length { + recv_value(reader, elt_tag, &mut data, alloc)? + } + } } Ok(()) }) @@ -155,11 +198,33 @@ unsafe fn send_value(writer: &mut W, tag: Tag, data: &mut *const ()) #[repr(C)] struct List { elements: *const (), length: u32 }; consume_value!(List, |ptr| { + let length = (*ptr).length as usize; writer.write_u32((*ptr).length)?; let tag = it.clone().next().expect("truncated tag"); let mut data = (*ptr).elements; - for _ in 0..(*ptr).length as usize { - send_value(writer, tag, &mut data)?; + writer.write_u8(tag.as_u8())?; + match tag { + Tag::Bool => { + let slice = slice::from_raw_parts(data as *const u8, length); + writer.write_all(slice)?; + }, + Tag::Int32 => { + let slice = slice::from_raw_parts(data as *const u32, length); + for v in slice.iter() { + writer.write_u32(*v)?; + } + }, + Tag::Int64 | Tag::Float64 => { + let slice = slice::from_raw_parts(data as *const u64, length); + for v in slice.iter() { + writer.write_u64(*v)?; + } + }, + _ => { + for _ in 0..length { + send_value(writer, tag, &mut data)?; + } + } } Ok(()) }) @@ -176,9 +241,31 @@ unsafe fn send_value(writer: &mut W, tag: Tag, data: &mut *const ()) total_len *= *len; }) } + let length = total_len as usize; let mut data = *buffer; - for _ in 0..total_len as usize { - send_value(writer, elt_tag, &mut data)?; + writer.write_u8(elt_tag.as_u8())?; + match elt_tag { + Tag::Bool => { + let 
slice = slice::from_raw_parts(data as *const u8, length); + writer.write_all(slice)?; + }, + Tag::Int32 => { + let slice = slice::from_raw_parts(data as *const u32, length); + for v in slice.iter() { + writer.write_u32(*v)?; + } + }, + Tag::Int64 | Tag::Float64 => { + let slice = slice::from_raw_parts(data as *const u64, length); + for v in slice.iter() { + writer.write_u64(*v)?; + } + }, + _ => { + for _ in 0..length { + send_value(writer, elt_tag, &mut data)?; + } + } } Ok(()) }) From 26bc5d24058c36c85dea6328ee4207fcbd5e6fc1 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 26 Aug 2020 12:12:33 +0800 Subject: [PATCH 05/26] Updated release notes --- RELEASE_NOTES.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 7b9899d71..c6970f71d 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -11,6 +11,7 @@ Highlights: * Performance improvements: - #1432: SERDES TTL inputs can now detect edges on pulses that are shorter than the RTIO period + - Improved performance for kernel RPC involving list and array. * Coredevice SI to mu conversions now always return valid codes, or raise a `ValueError`. 
* Zotino now exposes `voltage_to_mu()` * `ad9910`: The maximum amplitude scale factor is now `0x3fff` (was `0x3ffe` From 7cf974a6a7e645ce0a3e58d55a369092646bd7f5 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 28 Aug 2020 12:25:23 +0800 Subject: [PATCH 06/26] comm_kernel: fix typo --- artiq/coredevice/comm_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artiq/coredevice/comm_kernel.py b/artiq/coredevice/comm_kernel.py index b28f79272..d5096cb69 100644 --- a/artiq/coredevice/comm_kernel.py +++ b/artiq/coredevice/comm_kernel.py @@ -139,7 +139,7 @@ receivers = { "n": lambda kernel, embedding_map: None, "b": lambda kernel, embedding_map: bool(kernel._read_int8()), "i": lambda kernel, embedding_map: numpy.int32(kernel._read_int32()), - "I": lambda kernel, embedding_map: numpy.int32(kernel._read_int64()), + "I": lambda kernel, embedding_map: numpy.int64(kernel._read_int64()), "f": lambda kernel, embedding_map: kernel._read_float64(), "s": lambda kernel, embedding_map: kernel._read_string(), "B": lambda kernel, embedding_map: kernel._read_bytes(), From 69f0699ebd8db2232a705f61a5655f99fba96edc Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 27 Aug 2020 13:06:09 +0800 Subject: [PATCH 07/26] test: improved test_performance 1. Added tests for small payload. 2. Added statistics. 
--- artiq/test/coredevice/test_performance.py | 310 +++++++++++++++------- 1 file changed, 220 insertions(+), 90 deletions(-) diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index c9ab98b01..3ca1f86ba 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -6,138 +6,268 @@ import numpy from artiq.experiment import * from artiq.test.hardware_testbench import ExperimentCase +# large: 1MB payload +# small: 1KB payload +bytes_large = b"\x00" * (1 << 20) +bytes_small = b"\x00" * (1 << 10) + +list_large = [123] * (1 << 18) +list_small = [123] * (1 << 8) + +array_large = numpy.array(list_large, numpy.int32) +array_small = numpy.array(list_small, numpy.int32) + +byte_list_large = [True] * (1 << 20) +byte_list_small = [True] * (1 << 10) + +received_bytes = 0 +time_start = 0 +time_end = 0 class _Transfer(EnvExperiment): def build(self): self.setattr_device("core") - self.data = b"\x00"*(10**6) + self.count = 10 + self.h2d = [0.0] * self.count + self.d2h = [0.0] * self.count @rpc - def source(self) -> TBytes: - return self.data + def get_bytes(self, large: TBool) -> TBytes: + if large: + return bytes_large + else: + return bytes_small @rpc - def source_byte_list(self) -> TList(TBool): - return [True] * (1 << 15) + def get_list(self, large: TBool) -> TList(TInt32): + if large: + return list_large + else: + return list_small @rpc - def source_list(self) -> TList(TInt32): - return [123] * (1 << 15) + def get_byte_list(self, large: TBool) -> TList(TBool): + if large: + return byte_list_large + else: + return byte_list_small @rpc - def source_array(self) -> TArray(TInt32): - return numpy.array([0] * (1 << 15), numpy.int32) + def get_array(self, large: TBool) -> TArray(TInt32): + if large: + return array_large + else: + return array_small + + @rpc + def get_string_list(self) -> TList(TStr): + return string_list @rpc def sink(self, data): pass - @rpc - def sink_list(self, data): - 
pass + @rpc(flags={"async"}) + def sink_async(self, data): + global received_bytes, time_start, time_end + if received_bytes == 0: + time_start = time.time() + received_bytes += len(data) + if received_bytes == (1024 ** 2)*128: + time_end = time.time() @rpc - def sink_array(self, data): - pass + def get_async_throughput(self) -> TFloat: + return 128.0 / (time_end - time_start) @kernel - def host_to_device(self): - t0 = self.core.get_rtio_counter_mu() - data = self.source() - t1 = self.core.get_rtio_counter_mu() - return len(data)/self.core.mu_to_seconds(t1-t0) + def test_bytes(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_bytes(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def host_to_device_list(self): - t0 = self.core.get_rtio_counter_mu() - data = self.source_list() - t1 = self.core.get_rtio_counter_mu() - return 4 * len(data)/self.core.mu_to_seconds(t1-t0) + def test_byte_list(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_byte_list(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def host_to_device_array(self): - t0 = self.core.get_rtio_counter_mu() - data = self.source_array() - t1 = self.core.get_rtio_counter_mu() - return 4 * len(data)/self.core.mu_to_seconds(t1-t0) + def test_list(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_list(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - 
t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def host_to_device_byte_list(self): - t0 = self.core.get_rtio_counter_mu() - data = self.source_byte_list() - t1 = self.core.get_rtio_counter_mu() - return len(data)/self.core.mu_to_seconds(t1-t0) + def test_array(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_array(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def device_to_host(self): - t0 = self.core.get_rtio_counter_mu() - self.sink(self.data) - t1 = self.core.get_rtio_counter_mu() - return len(self.data)/self.core.mu_to_seconds(t1-t0) - - @kernel - def device_to_host_list(self): - #data = [[0]*8 for _ in range(1 << 12)] - data = [0]*(1 << 15) - t0 = self.core.get_rtio_counter_mu() - self.sink_list(data) - t1 = self.core.get_rtio_counter_mu() - return ((len(data)*4) / - self.core.mu_to_seconds(t1-t0)) - - @kernel - def device_to_host_array(self): - data = self.source_array() - t0 = self.core.get_rtio_counter_mu() - self.sink_array(data) - t1 = self.core.get_rtio_counter_mu() - return ((len(data)*4) / - self.core.mu_to_seconds(t1-t0)) - + def test_async(self): + data = self.get_bytes(True) + for _ in range(128): + self.sink_async(data) + return self.get_async_throughput() class TransferTest(ExperimentCase): - def test_host_to_device(self): - exp = self.create(_Transfer) - host_to_device_rate = exp.host_to_device() - print(host_to_device_rate/(1024*1024), "MiB/s") - self.assertGreater(host_to_device_rate, 2.0e6) + @classmethod + def setUpClass(self): + self.results = [] - def test_host_to_device_byte_list(self): - exp = self.create(_Transfer) - host_to_device_rate = exp.host_to_device_byte_list() - 
print(host_to_device_rate/(1024*1024), "MiB/s") - self.assertGreater(host_to_device_rate, 2.0e6) + @classmethod + def tearDownClass(self): + if len(self.results) == 0: + return + max_length = max(max(len(row[0]) for row in self.results), len("Test")) - def test_host_to_device_list(self): - exp = self.create(_Transfer) - host_to_device_rate = exp.host_to_device_list() - print(host_to_device_rate/(1024*1024), "MiB/s") - self.assertGreater(host_to_device_rate, 2.0e6) + def pad(name): + nonlocal max_length + return name + " " * (max_length - len(name)) + print() + print("| {} | Mean (MiB/s) | std (MiB/s) |".format(pad("Test"))) + print("| {} | ------------ | ------------ |".format("-" * max_length)) + for v in self.results: + print("| {} | {:>12.2f} | {:>12.2f} |".format( + pad(v[0]), v[1], v[2])) - def test_host_to_device_array(self): + def test_bytes_large(self): exp = self.create(_Transfer) - host_to_device_rate = exp.host_to_device_array() - print(host_to_device_rate/(1024*1024), "MiB/s") - self.assertGreater(host_to_device_rate, 2.0e6) + results = exp.test_bytes(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) - def test_device_to_host(self): + def test_bytes_small(self): exp = self.create(_Transfer) - device_to_host_rate = exp.device_to_host() - print(device_to_host_rate/(1024*1024), "MiB/s") - self.assertGreater(device_to_host_rate, 2.2e6) + results = exp.test_bytes(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes (1KB) H2D", 
host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) - def test_device_to_host_list(self): + def test_byte_list_large(self): exp = self.create(_Transfer) - rate = exp.device_to_host_list() - print(rate/(1024*1024), "MiB/s") - self.assertGreater(rate, .15e6) + results = exp.test_byte_list(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes List (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes List (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) - def test_device_to_host_array(self): + def test_byte_list_small(self): exp = self.create(_Transfer) - rate = exp.device_to_host_array() - print(rate/(1024*1024), "MiB/s") - self.assertGreater(rate, .15e6) + results = exp.test_byte_list(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes List (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes List (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + def test_list_large(self): + exp = self.create(_Transfer) + results = exp.test_list(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 List (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 List (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_list_small(self): + exp = self.create(_Transfer) + results = exp.test_list(False) + host_to_device = (1 
<< 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 List (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 List (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_array_large(self): + exp = self.create(_Transfer) + results = exp.test_array(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 Array (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 Array (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_array_small(self): + exp = self.create(_Transfer) + results = exp.test_array(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 Array (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_async_throughput(self): + exp = self.create(_Transfer) + results = exp.test_async() + print("Async throughput: {:>6.2f}MiB/s".format(results)) class _KernelOverhead(EnvExperiment): def build(self): From b2572003acfef50b32aa00cd17388f00fed5f87a Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 27 Aug 2020 11:27:40 +0800 Subject: [PATCH 08/26] RPC: optimization by caching This reduced the calls needed for socket send/recv. 
--- artiq/coredevice/comm_kernel.py | 87 +++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/artiq/coredevice/comm_kernel.py b/artiq/coredevice/comm_kernel.py index d5096cb69..babf035b3 100644 --- a/artiq/coredevice/comm_kernel.py +++ b/artiq/coredevice/comm_kernel.py @@ -66,16 +66,16 @@ def _receive_list(kernel, embedding_map): tag = chr(kernel._read_int8()) if tag == "b": buffer = kernel._read(length) - return numpy.ndarray((length, ), 'B', buffer).tolist() + return list(buffer) elif tag == "i": buffer = kernel._read(4 * length) - return numpy.ndarray((length, ), '>i4', buffer).tolist() + return list(struct.unpack(">%sl" % length, buffer)) elif tag == "I": buffer = kernel._read(8 * length) - return numpy.ndarray((length, ), '>i8', buffer).tolist() + return list(struct.unpack(">%sq" % length, buffer)) elif tag == "f": buffer = kernel._read(8 * length) - return numpy.ndarray((length, ), '>d', buffer).tolist() + return list(struct.unpack(">%sd" % length, buffer)) else: fn = receivers[tag] elems = [] @@ -178,6 +178,17 @@ class CommKernel: self._read_type = None self.host = host self.port = port + self.read_buffer = bytearray() + self.write_buffer = bytearray() + + self.unpack_int32 = struct.Struct(">l").unpack + self.unpack_int64 = struct.Struct(">q").unpack + self.unpack_float64 = struct.Struct(">d").unpack + + self.pack_header = struct.Struct(">lB").pack + self.pack_int32 = struct.Struct(">l").pack + self.pack_int64 = struct.Struct(">q").pack + self.pack_float64 = struct.Struct(">d").pack def open(self): if hasattr(self, "socket"): @@ -198,13 +209,18 @@ class CommKernel: # def _read(self, length): - r = bytes() - while len(r) < length: - rn = self.socket.recv(min(8192, length - len(r))) - if not rn: - raise ConnectionResetError("Connection closed") - r += rn - return r + # cache the reads to avoid frequent call to recv + while len(self.read_buffer) < length: + # the number is just the maximum amount + # when there is not 
much data, it would return earlier + diff = length - len(self.read_buffer) + flag = 0 + if diff > 8192: + flag |= socket.MSG_WAITALL + self.read_buffer += self.socket.recv(8192, flag) + result = self.read_buffer[:length] + self.read_buffer = self.read_buffer[length:] + return result def _read_header(self): self.open() @@ -212,14 +228,14 @@ class CommKernel: # Wait for a synchronization sequence, 5a 5a 5a 5a. sync_count = 0 while sync_count < 4: - (sync_byte, ) = struct.unpack("B", self._read(1)) + sync_byte = self._read(1)[0] if sync_byte == 0x5a: sync_count += 1 else: sync_count = 0 # Read message header. - (raw_type, ) = struct.unpack("B", self._read(1)) + raw_type = self._read(1)[0] self._read_type = Reply(raw_type) logger.debug("receiving message: type=%r", @@ -235,19 +251,18 @@ class CommKernel: self._read_expect(ty) def _read_int8(self): - (value, ) = struct.unpack("B", self._read(1)) - return value + return self._read(1)[0] def _read_int32(self): - (value, ) = struct.unpack(">l", self._read(4)) + (value, ) = self.unpack_int32(self._read(4)) return value def _read_int64(self): - (value, ) = struct.unpack(">q", self._read(8)) + (value, ) = self.unpack_int64(self._read(8)) return value def _read_float64(self): - (value, ) = struct.unpack(">d", self._read(8)) + (value, ) = self.unpack_float64(self._read(8)) return value def _read_bool(self): @@ -264,7 +279,15 @@ class CommKernel: # def _write(self, data): - self.socket.sendall(data) + self.write_buffer += data + # if the buffer is already pretty large, send it + # the block size is arbitrary, tuning it may improve performance + if len(self.write_buffer) > 4096: + self._flush() + + def _flush(self): + self.socket.sendall(self.write_buffer) + self.write_buffer.clear() def _write_header(self, ty): self.open() @@ -272,7 +295,7 @@ class CommKernel: logger.debug("sending message: type=%r", ty) # Write synchronization sequence and header. 
- self._write(struct.pack(">lB", 0x5a5a5a5a, ty.value)) + self._write(self.pack_header(0x5a5a5a5a, ty.value)) def _write_empty(self, ty): self._write_header(ty) @@ -281,19 +304,19 @@ class CommKernel: self._write(chunk) def _write_int8(self, value): - self._write(struct.pack("B", value)) + self._write(value) def _write_int32(self, value): - self._write(struct.pack(">l", value)) + self._write(self.pack_int32(value)) def _write_int64(self, value): - self._write(struct.pack(">q", value)) + self._write(self.pack_int64(value)) def _write_float64(self, value): - self._write(struct.pack(">d", value)) + self._write(self.pack_float64(value)) def _write_bool(self, value): - self._write(struct.pack("B", value)) + self._write(1 if value == True else 0) def _write_bytes(self, value): self._write_int32(len(value)) @@ -308,6 +331,7 @@ class CommKernel: def check_system_info(self): self._write_empty(Request.SystemInfo) + self._flush() self._read_header() self._read_expect(Reply.SystemInfo) @@ -332,6 +356,7 @@ class CommKernel: def load(self, kernel_library): self._write_header(Request.LoadKernel) self._write_bytes(kernel_library) + self._flush() self._read_header() if self._read_type == Reply.LoadFailed: @@ -341,6 +366,7 @@ class CommKernel: def run(self): self._write_empty(Request.RunKernel) + self._flush() logger.debug("running kernel") _rpc_sentinel = object() @@ -441,14 +467,11 @@ class CommKernel: if tag_element == "b": self._write(bytes(value)) elif tag_element == "i": - array = numpy.array(value, '>i4') - self._write(array.tobytes()) + self._write(struct.pack(">%sl" % len(value), *value)) elif tag_element == "I": - array = numpy.array(value, '>i8') - self._write(array.tobytes()) + self._write(struct.pack(">%sq" % len(value), *value)) elif tag_element == "f": - array = numpy.array(value, '>d') - self._write(array.tobytes()) + self._write(struct.pack(">%sd" % len(value), *value)) else: for elt in value: tags_copy = bytearray(tags) @@ -524,6 +547,7 @@ class CommKernel: 
self._write_bytes(return_tags) self._send_rpc_value(bytearray(return_tags), result, result, service) + self._flush() except RPCReturnValueError as exn: raise except Exception as exn: @@ -569,6 +593,7 @@ class CommKernel: self._write_int32(line) self._write_int32(-1) # column not known self._write_string(function) + self._flush() def _serve_exception(self, embedding_map, symbolizer, demangler): name = self._read_string() From 45ae6202c001d1d6719e23217faa258c5e72af42 Mon Sep 17 00:00:00 2001 From: Stephan Maka Date: Thu, 27 Aug 2020 23:56:57 +0200 Subject: [PATCH 09/26] build_soc: add identifier_str override option Signed-off-by: Stephan Maka --- RELEASE_NOTES.rst | 2 ++ artiq/build_soc.py | 6 ++++-- artiq/gateware/targets/kasli.py | 15 +++++++++------ artiq/gateware/targets/kasli_generic.py | 4 +++- artiq/gateware/targets/kc705.py | 8 +++++--- artiq/gateware/targets/metlino.py | 8 +++++--- artiq/gateware/targets/sayma_amc.py | 20 +++++++++++++++----- artiq/gateware/targets/sayma_rtm.py | 10 +++++++--- 8 files changed, 50 insertions(+), 23 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c6970f71d..e38441707 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -25,6 +25,8 @@ Highlights: * Core device: ``panic_reset 1`` now correctly resets the kernel CPU as well if communication CPU panic occurs. * NumberValue accepts a ``type`` parameter specifying the output as ``int`` or ``float`` +* A parameter `--identifier-str` has been added to many targets to aid + with reproducible builds. 
Breaking changes: diff --git a/artiq/build_soc.py b/artiq/build_soc.py index c5487d8d7..2770c2ff3 100644 --- a/artiq/build_soc.py +++ b/artiq/build_soc.py @@ -44,10 +44,12 @@ class ReprogrammableIdentifier(Module, AutoCSR): p_INIT=sum(1 << j if c & (1 << i) else 0 for j, c in enumerate(contents))) -def add_identifier(soc, *args, **kwargs): +def add_identifier(soc, *args, identifier_str=None, **kwargs): if hasattr(soc, "identifier"): raise ValueError - identifier_str = get_identifier_string(soc, *args, **kwargs) + if identifier_str is None: + # not overridden with --identifier-str + identifier_str = get_identifier_string(soc, *args, **kwargs) soc.submodules.identifier = ReprogrammableIdentifier(identifier_str) soc.config["IDENTIFIER_STR"] = identifier_str diff --git a/artiq/gateware/targets/kasli.py b/artiq/gateware/targets/kasli.py index 73162c65b..902f00047 100755 --- a/artiq/gateware/targets/kasli.py +++ b/artiq/gateware/targets/kasli.py @@ -99,7 +99,7 @@ class StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -109,7 +109,7 @@ class StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) if self.platform.hw_rev == "v2.0": self.submodules.error_led = gpio.GPIOOut(Cat( @@ -280,7 +280,7 @@ class MasterBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -290,7 +290,7 @@ class MasterBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) platform = 
self.platform @@ -453,13 +453,13 @@ class SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, *, with_wrpll=False, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, *, with_wrpll=False, identifier_str=None, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, **kwargs) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) platform = self.platform @@ -674,11 +674,14 @@ def main(): help="variant: {} (default: %(default)s)".format( "/".join(sorted(VARIANTS.keys())))) parser.add_argument("--with-wrpll", default=False, action="store_true") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() argdict = dict() if args.with_wrpll: argdict["with_wrpll"] = True + argdict["identifier_str"] = args.identifier_str variant = args.variant.lower() try: diff --git a/artiq/gateware/targets/kasli_generic.py b/artiq/gateware/targets/kasli_generic.py index bae599ed9..7109fc150 100755 --- a/artiq/gateware/targets/kasli_generic.py +++ b/artiq/gateware/targets/kasli_generic.py @@ -252,6 +252,8 @@ def main(): parser.set_defaults(output_dir="artiq_kasli") parser.add_argument("description", metavar="DESCRIPTION", help="JSON system description file") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() with open(args.description, "r") as f: @@ -269,7 +271,7 @@ def main(): else: raise ValueError("Invalid base") - soc = cls(description, **soc_kasli_argdict(args)) + soc = cls(description, identifier_str=args.identifier_str, **soc_kasli_argdict(args)) args.variant = description["variant"] build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/kc705.py b/artiq/gateware/targets/kc705.py index b586edda4..aa9fe96b7 100755 --- a/artiq/gateware/targets/kc705.py +++ 
b/artiq/gateware/targets/kc705.py @@ -119,7 +119,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -129,7 +129,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) if isinstance(self.platform.toolchain, XilinxVivadoToolchain): self.platform.toolchain.bitstream_commands.extend([ @@ -416,6 +416,8 @@ def main(): help="variant: " "nist_clock/nist_qc2/sma_spi " "(default: %(default)s)") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() variant = args.variant.lower() @@ -424,7 +426,7 @@ def main(): except KeyError: raise SystemExit("Invalid variant (-V/--variant)") - soc = cls(**soc_kc705_argdict(args)) + soc = cls(identifier_str=args.identifier_str, **soc_kc705_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/metlino.py b/artiq/gateware/targets/metlino.py index d1013653c..9c255f79d 100755 --- a/artiq/gateware/targets/metlino.py +++ b/artiq/gateware/targets/metlino.py @@ -38,7 +38,7 @@ class Master(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -49,7 +49,7 @@ class Master(MiniSoC, AMPSoC): csr_address_width=15, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) platform = self.platform rtio_clk_freq = 150e6 @@ -164,9 +164,11 @@ def main(): builder_args(parser) soc_sdram_args(parser) parser.set_defaults(output_dir="artiq_metlino") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") args = 
parser.parse_args() args.variant = "master" - soc = Master(**soc_sdram_argdict(args)) + soc = Master(identifier_str=args.identifier_str, **soc_sdram_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/sayma_amc.py b/artiq/gateware/targets/sayma_amc.py index c2cb435f1..90e486e2f 100755 --- a/artiq/gateware/targets/sayma_amc.py +++ b/artiq/gateware/targets/sayma_amc.py @@ -50,7 +50,7 @@ class SatelliteBase(MiniSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", with_sfp=False, *, with_wrpll, **kwargs): + def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", identifier_str=None, with_sfp=False, *, with_wrpll, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -59,7 +59,7 @@ class SatelliteBase(MiniSoC): ethmac_nrxslots=4, ethmac_ntxslots=4, **kwargs) - add_identifier(self, suffix=identifier_suffix) + add_identifier(self, suffix=identifier_suffix, identifier_str=identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -403,14 +403,24 @@ def main(): help="Change type of signal generator. 
This is used exclusively for " "development and debugging.") parser.add_argument("--with-wrpll", default=False, action="store_true") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() variant = args.variant.lower() if variant == "satellite": - soc = Satellite(with_sfp=args.sfp, jdcg_type=args.jdcg_type, with_wrpll=args.with_wrpll, - **soc_sayma_amc_argdict(args)) + soc = Satellite( + with_sfp=args.sfp, + jdcg_type=args.jdcg_type, + with_wrpll=args.with_wrpll, + identifier_str=args.identifier_str, + **soc_sayma_amc_argdict(args)) elif variant == "simplesatellite": - soc = SimpleSatellite(with_sfp=args.sfp, with_wrpll=args.with_wrpll, **soc_sayma_amc_argdict(args)) + soc = SimpleSatellite( + with_sfp=args.sfp, + with_wrpll=args.with_wrpll, + identifier_str=args.identifier_str, + **soc_sayma_amc_argdict(args)) else: raise SystemExit("Invalid variant (-V/--variant)") diff --git a/artiq/gateware/targets/sayma_rtm.py b/artiq/gateware/targets/sayma_rtm.py index bdef1fa09..0a670e04e 100755 --- a/artiq/gateware/targets/sayma_rtm.py +++ b/artiq/gateware/targets/sayma_rtm.py @@ -75,11 +75,11 @@ class _SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq, *, with_wrpll, **kwargs): + def __init__(self, rtio_clk_freq, *, with_wrpll, identifier_str, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", **kwargs) - add_identifier(self) + add_identifier(self, identifier_str=identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -299,11 +299,15 @@ def main(): parser.add_argument("--rtio-clk-freq", default=150, type=int, help="RTIO clock frequency in MHz") parser.add_argument("--with-wrpll", default=False, action="store_true") + parser.add_argument("--identifier-str", default=None, + help="Override ROM identifier") parser.set_defaults(output_dir=os.path.join("artiq_sayma", "rtm")) args = parser.parse_args() soc = Satellite( - 
rtio_clk_freq=1e6*args.rtio_clk_freq, with_wrpll=args.with_wrpll, + rtio_clk_freq=1e6*args.rtio_clk_freq, + with_wrpll=args.with_wrpll, + identifier_str=args.identifier_str, **soc_sayma_rtm_argdict(args)) builder = SatmanSoCBuilder(soc, **builder_argdict(args)) try: From 1ad9deaf910dd828321b3eedc8639f44569103b4 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Mon, 31 Aug 2020 13:55:19 +0800 Subject: [PATCH 10/26] fmcdio_vhdci_eem: fix pin naming --- artiq/gateware/fmcdio_vhdci_eem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/artiq/gateware/fmcdio_vhdci_eem.py b/artiq/gateware/fmcdio_vhdci_eem.py index c1ab62864..0296efe8e 100644 --- a/artiq/gateware/fmcdio_vhdci_eem.py +++ b/artiq/gateware/fmcdio_vhdci_eem.py @@ -19,8 +19,9 @@ def _get_connectors(): for j, pair in enumerate(eem_fmc_connections[i]): for pn in "n", "p": cc = "cc_" if j == 0 else "" + lpc_cc = "CC_" if eem_fmc_connections[i][j] in (0, 1, 17, 18) else "" connections["d{}_{}{}".format(j, cc, pn)] = \ - "LPC:LA{:02d}_{}{}".format(pair, cc.upper(), pn.upper()) + "LPC:LA{:02d}_{}{}".format(pair, lpc_cc, pn.upper()) connectors.append(("eem{}".format(i), connections)) return connectors From dfbf3311cb112dcccba3c55c5b4c8c596359f24f Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Mon, 31 Aug 2020 15:26:39 +0800 Subject: [PATCH 11/26] sayma_amc: add support for 4x DIO output channels via FMC --- artiq/gateware/targets/sayma_amc.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/artiq/gateware/targets/sayma_amc.py b/artiq/gateware/targets/sayma_amc.py index 90e486e2f..34eb6a606 100755 --- a/artiq/gateware/targets/sayma_amc.py +++ b/artiq/gateware/targets/sayma_amc.py @@ -12,8 +12,10 @@ from misoc.interconnect.csr import * from misoc.targets.sayma_amc import * from artiq.gateware.amp import AMPSoC +from artiq.gateware import eem from artiq.gateware import rtio from artiq.gateware import jesd204_tools +from artiq.gateware import fmcdio_vhdci_eem 
from artiq.gateware.rtio.phy import ttl_simple, ttl_serdes_ultrascale, sawg from artiq.gateware.drtio.transceiver import gth_ultrascale from artiq.gateware.drtio.siphaser import SiPhaser7Series @@ -284,7 +286,7 @@ class JDCGSyncDDS(Module, AutoCSR): class Satellite(SatelliteBase): """ - DRTIO satellite with local DAC/SAWG channels. + DRTIO satellite with local DAC/SAWG channels, as well as TTL channels via FMC and VHDCI carrier. """ def __init__(self, jdcg_type, **kwargs): SatelliteBase.__init__(self, 150e6, @@ -307,7 +309,7 @@ class Satellite(SatelliteBase): self.csr_devices.append("slave_fpga_cfg") self.config["SLAVE_FPGA_GATEWARE"] = 0x200000 - rtio_channels = [] + self.rtio_channels = rtio_channels = [] for i in range(4): phy = ttl_simple.Output(platform.request("user_led", i)) self.submodules += phy @@ -343,6 +345,27 @@ class Satellite(SatelliteBase): self.jdcg_1.sawgs for phy in sawg.phys) + # FMC-VHDCI-EEM DIOs x 2 (all OUTPUTs) + platform.add_connectors(fmcdio_vhdci_eem.connectors) + eem.DIO.add_std(self, 0, + ttl_simple.Output, ttl_simple.Output, iostandard="LVDS") + eem.DIO.add_std(self, 1, + ttl_simple.Output, ttl_simple.Output, iostandard="LVDS") + # FMC-DIO-32ch-LVDS-a Direction Control Pins (via shift register) as TTLs x 3 + platform.add_extension(fmcdio_vhdci_eem.io) + print("fmcdio_vhdci_eem.[CLK, SER, LATCH] starting at RTIO channel 0x{:06x}" + .format(len(rtio_channels))) + fmcdio_dirctl = platform.request("fmcdio_dirctl", 0) + fmcdio_dirctl_phys = [ + ttl_simple.Output(fmcdio_dirctl.clk), + ttl_simple.Output(fmcdio_dirctl.ser), + ttl_simple.Output(fmcdio_dirctl.latch) + ] + for phy in fmcdio_dirctl_phys: + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + workaround_us_lvds_tristate(platform) + self.add_rtio(rtio_channels) self.submodules.sysref_sampler = jesd204_tools.SysrefSampler( From 3d8413581010c53b89696ad961f12ad299815365 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Mon, 31 Aug 2020 12:40:41 +0800 Subject: [PATCH 
12/26] examples: add Metlino master, Sayma satellite with TTLOuts via FMC --- artiq/examples/metlino_sayma_ttl/device_db.py | 95 +++++++++++++ .../metlino_sayma_ttl/repository/demo.py | 134 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 artiq/examples/metlino_sayma_ttl/device_db.py create mode 100644 artiq/examples/metlino_sayma_ttl/repository/demo.py diff --git a/artiq/examples/metlino_sayma_ttl/device_db.py b/artiq/examples/metlino_sayma_ttl/device_db.py new file mode 100644 index 000000000..d60addb5e --- /dev/null +++ b/artiq/examples/metlino_sayma_ttl/device_db.py @@ -0,0 +1,95 @@ +core_addr = "192.168.1.65" + +device_db = { + "core": { + "type": "local", + "module": "artiq.coredevice.core", + "class": "Core", + "arguments": {"host": core_addr, "ref_period": 1/(8*125e6)} + }, + "core_log": { + "type": "controller", + "host": "::1", + "port": 1068, + "command": "aqctl_corelog -p {port} --bind {bind} " + core_addr + }, + "core_cache": { + "type": "local", + "module": "artiq.coredevice.cache", + "class": "CoreCache" + }, + "core_dma": { + "type": "local", + "module": "artiq.coredevice.dma", + "class": "CoreDMA" + } +} + +# master peripherals +for i in range(4): + device_db["led" + str(i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": i}, +} + +# DEST#1 peripherals +amc_base = 0x070000 +rtm_base = 0x020000 + +for i in range(4): + device_db["led" + str(4+i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + i}, + } + +#DIO (EEM0) starting at RTIO channel 0x000056 +for i in range(8): + device_db["ttl" + str(i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000056 + i}, + } + +#DIO (EEM1) starting at RTIO channel 0x00005e +for i in range(8): + device_db["ttl" + str(8+i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": 
"TTLOut", + "arguments": {"channel": amc_base + 0x00005e + i}, + } + +device_db["fmcdio_dirctl_clk"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000066} +} + +device_db["fmcdio_dirctl_ser"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000067} +} + +device_db["fmcdio_dirctl_latch"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000068} +} + +device_db["fmcdio_dirctl"] = { + "type": "local", + "module": "artiq.coredevice.shiftreg", + "class": "ShiftReg", + "arguments": {"clk": "fmcdio_dirctl_clk", + "ser": "fmcdio_dirctl_ser", + "latch": "fmcdio_dirctl_latch"} +} diff --git a/artiq/examples/metlino_sayma_ttl/repository/demo.py b/artiq/examples/metlino_sayma_ttl/repository/demo.py new file mode 100644 index 000000000..bfc19255a --- /dev/null +++ b/artiq/examples/metlino_sayma_ttl/repository/demo.py @@ -0,0 +1,134 @@ +import sys +import os +import select + +from artiq.experiment import * +from artiq.coredevice.fmcdio_vhdci_eem import * + + +def chunker(seq, size): + res = [] + for el in seq: + res.append(el) + if len(res) == size: + yield res + res = [] + if res: + yield res + + +def is_enter_pressed() -> TBool: + if os.name == "nt": + if msvcrt.kbhit() and msvcrt.getch() == b"\r": + return True + else: + return False + else: + if select.select([sys.stdin, ], [], [], 0.0)[0]: + sys.stdin.read(1) + return True + else: + return False + + +class Demo(EnvExperiment): + def build(self): + self.setattr_device("core") + self.setattr_device("fmcdio_dirctl") + + self.leds = dict() + self.ttl_outs = dict() + + ddb = self.get_device_db() + for name, desc in ddb.items(): + if isinstance(desc, dict) and desc["type"] == "local": + module, cls = desc["module"], desc["class"] + if (module, cls) == ("artiq.coredevice.ttl", "TTLOut"): + dev = self.get_device(name) + 
if "led" in name: # guess + self.leds[name] = dev + elif "ttl" in name: # to exclude fmcdio_dirctl + self.ttl_outs[name] = dev + + self.leds = sorted(self.leds.items(), key=lambda x: x[1].channel) + self.ttl_outs = sorted(self.ttl_outs.items(), key=lambda x: x[1].channel) + + self.dirctl_word = ( + shiftreg_bits(0, dio_bank0_out_pins | dio_bank1_out_pins) | + shiftreg_bits(1, dio_bank0_out_pins | dio_bank1_out_pins) + ) + + @kernel + def drtio_is_up(self, drtio_index): + if not self.core.get_rtio_destination_status(drtio_index): + return False + print("DRTIO #", drtio_index, "is ready\n") + return True + + @kernel + def test_led(self, led): + while not is_enter_pressed(): + self.core.break_realtime() + # do not fill the FIFOs too much to avoid long response times + t = now_mu() - self.core.seconds_to_mu(0.2) + while self.core.get_rtio_counter_mu() < t: + pass + for i in range(3): + led.pulse(100*ms) + delay(100*ms) + + @kernel + def test_leds(self): + print("*** Testing LEDs.") + print("Check for blinking. Press ENTER when done.") + + for i in range(len(self.leds)): + led = self.leds[i:i+1] + print("Testing LED:", i) + self.test_led([dev for _, dev in led][0]) + + @kernel + def test_ttl_out_chunk(self, ttl_chunk): + while not is_enter_pressed(): + self.core.break_realtime() + for _ in range(50000): + i = 0 + for ttl in ttl_chunk: + i += 1 + for _ in range(i): + ttl.pulse(1*us) + delay(1*us) + delay(10*us) + + @kernel + def test_ttl_outs(self): + print("*** Testing TTL outputs.") + print("Outputs are tested in groups of 4. 
Touch each TTL connector") + print("with the oscilloscope probe tip, and check that the number of") + print("pulses corresponds to its number in the group.") + print("Press ENTER when done.") + + # for ttl_chunk in chunker(self.ttl_outs, 4): + for i in range(len(self.ttl_outs) // 4): + chunk_start, chunk_end = i*4, (i+1)*4 + ttl_chunk = self.ttl_outs[chunk_start:chunk_end] + print("Testing TTL outputs:", chunk_start, chunk_start+1, chunk_start+2, chunk_start+3) + self.test_ttl_out_chunk([dev for _, dev in ttl_chunk]) + + @kernel + def run(self): + self.core.reset() + delay(10*ms) + print("*** Waiting for DRTIO ready...") + drtio_indices = [7] + for i in drtio_indices: + while not self.drtio_is_up(i): + pass + + self.fmcdio_dirctl.set(self.dirctl_word) + delay(10*ms) + + if self.leds: + self.test_leds() + if self.ttl_outs: + self.test_ttl_outs() From 91df3d729001156ef45bf98824d37665174577e0 Mon Sep 17 00:00:00 2001 From: Stephan Maka Date: Mon, 31 Aug 2020 21:44:27 +0200 Subject: [PATCH 13/26] build_soc: override identifier_str only for gateware --- artiq/build_soc.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/artiq/build_soc.py b/artiq/build_soc.py index 2770c2ff3..e49d1b862 100644 --- a/artiq/build_soc.py +++ b/artiq/build_soc.py @@ -47,12 +47,11 @@ class ReprogrammableIdentifier(Module, AutoCSR): def add_identifier(soc, *args, identifier_str=None, **kwargs): if hasattr(soc, "identifier"): raise ValueError - if identifier_str is None: - # not overridden with --identifier-str - identifier_str = get_identifier_string(soc, *args, **kwargs) - soc.submodules.identifier = ReprogrammableIdentifier(identifier_str) - soc.config["IDENTIFIER_STR"] = identifier_str + software_identifier_str = get_identifier_string(soc, *args, **kwargs) + gateware_identifier_str = identifier_str or software_identifier_str + soc.submodules.identifier = ReprogrammableIdentifier(gateware_identifier_str) + soc.config["IDENTIFIER_STR"] = software_identifier_str def 
build_artiq_soc(soc, argdict): From f294d039b3eb0cdfebc28a9ac8d89252b0fecaa8 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 1 Sep 2020 16:47:04 +0800 Subject: [PATCH 14/26] test: skip NonexistentI2CBus if I2C is not supported --- artiq/test/coredevice/test_i2c.py | 1 + 1 file changed, 1 insertion(+) diff --git a/artiq/test/coredevice/test_i2c.py b/artiq/test/coredevice/test_i2c.py index b3d03c107..e5424efc6 100644 --- a/artiq/test/coredevice/test_i2c.py +++ b/artiq/test/coredevice/test_i2c.py @@ -24,6 +24,7 @@ class I2CSwitch(EnvExperiment): class NonexistentI2CBus(EnvExperiment): def build(self): self.setattr_device("core") + self.setattr_device("i2c_switch") # HACK: only run this test on boards with I2C self.broken_switch = PCA9548(self._HasEnvironment__device_mgr, 255) @kernel From 8d5dc0ad2af5ca4676c550522387ebf043ef606c Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 1 Sep 2020 17:08:26 +0800 Subject: [PATCH 15/26] test: relax test_pulse_rate on Zynq --- artiq/test/coredevice/test_rtio.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index 96437d2a2..137abc734 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -12,6 +12,7 @@ from artiq.coredevice import exceptions from artiq.coredevice.comm_mgmt import CommMgmt from artiq.coredevice.comm_analyzer import (StoppedMessage, OutputMessage, InputMessage, decode_dump, get_analyzer_dump) +from artiq.compiler.targets import CortexA9Target artiq_low_latency = os.getenv("ARTIQ_LOW_LATENCY") @@ -460,11 +461,15 @@ class CoredeviceTest(ExperimentCase): def test_pulse_rate(self): """Minimum interval for sustained TTL output switching""" - self.execute(PulseRate) + exp = self.execute(PulseRate) rate = self.dataset_mgr.get("pulse_rate") print(rate) self.assertGreater(rate, 100*ns) - self.assertLess(rate, 480*ns) + if exp.core.target_cls == 
CortexA9Target: + # Crappy AXI PS/PL interface from Xilinx is slow. + self.assertLess(rate, 810*ns) + else: + self.assertLess(rate, 480*ns) def test_pulse_rate_ad9914_dds(self): """Minimum interval for sustained AD9914 DDS frequency switching""" From f0289d49ab1c7686e10c8390d0d28af7125abe59 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 1 Sep 2020 17:49:40 +0800 Subject: [PATCH 16/26] test: temporarily disable test_async_throughput https://git.m-labs.hk/M-Labs/artiq-zynq/issues/104 --- artiq/test/coredevice/test_performance.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index 3ca1f86ba..4d7545c67 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -264,10 +264,10 @@ class TransferTest(ExperimentCase): self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), device_to_host.std()]) - def test_async_throughput(self): - exp = self.create(_Transfer) - results = exp.test_async() - print("Async throughput: {:>6.2f}MiB/s".format(results)) + #def test_async_throughput(self): + # exp = self.create(_Transfer) + # results = exp.test_async() + # print("Async throughput: {:>6.2f}MiB/s".format(results)) class _KernelOverhead(EnvExperiment): def build(self): From 4398a2d5fa4dff9da412945c4f42d7fa429f752e Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 1 Sep 2020 17:50:09 +0800 Subject: [PATCH 17/26] test: relax loopback gate timing --- artiq/test/coredevice/test_rtio.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index 137abc734..de7318d27 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -231,17 +231,18 @@ class LoopbackGateTiming(EnvExperiment): # With the exact delay known, make sure tight gate timings work. 
# In the most common configuration, 24 mu == 24 ns == 3 coarse periods, # which should be plenty of slack. + # FIXME: ZC706 with NIST_QC2 needs 48ns - hw problem? delay_mu(10000) gate_start_mu = now_mu() - self.loop_in.gate_both_mu(24) + self.loop_in.gate_both_mu(48) # XXX gate_end_mu = now_mu() # gateware latency offset between gate and input lat_offset = 11*8 out_mu = gate_start_mu - loop_delay_mu + lat_offset at_mu(out_mu) - self.loop_out.pulse_mu(24) + self.loop_out.pulse_mu(48) # XXX in_mu = self.loop_in.timestamp_mu(gate_end_mu) print("timings: ", gate_start_mu, in_mu - lat_offset, gate_end_mu) From 002a71dd8dd9fe77d9f103288c55f988f3ad5d2c Mon Sep 17 00:00:00 2001 From: Stephan Maka Date: Tue, 1 Sep 2020 17:48:43 +0200 Subject: [PATCH 18/26] build_soc: rename identifier_str to gateware_identifier_str --- artiq/build_soc.py | 13 +++++++------ artiq/gateware/targets/kasli.py | 16 ++++++++-------- artiq/gateware/targets/kasli_generic.py | 4 ++-- artiq/gateware/targets/kc705.py | 8 ++++---- artiq/gateware/targets/metlino.py | 8 ++++---- artiq/gateware/targets/sayma_amc.py | 10 +++++----- artiq/gateware/targets/sayma_rtm.py | 8 ++++---- 7 files changed, 34 insertions(+), 33 deletions(-) diff --git a/artiq/build_soc.py b/artiq/build_soc.py index e49d1b862..ddce92fb1 100644 --- a/artiq/build_soc.py +++ b/artiq/build_soc.py @@ -44,14 +44,15 @@ class ReprogrammableIdentifier(Module, AutoCSR): p_INIT=sum(1 << j if c & (1 << i) else 0 for j, c in enumerate(contents))) -def add_identifier(soc, *args, identifier_str=None, **kwargs): +def add_identifier(soc, *args, gateware_identifier_str=None, **kwargs): if hasattr(soc, "identifier"): raise ValueError - software_identifier_str = get_identifier_string(soc, *args, **kwargs) - gateware_identifier_str = identifier_str or software_identifier_str - - soc.submodules.identifier = ReprogrammableIdentifier(gateware_identifier_str) - soc.config["IDENTIFIER_STR"] = software_identifier_str + if gateware_identifier_str is None: + # not 
overridden with --identifier-str + raise ValueError("gateware_identifier_str not overridden") + identifier_str = get_identifier_string(soc, *args, **kwargs) + soc.submodules.identifier = ReprogrammableIdentifier(gateware_identifier_str or identifier_str) + soc.config["IDENTIFIER_STR"] = identifier_str def build_artiq_soc(soc, argdict): diff --git a/artiq/gateware/targets/kasli.py b/artiq/gateware/targets/kasli.py index 902f00047..96a4df558 100755 --- a/artiq/gateware/targets/kasli.py +++ b/artiq/gateware/targets/kasli.py @@ -99,7 +99,7 @@ class StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, identifier_str=None, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -109,7 +109,7 @@ class StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) if self.platform.hw_rev == "v2.0": self.submodules.error_led = gpio.GPIOOut(Cat( @@ -280,7 +280,7 @@ class MasterBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, identifier_str=None, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -290,7 +290,7 @@ class MasterBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform @@ -453,13 +453,13 @@ class SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, *, with_wrpll=False, identifier_str=None, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, 
*, with_wrpll=False, gateware_identifier_str=None, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, **kwargs) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform @@ -674,14 +674,14 @@ def main(): help="variant: {} (default: %(default)s)".format( "/".join(sorted(VARIANTS.keys())))) parser.add_argument("--with-wrpll", default=False, action="store_true") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") args = parser.parse_args() argdict = dict() if args.with_wrpll: argdict["with_wrpll"] = True - argdict["identifier_str"] = args.identifier_str + argdict["gateware_identifier_str"] = args.gateware_identifier_str variant = args.variant.lower() try: diff --git a/artiq/gateware/targets/kasli_generic.py b/artiq/gateware/targets/kasli_generic.py index 7109fc150..939f60716 100755 --- a/artiq/gateware/targets/kasli_generic.py +++ b/artiq/gateware/targets/kasli_generic.py @@ -252,7 +252,7 @@ def main(): parser.set_defaults(output_dir="artiq_kasli") parser.add_argument("description", metavar="DESCRIPTION", help="JSON system description file") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") args = parser.parse_args() @@ -271,7 +271,7 @@ def main(): else: raise ValueError("Invalid base") - soc = cls(description, identifier_str=args.identifier_str, **soc_kasli_argdict(args)) + soc = cls(description, gateware_identifier_str=args.gateware_identifier_str, **soc_kasli_argdict(args)) args.variant = description["variant"] build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/kc705.py b/artiq/gateware/targets/kc705.py index aa9fe96b7..1481fd351 100755 --- a/artiq/gateware/targets/kc705.py +++ 
b/artiq/gateware/targets/kc705.py @@ -119,7 +119,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, identifier_str=None, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -129,7 +129,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) if isinstance(self.platform.toolchain, XilinxVivadoToolchain): self.platform.toolchain.bitstream_commands.extend([ @@ -416,7 +416,7 @@ def main(): help="variant: " "nist_clock/nist_qc2/sma_spi " "(default: %(default)s)") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") args = parser.parse_args() @@ -426,7 +426,7 @@ def main(): except KeyError: raise SystemExit("Invalid variant (-V/--variant)") - soc = cls(identifier_str=args.identifier_str, **soc_kc705_argdict(args)) + soc = cls(gateware_identifier_str=args.gateware_identifier_str, **soc_kc705_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/metlino.py b/artiq/gateware/targets/metlino.py index 9c255f79d..2139278ef 100755 --- a/artiq/gateware/targets/metlino.py +++ b/artiq/gateware/targets/metlino.py @@ -38,7 +38,7 @@ class Master(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, identifier_str=None, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -49,7 +49,7 @@ class Master(MiniSoC, AMPSoC): csr_address_width=15, **kwargs) AMPSoC.__init__(self) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform 
rtio_clk_freq = 150e6 @@ -164,11 +164,11 @@ def main(): builder_args(parser) soc_sdram_args(parser) parser.set_defaults(output_dir="artiq_metlino") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") args = parser.parse_args() args.variant = "master" - soc = Master(identifier_str=args.identifier_str, **soc_sdram_argdict(args)) + soc = Master(gateware_identifier_str=args.gateware_identifier_str, **soc_sdram_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/sayma_amc.py b/artiq/gateware/targets/sayma_amc.py index 34eb6a606..7f8a8a5b8 100755 --- a/artiq/gateware/targets/sayma_amc.py +++ b/artiq/gateware/targets/sayma_amc.py @@ -52,7 +52,7 @@ class SatelliteBase(MiniSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", identifier_str=None, with_sfp=False, *, with_wrpll, **kwargs): + def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", gateware_identifier_str=None, with_sfp=False, *, with_wrpll, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -61,7 +61,7 @@ class SatelliteBase(MiniSoC): ethmac_nrxslots=4, ethmac_ntxslots=4, **kwargs) - add_identifier(self, suffix=identifier_suffix, identifier_str=identifier_str) + add_identifier(self, suffix=identifier_suffix, gateware_identifier_str=gateware_identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -426,7 +426,7 @@ def main(): help="Change type of signal generator. 
This is used exclusively for " "development and debugging.") parser.add_argument("--with-wrpll", default=False, action="store_true") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") args = parser.parse_args() @@ -436,13 +436,13 @@ def main(): with_sfp=args.sfp, jdcg_type=args.jdcg_type, with_wrpll=args.with_wrpll, - identifier_str=args.identifier_str, + gateware_identifier_str=args.gateware_identifier_str, **soc_sayma_amc_argdict(args)) elif variant == "simplesatellite": soc = SimpleSatellite( with_sfp=args.sfp, with_wrpll=args.with_wrpll, - identifier_str=args.identifier_str, + gateware_identifier_str=args.gateware_identifier_str, **soc_sayma_amc_argdict(args)) else: raise SystemExit("Invalid variant (-V/--variant)") diff --git a/artiq/gateware/targets/sayma_rtm.py b/artiq/gateware/targets/sayma_rtm.py index 0a670e04e..294a17823 100755 --- a/artiq/gateware/targets/sayma_rtm.py +++ b/artiq/gateware/targets/sayma_rtm.py @@ -75,11 +75,11 @@ class _SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq, *, with_wrpll, identifier_str, **kwargs): + def __init__(self, rtio_clk_freq, *, with_wrpll, gateware_identifier_str, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", **kwargs) - add_identifier(self, identifier_str=identifier_str) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -299,7 +299,7 @@ def main(): parser.add_argument("--rtio-clk-freq", default=150, type=int, help="RTIO clock frequency in MHz") parser.add_argument("--with-wrpll", default=False, action="store_true") - parser.add_argument("--identifier-str", default=None, + parser.add_argument("--gateware-identifier-str", default=None, help="Override ROM identifier") parser.set_defaults(output_dir=os.path.join("artiq_sayma", "rtm")) args = parser.parse_args() @@ -307,7 +307,7 @@ def 
main(): soc = Satellite( rtio_clk_freq=1e6*args.rtio_clk_freq, with_wrpll=args.with_wrpll, - identifier_str=args.identifier_str, + gateware_identifier_str=args.gateware_identifier_str, **soc_sayma_rtm_argdict(args)) builder = SatmanSoCBuilder(soc, **builder_argdict(args)) try: From 47e88dfcbe925890fec49b83799cec1a52d2cbc4 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 3 Sep 2020 14:19:55 +0800 Subject: [PATCH 19/26] Revert "test: temporarily disable test_async_throughput" This reverts commit f0289d49ab1c7686e10c8390d0d28af7125abe59. --- artiq/test/coredevice/test_performance.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index 4d7545c67..3ca1f86ba 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -264,10 +264,10 @@ class TransferTest(ExperimentCase): self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), device_to_host.std()]) - #def test_async_throughput(self): - # exp = self.create(_Transfer) - # results = exp.test_async() - # print("Async throughput: {:>6.2f}MiB/s".format(results)) + def test_async_throughput(self): + exp = self.create(_Transfer) + results = exp.test_async() + print("Async throughput: {:>6.2f}MiB/s".format(results)) class _KernelOverhead(EnvExperiment): def build(self): From 458a411320afb98e71b5d583962f154946c83d38 Mon Sep 17 00:00:00 2001 From: Harry Ho Date: Thu, 3 Sep 2020 15:08:31 +0800 Subject: [PATCH 20/26] metlino_sayma_ttl: Fix RTIO frequency & demo code (#1516) --- artiq/examples/metlino_sayma_ttl/device_db.py | 2 +- .../metlino_sayma_ttl/repository/demo.py | 39 ++++++++----------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/artiq/examples/metlino_sayma_ttl/device_db.py b/artiq/examples/metlino_sayma_ttl/device_db.py index d60addb5e..c8c3acb8e 100644 --- a/artiq/examples/metlino_sayma_ttl/device_db.py +++ 
b/artiq/examples/metlino_sayma_ttl/device_db.py @@ -5,7 +5,7 @@ device_db = { "type": "local", "module": "artiq.coredevice.core", "class": "Core", - "arguments": {"host": core_addr, "ref_period": 1/(8*125e6)} + "arguments": {"host": core_addr, "ref_period": 1/(8*150e6)} }, "core_log": { "type": "controller", diff --git a/artiq/examples/metlino_sayma_ttl/repository/demo.py b/artiq/examples/metlino_sayma_ttl/repository/demo.py index bfc19255a..bb273ce2c 100644 --- a/artiq/examples/metlino_sayma_ttl/repository/demo.py +++ b/artiq/examples/metlino_sayma_ttl/repository/demo.py @@ -58,6 +58,17 @@ class Demo(EnvExperiment): shiftreg_bits(1, dio_bank0_out_pins | dio_bank1_out_pins) ) + @kernel + def init(self): + self.core.break_realtime() + print("*** Waiting for DRTIO ready...") + drtio_indices = [7] + for i in drtio_indices: + while not self.drtio_is_up(i): + pass + + self.fmcdio_dirctl.set(self.dirctl_word) + @kernel def drtio_is_up(self, drtio_index): if not self.core.get_rtio_destination_status(drtio_index): @@ -77,15 +88,13 @@ class Demo(EnvExperiment): led.pulse(100*ms) delay(100*ms) - @kernel def test_leds(self): print("*** Testing LEDs.") print("Check for blinking. Press ENTER when done.") - for i in range(len(self.leds)): - led = self.leds[i:i+1] - print("Testing LED:", i) - self.test_led([dev for _, dev in led][0]) + for led_name, led_dev in self.leds: + print("Testing LED: {}".format(led_name)) + self.test_led(led_dev) @kernel def test_ttl_out_chunk(self, ttl_chunk): @@ -100,7 +109,6 @@ class Demo(EnvExperiment): delay(1*us) delay(10*us) - @kernel def test_ttl_outs(self): print("*** Testing TTL outputs.") print("Outputs are tested in groups of 4. 
Touch each TTL connector") @@ -108,25 +116,12 @@ class Demo(EnvExperiment): print("pulses corresponds to its number in the group.") print("Press ENTER when done.") - # for ttl_chunk in chunker(self.ttl_outs, 4): - for i in range(len(self.ttl_outs) // 4): - chunk_start, chunk_end = i*4, (i+1)*4 - ttl_chunk = self.ttl_outs[chunk_start:chunk_end] - print("Testing TTL outputs:", chunk_start, chunk_start+1, chunk_start+2, chunk_start+3) - self.test_ttl_out_chunk([dev for _, dev in ttl_chunk]) + for ttl_chunk in chunker(self.ttl_outs, 4): + print("Testing TTL outputs: {}.".format(", ".join(name for name, dev in ttl_chunk))) + self.test_ttl_out_chunk([dev for name, dev in ttl_chunk]) - @kernel def run(self): self.core.reset() - delay(10*ms) - print("*** Waiting for DRTIO ready...") - drtio_indices = [7] - for i in drtio_indices: - while not self.drtio_is_up(i): - pass - - self.fmcdio_dirctl.set(self.dirctl_word) - delay(10*ms) if self.leds: self.test_leds() From 1b475bdac41f746d5c0657ca3b617cadb69ec1e6 Mon Sep 17 00:00:00 2001 From: Stephan Maka Date: Thu, 3 Sep 2020 14:21:36 +0200 Subject: [PATCH 21/26] build_soc: remove assertion that was used for test runs --- artiq/build_soc.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/artiq/build_soc.py b/artiq/build_soc.py index ddce92fb1..e3a1f3360 100644 --- a/artiq/build_soc.py +++ b/artiq/build_soc.py @@ -47,9 +47,6 @@ class ReprogrammableIdentifier(Module, AutoCSR): def add_identifier(soc, *args, gateware_identifier_str=None, **kwargs): if hasattr(soc, "identifier"): raise ValueError - if gateware_identifier_str is None: - # not overridden with --identifier-str - raise ValueError("gateware_identifier_str not overridden") identifier_str = get_identifier_string(soc, *args, **kwargs) soc.submodules.identifier = ReprogrammableIdentifier(gateware_identifier_str or identifier_str) soc.config["IDENTIFIER_STR"] = identifier_str From 56aa22caeb02328eb5403d0c076d3ce168a34e51 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Thu, 3 Sep 2020 17:42:35 +0200 Subject: [PATCH 22/26] fastino: document/cleanup * added documentation on `update`/`hold` mechanism * mask machine unit values * cleanup coredevice driver close #1518 --- artiq/coredevice/fastino.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/artiq/coredevice/fastino.py b/artiq/coredevice/fastino.py index 32306388a..200d89018 100644 --- a/artiq/coredevice/fastino.py +++ b/artiq/coredevice/fastino.py @@ -1,25 +1,44 @@ """RTIO driver for the Fastino 32channel, 16 bit, 2.5 MS/s per channel, streaming DAC. - -TODO: Example, describe update/hold """ from artiq.language.core import kernel, portable, delay -from artiq.coredevice.rtio import rtio_output, rtio_output_wide, rtio_input_data +from artiq.coredevice.rtio import (rtio_output, rtio_output_wide, + rtio_input_data) from artiq.language.units import us -from artiq.language.types import TInt32, TList, TFloat +from artiq.language.types import TInt32, TList class Fastino: """Fastino 32-channel, 16-bit, 2.5 MS/s per channel streaming DAC + The RTIO PHY supports staging DAC data before transmitting them by writing + to the DAC RTIO addresses. If a channel is not "held" by setting its bit + using :meth:`set_hold`, the next frame will contain the update. For the + DACs held, the update is triggered explicitly by setting the corresponding + bit using :meth:`set_update`. Update is self-clearing. This enables atomic + DAC updates synchronized to a frame edge. + + The `log2_width=0` RTIO layout uses one DAC channel per RTIO address + and a dense RTIO address space. The RTIO words are narrow. + (32 bit compared to 512) and few-channel updates are efficient. + There is the least amount of DAC state tracking in kernels, + at the cost of more DMA and RTIO data. + + Other `log2_width` (up to `log2_width=5`) settings pack multiple + (in powers of two) DAC channels into one group and into one RTIO write. 
+ The RTIO data width increases accordingly. The `log2_width` + LSBs of the RTIO address for a DAC channel write must be zero and the + address space is sparse. + + If `log2_width` is zero, the :meth:`set_dac`/:meth:`set_dac_mu` interface + must be used. If non-zero, the :meth:`set_group`/:meth:`set_group_mu` + interface must be used. + :param channel: RTIO channel number :param core_device: Core device name (default: "core") :param log2_width: Width of DAC channel group (power of two, - see the RTIO PHY for details). If zero, the - :meth:`set_dac`/:meth:`set_dac_mu` interface must be used. - If non-zero, the :meth:`set_group`/:meth:`set_group_mu` - interface must be used. Value must match the corresponding value + see the RTIO PHY for details). Value must match the corresponding value in the RTIO PHY. """ kernel_invariants = {"core", "channel", "width"} @@ -94,7 +113,7 @@ class Fastino: :param voltage: Voltage in SI Volts. :return: DAC data word in machine units, 16 bit integer. """ - return int(round((0x8000/10.)*voltage)) + 0x8000 + return (int(round((0x8000/10.)*voltage)) + 0x8000) & 0xffff @portable def voltage_group_to_mu(self, voltage, data): From 6195b1d3a0d74b5587d62be425d2c3175331d10e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 4 Sep 2020 13:49:22 +0800 Subject: [PATCH 23/26] rpc: fixed _write_bool Closes #1519 --- artiq/coredevice/comm_kernel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/artiq/coredevice/comm_kernel.py b/artiq/coredevice/comm_kernel.py index babf035b3..98211d592 100644 --- a/artiq/coredevice/comm_kernel.py +++ b/artiq/coredevice/comm_kernel.py @@ -316,7 +316,7 @@ class CommKernel: self._write(self.pack_float64(value)) def _write_bool(self, value): - self._write(1 if value == True else 0) + self._write(b'\x01' if value else b'\x00') def _write_bytes(self, value): self._write_int32(len(value)) @@ -425,7 +425,7 @@ class CommKernel: elif tag == "b": check(isinstance(value, bool), lambda: "bool") - 
self._write_int8(value) + self._write_bool(value) elif tag == "i": check(isinstance(value, (int, numpy.int32)) and (-2**31 < value < 2**31-1), From bff611a888366d3f4cffe2086a257ad4ea8f2ad9 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 11 Sep 2020 11:21:45 +0800 Subject: [PATCH 24/26] test: relax test_dma_playback_time on Zynq --- artiq/test/coredevice/test_rtio.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index de7318d27..3313b5c14 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -627,11 +627,13 @@ class _DMA(EnvExperiment): self.delta = now_mu() - start @kernel - def playback_many(self, n): + def playback_many(self, n, add_delay=False): handle = self.core_dma.get_handle(self.trace_name) self.core.break_realtime() t1 = self.core.get_rtio_counter_mu() for i in range(n): + if add_delay: + delay(2*us) self.core_dma.playback_handle(handle) t2 = self.core.get_rtio_counter_mu() self.set_dataset("dma_playback_time", self.core.mu_to_seconds(t2 - t1)) @@ -724,13 +726,18 @@ class DMATest(ExperimentCase): self.device_mgr.get_desc("ad9914dds0") except KeyError: raise unittest.SkipTest("skipped on Kasli for now") + exp = self.create(_DMA) + is_zynq = exp.core.target_cls == CortexA9Target count = 20000 exp.record_many(40) - exp.playback_many(count) + exp.playback_many(count, is_zynq) dt = self.dataset_mgr.get("dma_playback_time") print("dt={}, dt/count={}".format(dt, dt/count)) - self.assertLess(dt/count, 4.5*us) + if is_zynq: + self.assertLess(dt/count, 6.2*us) + else: + self.assertLess(dt/count, 4.5*us) def test_dma_underflow(self): exp = self.create(_DMA) From 29c940f4e378073399412f3cc58c5443280bc714 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 17 Sep 2020 16:53:43 +0800 Subject: [PATCH 25/26] kasli2: forward sma_clkin to si5324 --- artiq/gateware/targets/kasli.py | 19 
+++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/artiq/gateware/targets/kasli.py b/artiq/gateware/targets/kasli.py index 96a4df558..d1eb75252 100755 --- a/artiq/gateware/targets/kasli.py +++ b/artiq/gateware/targets/kasli.py @@ -79,6 +79,21 @@ class _RTIOCRG(Module, AutoCSR): ] +class SMAClkinForward(Module): + def __init__(self, platform): + sma_clkin = platform.request("sma_clkin") + sma_clkin_se = Signal() + sma_clkin_buffered = Signal() + cdr_clk_se = Signal() + cdr_clk = platform.request("cdr_clk") + self.specials += [ + Instance("IBUFDS", i_I=sma_clkin.p, i_IB=sma_clkin.n, o_O=sma_clkin_se), + Instance("BUFIO", i_I=sma_clkin_se, o_O=sma_clkin_buffered), + Instance("ODDR", i_C=sma_clkin_buffered, i_CE=1, i_D1=0, i_D2=1, o_Q=cdr_clk_se), + Instance("OBUFDS", i_I=cdr_clk_se, o_O=cdr_clk.p, o_OB=cdr_clk.n) + ] + + def fix_serdes_timing_path(platform): # ignore timing of path from OSERDESE2 through the pad to ISERDESE2 platform.add_platform_command( @@ -115,6 +130,7 @@ class StandaloneBase(MiniSoC, AMPSoC): self.submodules.error_led = gpio.GPIOOut(Cat( self.platform.request("error_led"))) self.csr_devices.append("error_led") + self.submodules += SMAClkinForward(self.platform) i2c = self.platform.request("i2c") self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) @@ -294,6 +310,9 @@ class MasterBase(MiniSoC, AMPSoC): platform = self.platform + if platform.hw_rev == "v2.0": + self.submodules += SMAClkinForward(platform) + i2c = self.platform.request("i2c") self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) self.csr_devices.append("i2c") From c55f2222dc1fed45dbeb4c7dfd8e186f1c196407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20J=C3=B6rdens?= Date: Tue, 22 Sep 2020 17:58:53 +0200 Subject: [PATCH 26/26] fastino: documentation and eem pass-through * Repeat information about matching log2_width a few times in the hope that people read it. #1518 * Pass through log2_width in kasli_generic json. close #1481 * Check DAC value range. 
#1518 --- artiq/coredevice/fastino.py | 22 ++++++++++++---------- artiq/gateware/eem.py | 4 ++-- artiq/gateware/targets/kasli_generic.py | 3 ++- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/artiq/coredevice/fastino.py b/artiq/coredevice/fastino.py index 200d89018..73fcfdf38 100644 --- a/artiq/coredevice/fastino.py +++ b/artiq/coredevice/fastino.py @@ -19,17 +19,17 @@ class Fastino: bit using :meth:`set_update`. Update is self-clearing. This enables atomic DAC updates synchronized to a frame edge. - The `log2_width=0` RTIO layout uses one DAC channel per RTIO address - and a dense RTIO address space. The RTIO words are narrow. - (32 bit compared to 512) and few-channel updates are efficient. - There is the least amount of DAC state tracking in kernels, - at the cost of more DMA and RTIO data. + The `log2_width=0` RTIO layout uses one DAC channel per RTIO address and a + dense RTIO address space. The RTIO words are narrow (32 bit) and + few-channel updates are efficient. There is the least amount of DAC state + tracking in kernels, at the cost of more DMA and RTIO data. + The setting here and in the RTIO PHY (gateware) must match. Other `log2_width` (up to `log2_width=5`) settings pack multiple (in powers of two) DAC channels into one group and into one RTIO write. The RTIO data width increases accordingly. The `log2_width` LSBs of the RTIO address for a DAC channel write must be zero and the - address space is sparse. + address space is sparse. For `log2_width=5` the RTIO data is 512 bit wide. If `log2_width` is zero, the :meth:`set_dac`/:meth:`set_dac_mu` interface must be used. If non-zero, the :meth:`set_group`/:meth:`set_group_mu` @@ -37,9 +37,8 @@ class Fastino: :param channel: RTIO channel number :param core_device: Core device name (default: "core") - :param log2_width: Width of DAC channel group (power of two, - see the RTIO PHY for details). Value must match the corresponding value - in the RTIO PHY. 
+ :param log2_width: Width of DAC channel group (logarithm base 2). + Value must match the corresponding value in the RTIO PHY (gateware). """ kernel_invariants = {"core", "channel", "width"} @@ -113,7 +112,10 @@ class Fastino: :param voltage: Voltage in SI Volts. :return: DAC data word in machine units, 16 bit integer. """ - return (int(round((0x8000/10.)*voltage)) + 0x8000) & 0xffff + data = int(round((0x8000/10.)*voltage)) + 0x8000 + if data < 0 or data > 0xffff: + raise ValueError("DAC voltage out of bounds") + return data @portable def voltage_group_to_mu(self, voltage, data): diff --git a/artiq/gateware/eem.py b/artiq/gateware/eem.py index fba2c1b5a..19fbe7ea8 100644 --- a/artiq/gateware/eem.py +++ b/artiq/gateware/eem.py @@ -618,11 +618,11 @@ class Fastino(_EEM): ) for pol in "pn"] @classmethod - def add_std(cls, target, eem, iostandard="LVDS_25"): + def add_std(cls, target, eem, log2_width, iostandard="LVDS_25"): cls.add_extension(target, eem, iostandard=iostandard) phy = fastino.Fastino(target.platform.request("fastino{}_ser_p".format(eem)), target.platform.request("fastino{}_ser_n".format(eem)), - log2_width=0) + log2_width=log2_width) target.submodules += phy target.rtio_channels.append(rtio.Channel.from_phy(phy, ififo_depth=4)) diff --git a/artiq/gateware/targets/kasli_generic.py b/artiq/gateware/targets/kasli_generic.py index 939f60716..2fcd299a1 100755 --- a/artiq/gateware/targets/kasli_generic.py +++ b/artiq/gateware/targets/kasli_generic.py @@ -109,7 +109,8 @@ def peripheral_mirny(module, peripheral): def peripheral_fastino(module, peripheral): if len(peripheral["ports"]) != 1: raise ValueError("wrong number of ports") - eem.Fastino.add_std(module, peripheral["ports"][0]) + eem.Fastino.add_std(module, peripheral["ports"][0], + peripheral.get("log2_width", 0)) peripheral_processors = {