diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 7b9899d71..e38441707 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -11,6 +11,7 @@ Highlights: * Performance improvements: - #1432: SERDES TTL inputs can now detect edges on pulses that are shorter than the RTIO period + - Improved performance for kernel RPC involving lists and arrays. * Coredevice SI to mu conversions now always return valid codes, or raise a `ValueError`. * Zotino now exposes `voltage_to_mu()` * `ad9910`: The maximum amplitude scale factor is now `0x3fff` (was `0x3ffe` @@ -24,6 +25,8 @@ Highlights: * Core device: ``panic_reset 1`` now correctly resets the kernel CPU as well if communication CPU panic occurs. * NumberValue accepts a ``type`` parameter specifying the output as ``int`` or ``float`` +* A parameter `--gateware-identifier-str` has been added to many targets to aid + with reproducible builds. Breaking changes: diff --git a/artiq/build_soc.py b/artiq/build_soc.py index c5487d8d7..e3a1f3360 100644 --- a/artiq/build_soc.py +++ b/artiq/build_soc.py @@ -44,15 +44,14 @@ class ReprogrammableIdentifier(Module, AutoCSR): p_INIT=sum(1 << j if c & (1 << i) else 0 for j, c in enumerate(contents))) -def add_identifier(soc, *args, **kwargs): +def add_identifier(soc, *args, gateware_identifier_str=None, **kwargs): if hasattr(soc, "identifier"): raise ValueError identifier_str = get_identifier_string(soc, *args, **kwargs) - soc.submodules.identifier = ReprogrammableIdentifier(identifier_str) + soc.submodules.identifier = ReprogrammableIdentifier(gateware_identifier_str or identifier_str) soc.config["IDENTIFIER_STR"] = identifier_str - def build_artiq_soc(soc, argdict): firmware_dir = os.path.join(artiq_dir, "firmware") builder = Builder(soc, **argdict) diff --git a/artiq/coredevice/comm_kernel.py b/artiq/coredevice/comm_kernel.py index 41bddd553..98211d592 100644 --- a/artiq/coredevice/comm_kernel.py +++ b/artiq/coredevice/comm_kernel.py @@ -43,9 +43,11 @@ class Reply(Enum): class 
UnsupportedDevice(Exception): pass + class LoadError(Exception): pass + class RPCReturnValueError(ValueError): pass @@ -53,6 +55,105 @@ class RPCReturnValueError(ValueError): RPCKeyword = namedtuple('RPCKeyword', ['name', 'value']) +def _receive_fraction(kernel, embedding_map): + numerator = kernel._read_int64() + denominator = kernel._read_int64() + return Fraction(numerator, denominator) + + +def _receive_list(kernel, embedding_map): + length = kernel._read_int32() + tag = chr(kernel._read_int8()) + if tag == "b": + buffer = kernel._read(length) + return list(buffer) + elif tag == "i": + buffer = kernel._read(4 * length) + return list(struct.unpack(">%sl" % length, buffer)) + elif tag == "I": + buffer = kernel._read(8 * length) + return list(struct.unpack(">%sq" % length, buffer)) + elif tag == "f": + buffer = kernel._read(8 * length) + return list(struct.unpack(">%sd" % length, buffer)) + else: + fn = receivers[tag] + elems = [] + for _ in range(length): + # discard tag, as our device would still send the tag for each + # non-primitive elements. 
+ kernel._read_int8() + item = fn(kernel, embedding_map) + elems.append(item) + return elems + + +def _receive_array(kernel, embedding_map): + num_dims = kernel._read_int8() + shape = tuple(kernel._read_int32() for _ in range(num_dims)) + tag = chr(kernel._read_int8()) + fn = receivers[tag] + length = numpy.prod(shape) + if tag == "b": + buffer = kernel._read(length) + elems = numpy.ndarray((length, ), 'B', buffer) + elif tag == "i": + buffer = kernel._read(4 * length) + elems = numpy.ndarray((length, ), '>i4', buffer) + elif tag == "I": + buffer = kernel._read(8 * length) + elems = numpy.ndarray((length, ), '>i8', buffer) + elif tag == "f": + buffer = kernel._read(8 * length) + elems = numpy.ndarray((length, ), '>d', buffer) + else: + fn = receivers[tag] + elems = [] + for _ in range(numpy.prod(shape)): + # discard the tag + kernel._read_int8() + item = fn(kernel, embedding_map) + elems.append(item) + elems = numpy.array(elems) + return elems.reshape(shape) + + +def _receive_range(kernel, embedding_map): + start = kernel._receive_rpc_value(embedding_map) + stop = kernel._receive_rpc_value(embedding_map) + step = kernel._receive_rpc_value(embedding_map) + return range(start, stop, step) + + +def _receive_keyword(kernel, embedding_map): + name = kernel._read_string() + value = kernel._receive_rpc_value(embedding_map) + return RPCKeyword(name, value) + + +receivers = { + "\x00": lambda kernel, embedding_map: kernel._rpc_sentinel, + "t": lambda kernel, embedding_map: + tuple(kernel._receive_rpc_value(embedding_map) + for _ in range(kernel._read_int8())), + "n": lambda kernel, embedding_map: None, + "b": lambda kernel, embedding_map: bool(kernel._read_int8()), + "i": lambda kernel, embedding_map: numpy.int32(kernel._read_int32()), + "I": lambda kernel, embedding_map: numpy.int64(kernel._read_int64()), + "f": lambda kernel, embedding_map: kernel._read_float64(), + "s": lambda kernel, embedding_map: kernel._read_string(), + "B": lambda kernel, embedding_map: 
kernel._read_bytes(), + "A": lambda kernel, embedding_map: kernel._read_bytes(), + "O": lambda kernel, embedding_map: + embedding_map.retrieve_object(kernel._read_int32()), + "F": _receive_fraction, + "l": _receive_list, + "a": _receive_array, + "r": _receive_range, + "k": _receive_keyword +} + + class CommKernelDummy: def __init__(self): pass @@ -77,6 +178,17 @@ class CommKernel: self._read_type = None self.host = host self.port = port + self.read_buffer = bytearray() + self.write_buffer = bytearray() + + self.unpack_int32 = struct.Struct(">l").unpack + self.unpack_int64 = struct.Struct(">q").unpack + self.unpack_float64 = struct.Struct(">d").unpack + + self.pack_header = struct.Struct(">lB").pack + self.pack_int32 = struct.Struct(">l").pack + self.pack_int64 = struct.Struct(">q").pack + self.pack_float64 = struct.Struct(">d").pack def open(self): if hasattr(self, "socket"): @@ -97,13 +209,18 @@ class CommKernel: # def _read(self, length): - r = bytes() - while len(r) < length: - rn = self.socket.recv(min(8192, length - len(r))) - if not rn: - raise ConnectionResetError("Connection closed") - r += rn - return r + # cache the reads to avoid frequent call to recv + while len(self.read_buffer) < length: + # the number is just the maximum amount + # when there is not much data, it would return earlier + diff = length - len(self.read_buffer) + flag = 0 + if diff > 8192: + flag |= socket.MSG_WAITALL + self.read_buffer += self.socket.recv(8192, flag) + result = self.read_buffer[:length] + self.read_buffer = self.read_buffer[length:] + return result def _read_header(self): self.open() @@ -111,14 +228,14 @@ class CommKernel: # Wait for a synchronization sequence, 5a 5a 5a 5a. sync_count = 0 while sync_count < 4: - (sync_byte, ) = struct.unpack("B", self._read(1)) + sync_byte = self._read(1)[0] if sync_byte == 0x5a: sync_count += 1 else: sync_count = 0 # Read message header. 
- (raw_type, ) = struct.unpack("B", self._read(1)) + raw_type = self._read(1)[0] self._read_type = Reply(raw_type) logger.debug("receiving message: type=%r", @@ -134,19 +251,18 @@ class CommKernel: self._read_expect(ty) def _read_int8(self): - (value, ) = struct.unpack("B", self._read(1)) - return value + return self._read(1)[0] def _read_int32(self): - (value, ) = struct.unpack(">l", self._read(4)) + (value, ) = self.unpack_int32(self._read(4)) return value def _read_int64(self): - (value, ) = struct.unpack(">q", self._read(8)) + (value, ) = self.unpack_int64(self._read(8)) return value def _read_float64(self): - (value, ) = struct.unpack(">d", self._read(8)) + (value, ) = self.unpack_float64(self._read(8)) return value def _read_bool(self): @@ -163,7 +279,15 @@ class CommKernel: # def _write(self, data): - self.socket.sendall(data) + self.write_buffer += data + # if the buffer is already pretty large, send it + # the block size is arbitrary, tuning it may improve performance + if len(self.write_buffer) > 4096: + self._flush() + + def _flush(self): + self.socket.sendall(self.write_buffer) + self.write_buffer.clear() def _write_header(self, ty): self.open() @@ -171,7 +295,7 @@ class CommKernel: logger.debug("sending message: type=%r", ty) # Write synchronization sequence and header. 
- self._write(struct.pack(">lB", 0x5a5a5a5a, ty.value)) + self._write(self.pack_header(0x5a5a5a5a, ty.value)) def _write_empty(self, ty): self._write_header(ty) @@ -180,19 +304,19 @@ class CommKernel: self._write(chunk) def _write_int8(self, value): - self._write(struct.pack("B", value)) + self._write(value) def _write_int32(self, value): - self._write(struct.pack(">l", value)) + self._write(self.pack_int32(value)) def _write_int64(self, value): - self._write(struct.pack(">q", value)) + self._write(self.pack_int64(value)) def _write_float64(self, value): - self._write(struct.pack(">d", value)) + self._write(self.pack_float64(value)) def _write_bool(self, value): - self._write(struct.pack("B", value)) + self._write(b'\x01' if value else b'\x00') def _write_bytes(self, value): self._write_int32(len(value)) @@ -207,6 +331,7 @@ class CommKernel: def check_system_info(self): self._write_empty(Request.SystemInfo) + self._flush() self._read_header() self._read_expect(Reply.SystemInfo) @@ -231,6 +356,7 @@ class CommKernel: def load(self, kernel_library): self._write_header(Request.LoadKernel) self._write_bytes(kernel_library) + self._flush() self._read_header() if self._read_type == Reply.LoadFailed: @@ -240,6 +366,7 @@ class CommKernel: def run(self): self._write_empty(Request.RunKernel) + self._flush() logger.debug("running kernel") _rpc_sentinel = object() @@ -247,50 +374,8 @@ class CommKernel: # See rpc_proto.rs and compiler/ir.py:rpc_tag. 
def _receive_rpc_value(self, embedding_map): tag = chr(self._read_int8()) - if tag == "\x00": - return self._rpc_sentinel - elif tag == "t": - length = self._read_int8() - return tuple(self._receive_rpc_value(embedding_map) for _ in range(length)) - elif tag == "n": - return None - elif tag == "b": - return bool(self._read_int8()) - elif tag == "i": - return numpy.int32(self._read_int32()) - elif tag == "I": - return numpy.int64(self._read_int64()) - elif tag == "f": - return self._read_float64() - elif tag == "F": - numerator = self._read_int64() - denominator = self._read_int64() - return Fraction(numerator, denominator) - elif tag == "s": - return self._read_string() - elif tag == "B": - return self._read_bytes() - elif tag == "A": - return self._read_bytes() - elif tag == "l": - length = self._read_int32() - return [self._receive_rpc_value(embedding_map) for _ in range(length)] - elif tag == "a": - num_dims = self._read_int8() - shape = tuple(self._read_int32() for _ in range(num_dims)) - elems = [self._receive_rpc_value(embedding_map) for _ in range(numpy.prod(shape))] - return numpy.array(elems).reshape(shape) - elif tag == "r": - start = self._receive_rpc_value(embedding_map) - stop = self._receive_rpc_value(embedding_map) - step = self._receive_rpc_value(embedding_map) - return range(start, stop, step) - elif tag == "k": - name = self._read_string() - value = self._receive_rpc_value(embedding_map) - return RPCKeyword(name, value) - elif tag == "O": - return embedding_map.retrieve_object(self._read_int32()) + if tag in receivers: + return receivers.get(tag)(self, embedding_map) else: raise IOError("Unknown RPC value tag: {}".format(repr(tag))) @@ -340,7 +425,7 @@ class CommKernel: elif tag == "b": check(isinstance(value, bool), lambda: "bool") - self._write_int8(value) + self._write_bool(value) elif tag == "i": check(isinstance(value, (int, numpy.int32)) and (-2**31 < value < 2**31-1), @@ -357,8 +442,8 @@ class CommKernel: self._write_float64(value) elif tag 
== "F": check(isinstance(value, Fraction) and - (-2**63 < value.numerator < 2**63-1) and - (-2**63 < value.denominator < 2**63-1), + (-2**63 < value.numerator < 2**63-1) and + (-2**63 < value.denominator < 2**63-1), lambda: "64-bit Fraction") self._write_int64(value.numerator) self._write_int64(value.denominator) @@ -378,21 +463,44 @@ class CommKernel: check(isinstance(value, list), lambda: "list") self._write_int32(len(value)) - for elt in value: - tags_copy = bytearray(tags) - self._send_rpc_value(tags_copy, elt, root, function) + tag_element = chr(tags[0]) + if tag_element == "b": + self._write(bytes(value)) + elif tag_element == "i": + self._write(struct.pack(">%sl" % len(value), *value)) + elif tag_element == "I": + self._write(struct.pack(">%sq" % len(value), *value)) + elif tag_element == "f": + self._write(struct.pack(">%sd" % len(value), *value)) + else: + for elt in value: + tags_copy = bytearray(tags) + self._send_rpc_value(tags_copy, elt, root, function) self._skip_rpc_value(tags) elif tag == "a": check(isinstance(value, numpy.ndarray), lambda: "numpy.ndarray") num_dims = tags.pop(0) check(num_dims == len(value.shape), - lambda: "{}-dimensional numpy.ndarray".format(num_dims)) + lambda: "{}-dimensional numpy.ndarray".format(num_dims)) for s in value.shape: self._write_int32(s) - for elt in value.reshape((-1,), order="C"): - tags_copy = bytearray(tags) - self._send_rpc_value(tags_copy, elt, root, function) + tag_element = chr(tags[0]) + if tag_element == "b": + self._write(value.reshape((-1,), order="C").tobytes()) + elif tag_element == "i": + array = value.reshape((-1,), order="C").astype('>i4') + self._write(array.tobytes()) + elif tag_element == "I": + array = value.reshape((-1,), order="C").astype('>i8') + self._write(array.tobytes()) + elif tag_element == "f": + array = value.reshape((-1,), order="C").astype('>d') + self._write(array.tobytes()) + else: + for elt in value.reshape((-1,), order="C"): + tags_copy = bytearray(tags) + 
self._send_rpc_value(tags_copy, elt, root, function) self._skip_rpc_value(tags) elif tag == "r": check(isinstance(value, range), @@ -414,15 +522,15 @@ class CommKernel: return msg def _serve_rpc(self, embedding_map): - is_async = self._read_bool() - service_id = self._read_int32() + is_async = self._read_bool() + service_id = self._read_int32() args, kwargs = self._receive_rpc_args(embedding_map) - return_tags = self._read_bytes() + return_tags = self._read_bytes() if service_id == 0: - service = lambda obj, attr, value: setattr(obj, attr, value) + def service(obj, attr, value): return setattr(obj, attr, value) else: - service = embedding_map.retrieve_object(service_id) + service = embedding_map.retrieve_object(service_id) logger.debug("rpc service: [%d]%r%s %r %r -> %s", service_id, service, (" (async)" if is_async else ""), args, kwargs, return_tags) @@ -432,15 +540,19 @@ class CommKernel: try: result = service(*args, **kwargs) - logger.debug("rpc service: %d %r %r = %r", service_id, args, kwargs, result) + logger.debug("rpc service: %d %r %r = %r", + service_id, args, kwargs, result) self._write_header(Request.RPCReply) self._write_bytes(return_tags) - self._send_rpc_value(bytearray(return_tags), result, result, service) + self._send_rpc_value(bytearray(return_tags), + result, result, service) + self._flush() except RPCReturnValueError as exn: raise except Exception as exn: - logger.debug("rpc service: %d %r %r ! %r", service_id, args, kwargs, exn) + logger.debug("rpc service: %d %r %r ! 
%r", + service_id, args, kwargs, exn) self._write_header(Request.RPCException) @@ -479,23 +591,24 @@ class CommKernel: assert False self._write_string(filename) self._write_int32(line) - self._write_int32(-1) # column not known + self._write_int32(-1) # column not known self._write_string(function) + self._flush() def _serve_exception(self, embedding_map, symbolizer, demangler): - name = self._read_string() - message = self._read_string() - params = [self._read_int64() for _ in range(3)] + name = self._read_string() + message = self._read_string() + params = [self._read_int64() for _ in range(3)] - filename = self._read_string() - line = self._read_int32() - column = self._read_int32() - function = self._read_string() + filename = self._read_string() + line = self._read_int32() + column = self._read_int32() + function = self._read_string() backtrace = [self._read_int32() for _ in range(self._read_int32())] traceback = list(reversed(symbolizer(backtrace))) + \ - [(filename, line, column, *demangler([function]), None)] + [(filename, line, column, *demangler([function]), None)] core_exn = exceptions.CoreException(name, message, params, traceback) if core_exn.id == 0: diff --git a/artiq/coredevice/fastino.py b/artiq/coredevice/fastino.py index 32306388a..73fcfdf38 100644 --- a/artiq/coredevice/fastino.py +++ b/artiq/coredevice/fastino.py @@ -1,26 +1,44 @@ """RTIO driver for the Fastino 32channel, 16 bit, 2.5 MS/s per channel, streaming DAC. 
- -TODO: Example, describe update/hold """ from artiq.language.core import kernel, portable, delay -from artiq.coredevice.rtio import rtio_output, rtio_output_wide, rtio_input_data +from artiq.coredevice.rtio import (rtio_output, rtio_output_wide, + rtio_input_data) from artiq.language.units import us -from artiq.language.types import TInt32, TList, TFloat +from artiq.language.types import TInt32, TList class Fastino: """Fastino 32-channel, 16-bit, 2.5 MS/s per channel streaming DAC + The RTIO PHY supports staging DAC data before transmitting them by writing + to the DAC RTIO addresses. If a channel is not "held" by setting its bit + using :meth:`set_hold`, the next frame will contain the update. For the + DACs held, the update is triggered explicitly by setting the corresponding + bit using :meth:`set_update`. Update is self-clearing. This enables atomic + DAC updates synchronized to a frame edge. + + The `log2_width=0` RTIO layout uses one DAC channel per RTIO address and a + dense RTIO address space. The RTIO words are narrow (32 bit) and + few-channel updates are efficient. There is the least amount of DAC state + tracking in kernels, at the cost of more DMA and RTIO data. + The setting here and in the RTIO PHY (gateware) must match. + + Other `log2_width` (up to `log2_width=5`) settings pack multiple + (in powers of two) DAC channels into one group and into one RTIO write. + The RTIO data width increases accordingly. The `log2_width` + LSBs of the RTIO address for a DAC channel write must be zero and the + address space is sparse. For `log2_width=5` the RTIO data is 512 bit wide. + + If `log2_width` is zero, the :meth:`set_dac`/:meth:`set_dac_mu` interface + must be used. If non-zero, the :meth:`set_group`/:meth:`set_group_mu` + interface must be used. + :param channel: RTIO channel number :param core_device: Core device name (default: "core") - :param log2_width: Width of DAC channel group (power of two, - see the RTIO PHY for details). 
If zero, the - :meth:`set_dac`/:meth:`set_dac_mu` interface must be used. - If non-zero, the :meth:`set_group`/:meth:`set_group_mu` - interface must be used. Value must match the corresponding value - in the RTIO PHY. + :param log2_width: Width of DAC channel group (logarithm base 2). + Value must match the corresponding value in the RTIO PHY (gateware). """ kernel_invariants = {"core", "channel", "width"} @@ -94,7 +112,10 @@ class Fastino: :param voltage: Voltage in SI Volts. :return: DAC data word in machine units, 16 bit integer. """ - return int(round((0x8000/10.)*voltage)) + 0x8000 + data = int(round((0x8000/10.)*voltage)) + 0x8000 + if data < 0 or data > 0xffff: + raise ValueError("DAC voltage out of bounds") + return data @portable def voltage_group_to_mu(self, voltage, data): diff --git a/artiq/examples/metlino_sayma_ttl/device_db.py b/artiq/examples/metlino_sayma_ttl/device_db.py new file mode 100644 index 000000000..c8c3acb8e --- /dev/null +++ b/artiq/examples/metlino_sayma_ttl/device_db.py @@ -0,0 +1,95 @@ +core_addr = "192.168.1.65" + +device_db = { + "core": { + "type": "local", + "module": "artiq.coredevice.core", + "class": "Core", + "arguments": {"host": core_addr, "ref_period": 1/(8*150e6)} + }, + "core_log": { + "type": "controller", + "host": "::1", + "port": 1068, + "command": "aqctl_corelog -p {port} --bind {bind} " + core_addr + }, + "core_cache": { + "type": "local", + "module": "artiq.coredevice.cache", + "class": "CoreCache" + }, + "core_dma": { + "type": "local", + "module": "artiq.coredevice.dma", + "class": "CoreDMA" + } +} + +# master peripherals +for i in range(4): + device_db["led" + str(i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": i}, +} + +# DEST#1 peripherals +amc_base = 0x070000 +rtm_base = 0x020000 + +for i in range(4): + device_db["led" + str(4+i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 
i}, + } + +#DIO (EEM0) starting at RTIO channel 0x000056 +for i in range(8): + device_db["ttl" + str(i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000056 + i}, + } + +#DIO (EEM1) starting at RTIO channel 0x00005e +for i in range(8): + device_db["ttl" + str(8+i)] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x00005e + i}, + } + +device_db["fmcdio_dirctl_clk"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000066} +} + +device_db["fmcdio_dirctl_ser"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000067} +} + +device_db["fmcdio_dirctl_latch"] = { + "type": "local", + "module": "artiq.coredevice.ttl", + "class": "TTLOut", + "arguments": {"channel": amc_base + 0x000068} +} + +device_db["fmcdio_dirctl"] = { + "type": "local", + "module": "artiq.coredevice.shiftreg", + "class": "ShiftReg", + "arguments": {"clk": "fmcdio_dirctl_clk", + "ser": "fmcdio_dirctl_ser", + "latch": "fmcdio_dirctl_latch"} +} diff --git a/artiq/examples/metlino_sayma_ttl/repository/demo.py b/artiq/examples/metlino_sayma_ttl/repository/demo.py new file mode 100644 index 000000000..bb273ce2c --- /dev/null +++ b/artiq/examples/metlino_sayma_ttl/repository/demo.py @@ -0,0 +1,129 @@ +import sys +import os +import select + +from artiq.experiment import * +from artiq.coredevice.fmcdio_vhdci_eem import * + + +def chunker(seq, size): + res = [] + for el in seq: + res.append(el) + if len(res) == size: + yield res + res = [] + if res: + yield res + + +def is_enter_pressed() -> TBool: + if os.name == "nt": + if msvcrt.kbhit() and msvcrt.getch() == b"\r": + return True + else: + return False + else: + if select.select([sys.stdin, ], [], [], 0.0)[0]: + sys.stdin.read(1) + return True + else: + return False + + +class 
Demo(EnvExperiment): + def build(self): + self.setattr_device("core") + self.setattr_device("fmcdio_dirctl") + + self.leds = dict() + self.ttl_outs = dict() + + ddb = self.get_device_db() + for name, desc in ddb.items(): + if isinstance(desc, dict) and desc["type"] == "local": + module, cls = desc["module"], desc["class"] + if (module, cls) == ("artiq.coredevice.ttl", "TTLOut"): + dev = self.get_device(name) + if "led" in name: # guess + self.leds[name] = dev + elif "ttl" in name: # to exclude fmcdio_dirctl + self.ttl_outs[name] = dev + + self.leds = sorted(self.leds.items(), key=lambda x: x[1].channel) + self.ttl_outs = sorted(self.ttl_outs.items(), key=lambda x: x[1].channel) + + self.dirctl_word = ( + shiftreg_bits(0, dio_bank0_out_pins | dio_bank1_out_pins) | + shiftreg_bits(1, dio_bank0_out_pins | dio_bank1_out_pins) + ) + + @kernel + def init(self): + self.core.break_realtime() + print("*** Waiting for DRTIO ready...") + drtio_indices = [7] + for i in drtio_indices: + while not self.drtio_is_up(i): + pass + + self.fmcdio_dirctl.set(self.dirctl_word) + + @kernel + def drtio_is_up(self, drtio_index): + if not self.core.get_rtio_destination_status(drtio_index): + return False + print("DRTIO #", drtio_index, "is ready\n") + return True + + @kernel + def test_led(self, led): + while not is_enter_pressed(): + self.core.break_realtime() + # do not fill the FIFOs too much to avoid long response times + t = now_mu() - self.core.seconds_to_mu(0.2) + while self.core.get_rtio_counter_mu() < t: + pass + for i in range(3): + led.pulse(100*ms) + delay(100*ms) + + def test_leds(self): + print("*** Testing LEDs.") + print("Check for blinking. 
Press ENTER when done.") + + for led_name, led_dev in self.leds: + print("Testing LED: {}".format(led_name)) + self.test_led(led_dev) + + @kernel + def test_ttl_out_chunk(self, ttl_chunk): + while not is_enter_pressed(): + self.core.break_realtime() + for _ in range(50000): + i = 0 + for ttl in ttl_chunk: + i += 1 + for _ in range(i): + ttl.pulse(1*us) + delay(1*us) + delay(10*us) + + def test_ttl_outs(self): + print("*** Testing TTL outputs.") + print("Outputs are tested in groups of 4. Touch each TTL connector") + print("with the oscilloscope probe tip, and check that the number of") + print("pulses corresponds to its number in the group.") + print("Press ENTER when done.") + + for ttl_chunk in chunker(self.ttl_outs, 4): + print("Testing TTL outputs: {}.".format(", ".join(name for name, dev in ttl_chunk))) + self.test_ttl_out_chunk([dev for name, dev in ttl_chunk]) + + def run(self): + self.core.reset() + + if self.leds: + self.test_leds() + if self.ttl_outs: + self.test_ttl_outs() diff --git a/artiq/firmware/libproto_artiq/lib.rs b/artiq/firmware/libproto_artiq/lib.rs index d8cbfe607..66c04d5e6 100644 --- a/artiq/firmware/libproto_artiq/lib.rs +++ b/artiq/firmware/libproto_artiq/lib.rs @@ -11,6 +11,7 @@ extern crate cslice; #[macro_use] extern crate log; +extern crate byteorder; extern crate io; extern crate dyld; diff --git a/artiq/firmware/libproto_artiq/rpc_proto.rs b/artiq/firmware/libproto_artiq/rpc_proto.rs index b35e6b905..84296a62c 100644 --- a/artiq/firmware/libproto_artiq/rpc_proto.rs +++ b/artiq/firmware/libproto_artiq/rpc_proto.rs @@ -1,6 +1,7 @@ use core::str; +use core::slice; use cslice::{CSlice, CMutSlice}; - +use byteorder::{NetworkEndian, ByteOrder}; use io::{ProtoRead, Read, Write, ProtoWrite, Error}; use self::tag::{Tag, TagIterator, split_tag}; @@ -53,13 +54,34 @@ unsafe fn recv_value(reader: &mut R, tag: Tag, data: &mut *mut (), struct List { elements: *mut (), length: u32 }; consume_value!(List, |ptr| { (*ptr).length = reader.read_u32()?; 
+ let length = (*ptr).length as usize; let tag = it.clone().next().expect("truncated tag"); (*ptr).elements = alloc(tag.size() * (*ptr).length as usize)?; let mut data = (*ptr).elements; - for _ in 0..(*ptr).length as usize { - recv_value(reader, tag, &mut data, alloc)? + match tag { + Tag::Bool => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length); + reader.read_exact(dest)?; + }, + Tag::Int32 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i32, length); + NetworkEndian::from_slice_i32(dest); + }, + Tag::Int64 | Tag::Float64 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i64, length); + NetworkEndian::from_slice_i64(dest); + }, + _ => { + for _ in 0..length { + recv_value(reader, tag, &mut data, alloc)? + } + } } Ok(()) }) @@ -72,13 +94,34 @@ unsafe fn recv_value(reader: &mut R, tag: Tag, data: &mut *mut (), total_len *= len; consume_value!(u32, |ptr| *ptr = len ) } + let length = total_len as usize; let elt_tag = it.clone().next().expect("truncated tag"); *buffer = alloc(elt_tag.size() * total_len as usize)?; let mut data = *buffer; - for _ in 0..total_len { - recv_value(reader, elt_tag, &mut data, alloc)? 
+ match elt_tag { + Tag::Bool => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length); + reader.read_exact(dest)?; + }, + Tag::Int32 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 4); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i32, length); + NetworkEndian::from_slice_i32(dest); + }, + Tag::Int64 | Tag::Float64 => { + let dest = slice::from_raw_parts_mut(data as *mut u8, length * 8); + reader.read_exact(dest)?; + let dest = slice::from_raw_parts_mut(data as *mut i64, length); + NetworkEndian::from_slice_i64(dest); + }, + _ => { + for _ in 0..length { + recv_value(reader, elt_tag, &mut data, alloc)? + } + } } Ok(()) }) @@ -155,11 +198,33 @@ unsafe fn send_value(writer: &mut W, tag: Tag, data: &mut *const ()) #[repr(C)] struct List { elements: *const (), length: u32 }; consume_value!(List, |ptr| { + let length = (*ptr).length as usize; writer.write_u32((*ptr).length)?; let tag = it.clone().next().expect("truncated tag"); let mut data = (*ptr).elements; - for _ in 0..(*ptr).length as usize { - send_value(writer, tag, &mut data)?; + writer.write_u8(tag.as_u8())?; + match tag { + Tag::Bool => { + let slice = slice::from_raw_parts(data as *const u8, length); + writer.write_all(slice)?; + }, + Tag::Int32 => { + let slice = slice::from_raw_parts(data as *const u32, length); + for v in slice.iter() { + writer.write_u32(*v)?; + } + }, + Tag::Int64 | Tag::Float64 => { + let slice = slice::from_raw_parts(data as *const u64, length); + for v in slice.iter() { + writer.write_u64(*v)?; + } + }, + _ => { + for _ in 0..length { + send_value(writer, tag, &mut data)?; + } + } } Ok(()) }) @@ -176,9 +241,31 @@ unsafe fn send_value(writer: &mut W, tag: Tag, data: &mut *const ()) total_len *= *len; }) } + let length = total_len as usize; let mut data = *buffer; - for _ in 0..total_len as usize { - send_value(writer, elt_tag, &mut data)?; + writer.write_u8(elt_tag.as_u8())?; + match elt_tag { + Tag::Bool => { + let 
slice = slice::from_raw_parts(data as *const u8, length); + writer.write_all(slice)?; + }, + Tag::Int32 => { + let slice = slice::from_raw_parts(data as *const u32, length); + for v in slice.iter() { + writer.write_u32(*v)?; + } + }, + Tag::Int64 | Tag::Float64 => { + let slice = slice::from_raw_parts(data as *const u64, length); + for v in slice.iter() { + writer.write_u64(*v)?; + } + }, + _ => { + for _ in 0..length { + send_value(writer, elt_tag, &mut data)?; + } + } } Ok(()) }) diff --git a/artiq/firmware/runtime/main.rs b/artiq/firmware/runtime/main.rs index fda9d37ba..73660b611 100644 --- a/artiq/firmware/runtime/main.rs +++ b/artiq/firmware/runtime/main.rs @@ -107,6 +107,9 @@ fn startup() { io_expander1 = board_misoc::io_expander::IoExpander::new(1); io_expander0.init().expect("I2C I/O expander #0 initialization failed"); io_expander1.init().expect("I2C I/O expander #1 initialization failed"); + io_expander0.set_oe(0, 1 << 1).unwrap(); + io_expander0.set(0, 1, false); + io_expander0.service().unwrap(); } rtio_clocking::init(); diff --git a/artiq/firmware/satman/main.rs b/artiq/firmware/satman/main.rs index 9e2144370..60f0f4c0a 100644 --- a/artiq/firmware/satman/main.rs +++ b/artiq/firmware/satman/main.rs @@ -475,6 +475,9 @@ pub extern fn main() -> i32 { io_expander1.set(1, 7, true); io_expander1.service().unwrap(); } + io_expander0.set_oe(0, 1 << 1).unwrap(); + io_expander0.set(0, 1, false); + io_expander0.service().unwrap(); } #[cfg(has_si5324)] diff --git a/artiq/gateware/eem.py b/artiq/gateware/eem.py index d6314e9be..627bc29e8 100644 --- a/artiq/gateware/eem.py +++ b/artiq/gateware/eem.py @@ -619,12 +619,12 @@ class Fastino(_EEM): ) for pol in "pn"] @classmethod - def add_std(cls, target, eem, iostandard="LVDS_25"): + def add_std(cls, target, eem, log2_width, iostandard="LVDS_25"): cls.add_extension(target, eem, iostandard=iostandard) phy = fastino.Fastino(target.platform.request("fastino{}_ser_p".format(eem)), 
target.platform.request("fastino{}_ser_n".format(eem)), - log2_width=0) + log2_width=log2_width) target.submodules += phy target.rtio_channels.append(rtio.Channel.from_phy(phy, ififo_depth=4)) diff --git a/artiq/gateware/fmcdio_vhdci_eem.py b/artiq/gateware/fmcdio_vhdci_eem.py index c1ab62864..0296efe8e 100644 --- a/artiq/gateware/fmcdio_vhdci_eem.py +++ b/artiq/gateware/fmcdio_vhdci_eem.py @@ -19,8 +19,9 @@ def _get_connectors(): for j, pair in enumerate(eem_fmc_connections[i]): for pn in "n", "p": cc = "cc_" if j == 0 else "" + lpc_cc = "CC_" if eem_fmc_connections[i][j] in (0, 1, 17, 18) else "" connections["d{}_{}{}".format(j, cc, pn)] = \ - "LPC:LA{:02d}_{}{}".format(pair, cc.upper(), pn.upper()) + "LPC:LA{:02d}_{}{}".format(pair, lpc_cc, pn.upper()) connectors.append(("eem{}".format(i), connections)) return connectors diff --git a/artiq/gateware/targets/kasli.py b/artiq/gateware/targets/kasli.py index 73162c65b..d1eb75252 100755 --- a/artiq/gateware/targets/kasli.py +++ b/artiq/gateware/targets/kasli.py @@ -79,6 +79,21 @@ class _RTIOCRG(Module, AutoCSR): ] +class SMAClkinForward(Module): + def __init__(self, platform): + sma_clkin = platform.request("sma_clkin") + sma_clkin_se = Signal() + sma_clkin_buffered = Signal() + cdr_clk_se = Signal() + cdr_clk = platform.request("cdr_clk") + self.specials += [ + Instance("IBUFDS", i_I=sma_clkin.p, i_IB=sma_clkin.n, o_O=sma_clkin_se), + Instance("BUFIO", i_I=sma_clkin_se, o_O=sma_clkin_buffered), + Instance("ODDR", i_C=sma_clkin_buffered, i_CE=1, i_D1=0, i_D2=1, o_Q=cdr_clk_se), + Instance("OBUFDS", i_I=cdr_clk_se, o_O=cdr_clk.p, o_OB=cdr_clk.n) + ] + + def fix_serdes_timing_path(platform): # ignore timing of path from OSERDESE2 through the pad to ISERDESE2 platform.add_platform_command( @@ -99,7 +114,7 @@ class StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", 
sdram_controller_type="minicon", @@ -109,12 +124,13 @@ class StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) if self.platform.hw_rev == "v2.0": self.submodules.error_led = gpio.GPIOOut(Cat( self.platform.request("error_led"))) self.csr_devices.append("error_led") + self.submodules += SMAClkinForward(self.platform) i2c = self.platform.request("i2c") self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) @@ -280,7 +296,7 @@ class MasterBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -290,10 +306,13 @@ class MasterBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform + if platform.hw_rev == "v2.0": + self.submodules += SMAClkinForward(platform) + i2c = self.platform.request("i2c") self.submodules.i2c = gpio.GPIOTristate([i2c.scl, i2c.sda]) self.csr_devices.append("i2c") @@ -453,13 +472,13 @@ class SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, enable_sata=False, *, with_wrpll=False, **kwargs): + def __init__(self, rtio_clk_freq=125e6, enable_sata=False, *, with_wrpll=False, gateware_identifier_str=None, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", l2_size=128*1024, **kwargs) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform @@ -674,11 +693,14 @@ def main(): help="variant: {} (default: %(default)s)".format( "/".join(sorted(VARIANTS.keys())))) parser.add_argument("--with-wrpll", 
default=False, action="store_true") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() argdict = dict() if args.with_wrpll: argdict["with_wrpll"] = True + argdict["gateware_identifier_str"] = args.gateware_identifier_str variant = args.variant.lower() try: diff --git a/artiq/gateware/targets/kasli_generic.py b/artiq/gateware/targets/kasli_generic.py index a68951c43..577b93dad 100755 --- a/artiq/gateware/targets/kasli_generic.py +++ b/artiq/gateware/targets/kasli_generic.py @@ -109,7 +109,8 @@ def peripheral_mirny(module, peripheral): def peripheral_fastino(module, peripheral): if len(peripheral["ports"]) != 1: raise ValueError("wrong number of ports") - eem.Fastino.add_std(module, peripheral["ports"][0]) + eem.Fastino.add_std(module, peripheral["ports"][0], + peripheral.get("log2_width", 0)) def peripheral_phaser(module, peripheral): @@ -259,6 +260,8 @@ def main(): parser.set_defaults(output_dir="artiq_kasli") parser.add_argument("description", metavar="DESCRIPTION", help="JSON system description file") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() with open(args.description, "r") as f: @@ -276,7 +279,7 @@ def main(): else: raise ValueError("Invalid base") - soc = cls(description, **soc_kasli_argdict(args)) + soc = cls(description, gateware_identifier_str=args.gateware_identifier_str, **soc_kasli_argdict(args)) args.variant = description["variant"] build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/kc705.py b/artiq/gateware/targets/kc705.py index b586edda4..1481fd351 100755 --- a/artiq/gateware/targets/kc705.py +++ b/artiq/gateware/targets/kc705.py @@ -119,7 +119,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", 
sdram_controller_type="minicon", @@ -129,7 +129,7 @@ class _StandaloneBase(MiniSoC, AMPSoC): ethmac_ntxslots=4, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) if isinstance(self.platform.toolchain, XilinxVivadoToolchain): self.platform.toolchain.bitstream_commands.extend([ @@ -416,6 +416,8 @@ def main(): help="variant: " "nist_clock/nist_qc2/sma_spi " "(default: %(default)s)") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() variant = args.variant.lower() @@ -424,7 +426,7 @@ def main(): except KeyError: raise SystemExit("Invalid variant (-V/--variant)") - soc = cls(**soc_kc705_argdict(args)) + soc = cls(gateware_identifier_str=args.gateware_identifier_str, **soc_kc705_argdict(args)) build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/metlino.py b/artiq/gateware/targets/metlino.py index d1013653c..2139278ef 100755 --- a/artiq/gateware/targets/metlino.py +++ b/artiq/gateware/targets/metlino.py @@ -38,7 +38,7 @@ class Master(MiniSoC, AMPSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, **kwargs): + def __init__(self, gateware_identifier_str=None, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -49,7 +49,7 @@ class Master(MiniSoC, AMPSoC): csr_address_width=15, **kwargs) AMPSoC.__init__(self) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) platform = self.platform rtio_clk_freq = 150e6 @@ -164,9 +164,11 @@ def main(): builder_args(parser) soc_sdram_args(parser) parser.set_defaults(output_dir="artiq_metlino") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() args.variant = "master" - soc = Master(**soc_sdram_argdict(args)) + soc = Master(gateware_identifier_str=args.gateware_identifier_str, **soc_sdram_argdict(args)) 
build_artiq_soc(soc, builder_argdict(args)) diff --git a/artiq/gateware/targets/sayma_amc.py b/artiq/gateware/targets/sayma_amc.py index c2cb435f1..7f8a8a5b8 100755 --- a/artiq/gateware/targets/sayma_amc.py +++ b/artiq/gateware/targets/sayma_amc.py @@ -12,8 +12,10 @@ from misoc.interconnect.csr import * from misoc.targets.sayma_amc import * from artiq.gateware.amp import AMPSoC +from artiq.gateware import eem from artiq.gateware import rtio from artiq.gateware import jesd204_tools +from artiq.gateware import fmcdio_vhdci_eem from artiq.gateware.rtio.phy import ttl_simple, ttl_serdes_ultrascale, sawg from artiq.gateware.drtio.transceiver import gth_ultrascale from artiq.gateware.drtio.siphaser import SiPhaser7Series @@ -50,7 +52,7 @@ class SatelliteBase(MiniSoC): } mem_map.update(MiniSoC.mem_map) - def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", with_sfp=False, *, with_wrpll, **kwargs): + def __init__(self, rtio_clk_freq=125e6, identifier_suffix="", gateware_identifier_str=None, with_sfp=False, *, with_wrpll, **kwargs): MiniSoC.__init__(self, cpu_type="or1k", sdram_controller_type="minicon", @@ -59,7 +61,7 @@ class SatelliteBase(MiniSoC): ethmac_nrxslots=4, ethmac_ntxslots=4, **kwargs) - add_identifier(self, suffix=identifier_suffix) + add_identifier(self, suffix=identifier_suffix, gateware_identifier_str=gateware_identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -284,7 +286,7 @@ class JDCGSyncDDS(Module, AutoCSR): class Satellite(SatelliteBase): """ - DRTIO satellite with local DAC/SAWG channels. + DRTIO satellite with local DAC/SAWG channels, as well as TTL channels via FMC and VHDCI carrier. 
""" def __init__(self, jdcg_type, **kwargs): SatelliteBase.__init__(self, 150e6, @@ -307,7 +309,7 @@ class Satellite(SatelliteBase): self.csr_devices.append("slave_fpga_cfg") self.config["SLAVE_FPGA_GATEWARE"] = 0x200000 - rtio_channels = [] + self.rtio_channels = rtio_channels = [] for i in range(4): phy = ttl_simple.Output(platform.request("user_led", i)) self.submodules += phy @@ -343,6 +345,27 @@ class Satellite(SatelliteBase): self.jdcg_1.sawgs for phy in sawg.phys) + # FMC-VHDCI-EEM DIOs x 2 (all OUTPUTs) + platform.add_connectors(fmcdio_vhdci_eem.connectors) + eem.DIO.add_std(self, 0, + ttl_simple.Output, ttl_simple.Output, iostandard="LVDS") + eem.DIO.add_std(self, 1, + ttl_simple.Output, ttl_simple.Output, iostandard="LVDS") + # FMC-DIO-32ch-LVDS-a Direction Control Pins (via shift register) as TTLs x 3 + platform.add_extension(fmcdio_vhdci_eem.io) + print("fmcdio_vhdci_eem.[CLK, SER, LATCH] starting at RTIO channel 0x{:06x}" + .format(len(rtio_channels))) + fmcdio_dirctl = platform.request("fmcdio_dirctl", 0) + fmcdio_dirctl_phys = [ + ttl_simple.Output(fmcdio_dirctl.clk), + ttl_simple.Output(fmcdio_dirctl.ser), + ttl_simple.Output(fmcdio_dirctl.latch) + ] + for phy in fmcdio_dirctl_phys: + self.submodules += phy + rtio_channels.append(rtio.Channel.from_phy(phy)) + workaround_us_lvds_tristate(platform) + self.add_rtio(rtio_channels) self.submodules.sysref_sampler = jesd204_tools.SysrefSampler( @@ -403,14 +426,24 @@ def main(): help="Change type of signal generator. 
This is used exclusively for " "development and debugging.") parser.add_argument("--with-wrpll", default=False, action="store_true") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") args = parser.parse_args() variant = args.variant.lower() if variant == "satellite": - soc = Satellite(with_sfp=args.sfp, jdcg_type=args.jdcg_type, with_wrpll=args.with_wrpll, - **soc_sayma_amc_argdict(args)) + soc = Satellite( + with_sfp=args.sfp, + jdcg_type=args.jdcg_type, + with_wrpll=args.with_wrpll, + gateware_identifier_str=args.gateware_identifier_str, + **soc_sayma_amc_argdict(args)) elif variant == "simplesatellite": - soc = SimpleSatellite(with_sfp=args.sfp, with_wrpll=args.with_wrpll, **soc_sayma_amc_argdict(args)) + soc = SimpleSatellite( + with_sfp=args.sfp, + with_wrpll=args.with_wrpll, + gateware_identifier_str=args.gateware_identifier_str, + **soc_sayma_amc_argdict(args)) else: raise SystemExit("Invalid variant (-V/--variant)") diff --git a/artiq/gateware/targets/sayma_rtm.py b/artiq/gateware/targets/sayma_rtm.py index bdef1fa09..294a17823 100755 --- a/artiq/gateware/targets/sayma_rtm.py +++ b/artiq/gateware/targets/sayma_rtm.py @@ -75,11 +75,11 @@ class _SatelliteBase(BaseSoC): } mem_map.update(BaseSoC.mem_map) - def __init__(self, rtio_clk_freq, *, with_wrpll, **kwargs): + def __init__(self, rtio_clk_freq, *, with_wrpll, gateware_identifier_str, **kwargs): BaseSoC.__init__(self, cpu_type="or1k", **kwargs) - add_identifier(self) + add_identifier(self, gateware_identifier_str=gateware_identifier_str) self.rtio_clk_freq = rtio_clk_freq platform = self.platform @@ -299,11 +299,15 @@ def main(): parser.add_argument("--rtio-clk-freq", default=150, type=int, help="RTIO clock frequency in MHz") parser.add_argument("--with-wrpll", default=False, action="store_true") + parser.add_argument("--gateware-identifier-str", default=None, + help="Override ROM identifier") parser.set_defaults(output_dir=os.path.join("artiq_sayma", 
"rtm")) args = parser.parse_args() soc = Satellite( - rtio_clk_freq=1e6*args.rtio_clk_freq, with_wrpll=args.with_wrpll, + rtio_clk_freq=1e6*args.rtio_clk_freq, + with_wrpll=args.with_wrpll, + gateware_identifier_str=args.gateware_identifier_str, **soc_sayma_rtm_argdict(args)) builder = SatmanSoCBuilder(soc, **builder_argdict(args)) try: diff --git a/artiq/test/coredevice/test_i2c.py b/artiq/test/coredevice/test_i2c.py index b3d03c107..e5424efc6 100644 --- a/artiq/test/coredevice/test_i2c.py +++ b/artiq/test/coredevice/test_i2c.py @@ -24,6 +24,7 @@ class I2CSwitch(EnvExperiment): class NonexistentI2CBus(EnvExperiment): def build(self): self.setattr_device("core") + self.setattr_device("i2c_switch") # HACK: only run this test on boards with I2C self.broken_switch = PCA9548(self._HasEnvironment__device_mgr, 255) @kernel diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index 071c5d27c..3ca1f86ba 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -1,72 +1,273 @@ import os import time import unittest +import numpy from artiq.experiment import * from artiq.test.hardware_testbench import ExperimentCase +# large: 1MB payload +# small: 1KB payload +bytes_large = b"\x00" * (1 << 20) +bytes_small = b"\x00" * (1 << 10) + +list_large = [123] * (1 << 18) +list_small = [123] * (1 << 8) + +array_large = numpy.array(list_large, numpy.int32) +array_small = numpy.array(list_small, numpy.int32) + +byte_list_large = [True] * (1 << 20) +byte_list_small = [True] * (1 << 10) + +received_bytes = 0 +time_start = 0 +time_end = 0 class _Transfer(EnvExperiment): def build(self): self.setattr_device("core") - self.data = b"\x00"*(10**6) + self.count = 10 + self.h2d = [0.0] * self.count + self.d2h = [0.0] * self.count @rpc - def source(self) -> TBytes: - return self.data + def get_bytes(self, large: TBool) -> TBytes: + if large: + return bytes_large + else: + return bytes_small - 
@rpc(flags={"async"}) + @rpc + def get_list(self, large: TBool) -> TList(TInt32): + if large: + return list_large + else: + return list_small + + @rpc + def get_byte_list(self, large: TBool) -> TList(TBool): + if large: + return byte_list_large + else: + return byte_list_small + + @rpc + def get_array(self, large: TBool) -> TArray(TInt32): + if large: + return array_large + else: + return array_small + + @rpc + def get_string_list(self) -> TList(TStr): + return string_list + + @rpc def sink(self, data): - assert data == self.data + pass @rpc(flags={"async"}) - def sink_array(self, data): - assert data == [0]*(1 << 15) + def sink_async(self, data): + global received_bytes, time_start, time_end + if received_bytes == 0: + time_start = time.time() + received_bytes += len(data) + if received_bytes == (1024 ** 2)*128: + time_end = time.time() + + @rpc + def get_async_throughput(self) -> TFloat: + return 128.0 / (time_end - time_start) @kernel - def host_to_device(self): - t0 = self.core.get_rtio_counter_mu() - data = self.source() - t1 = self.core.get_rtio_counter_mu() - return len(data)/self.core.mu_to_seconds(t1-t0) + def test_bytes(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_bytes(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def device_to_host(self): - t0 = self.core.get_rtio_counter_mu() - self.sink(self.data) - t1 = self.core.get_rtio_counter_mu() - return len(self.data)/self.core.mu_to_seconds(t1-t0) + def test_byte_list(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_byte_list(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = 
self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) @kernel - def device_to_host_array(self): - #data = [[0]*8 for _ in range(1 << 12)] - data = [0]*(1 << 15) - t0 = self.core.get_rtio_counter_mu() - self.sink_array(data) - t1 = self.core.get_rtio_counter_mu() - return ((len(data)*4)/ - self.core.mu_to_seconds(t1-t0)) + def test_list(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_list(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + for i in range(self.count): + inner() + return (self.h2d, self.d2h) + + @kernel + def test_array(self, large): + def inner(): + t0 = self.core.get_rtio_counter_mu() + data = self.get_array(large) + t1 = self.core.get_rtio_counter_mu() + self.sink(data) + t2 = self.core.get_rtio_counter_mu() + self.h2d[i] = self.core.mu_to_seconds(t1 - t0) + self.d2h[i] = self.core.mu_to_seconds(t2 - t1) + + for i in range(self.count): + inner() + return (self.h2d, self.d2h) + + @kernel + def test_async(self): + data = self.get_bytes(True) + for _ in range(128): + self.sink_async(data) + return self.get_async_throughput() class TransferTest(ExperimentCase): - def test_host_to_device(self): - exp = self.create(_Transfer) - host_to_device_rate = exp.host_to_device() - print(host_to_device_rate/(1024*1024), "MiB/s") - self.assertGreater(host_to_device_rate, 2.0e6) + @classmethod + def setUpClass(self): + self.results = [] - def test_device_to_host(self): - exp = self.create(_Transfer) - device_to_host_rate = exp.device_to_host() - print(device_to_host_rate/(1024*1024), "MiB/s") - self.assertGreater(device_to_host_rate, 2.2e6) + @classmethod + def tearDownClass(self): + if len(self.results) == 0: + return + max_length = max(max(len(row[0]) for row in self.results), len("Test")) - def test_device_to_host_array(self): 
- exp = self.create(_Transfer) - rate = exp.device_to_host_array() - print(rate/(1024*1024), "MiB/s") - self.assertGreater(rate, .15e6) + def pad(name): + nonlocal max_length + return name + " " * (max_length - len(name)) + print() + print("| {} | Mean (MiB/s) | std (MiB/s) |".format(pad("Test"))) + print("| {} | ------------ | ------------ |".format("-" * max_length)) + for v in self.results: + print("| {} | {:>12.2f} | {:>12.2f} |".format( + pad(v[0]), v[1], v[2])) + def test_bytes_large(self): + exp = self.create(_Transfer) + results = exp.test_bytes(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_bytes_small(self): + exp = self.create(_Transfer) + results = exp.test_bytes(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_byte_list_large(self): + exp = self.create(_Transfer) + results = exp.test_byte_list(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes List (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes List (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_byte_list_small(self): + exp = self.create(_Transfer) + results = 
exp.test_byte_list(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["Bytes List (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["Bytes List (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_list_large(self): + exp = self.create(_Transfer) + results = exp.test_list(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 List (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 List (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_list_small(self): + exp = self.create(_Transfer) + results = exp.test_list(False) + host_to_device = (1 << 10) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 List (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 List (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_array_large(self): + exp = self.create(_Transfer) + results = exp.test_array(True) + host_to_device = (1 << 20) / numpy.array(results[0], numpy.float64) + device_to_host = (1 << 20) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 Array (1MB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 Array (1MB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_array_small(self): + exp = self.create(_Transfer) + results = exp.test_array(False) + host_to_device = (1 << 10) / 
numpy.array(results[0], numpy.float64) + device_to_host = (1 << 10) / numpy.array(results[1], numpy.float64) + host_to_device /= 1024*1024 + device_to_host /= 1024*1024 + self.results.append(["I32 Array (1KB) H2D", host_to_device.mean(), + host_to_device.std()]) + self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), + device_to_host.std()]) + + def test_async_throughput(self): + exp = self.create(_Transfer) + results = exp.test_async() + print("Async throughput: {:>6.2f}MiB/s".format(results)) class _KernelOverhead(EnvExperiment): def build(self): diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index 96437d2a2..3313b5c14 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -12,6 +12,7 @@ from artiq.coredevice import exceptions from artiq.coredevice.comm_mgmt import CommMgmt from artiq.coredevice.comm_analyzer import (StoppedMessage, OutputMessage, InputMessage, decode_dump, get_analyzer_dump) +from artiq.compiler.targets import CortexA9Target artiq_low_latency = os.getenv("ARTIQ_LOW_LATENCY") @@ -230,17 +231,18 @@ class LoopbackGateTiming(EnvExperiment): # With the exact delay known, make sure tight gate timings work. # In the most common configuration, 24 mu == 24 ns == 3 coarse periods, # which should be plenty of slack. + # FIXME: ZC706 with NIST_QC2 needs 48ns - hw problem? 
delay_mu(10000) gate_start_mu = now_mu() - self.loop_in.gate_both_mu(24) + self.loop_in.gate_both_mu(48) # XXX gate_end_mu = now_mu() # gateware latency offset between gate and input lat_offset = 11*8 out_mu = gate_start_mu - loop_delay_mu + lat_offset at_mu(out_mu) - self.loop_out.pulse_mu(24) + self.loop_out.pulse_mu(48) # XXX in_mu = self.loop_in.timestamp_mu(gate_end_mu) print("timings: ", gate_start_mu, in_mu - lat_offset, gate_end_mu) @@ -460,11 +462,15 @@ class CoredeviceTest(ExperimentCase): def test_pulse_rate(self): """Minimum interval for sustained TTL output switching""" - self.execute(PulseRate) + exp = self.execute(PulseRate) rate = self.dataset_mgr.get("pulse_rate") print(rate) self.assertGreater(rate, 100*ns) - self.assertLess(rate, 480*ns) + if exp.core.target_cls == CortexA9Target: + # Crappy AXI PS/PL interface from Xilinx is slow. + self.assertLess(rate, 810*ns) + else: + self.assertLess(rate, 480*ns) def test_pulse_rate_ad9914_dds(self): """Minimum interval for sustained AD9914 DDS frequency switching""" @@ -621,11 +627,13 @@ class _DMA(EnvExperiment): self.delta = now_mu() - start @kernel - def playback_many(self, n): + def playback_many(self, n, add_delay=False): handle = self.core_dma.get_handle(self.trace_name) self.core.break_realtime() t1 = self.core.get_rtio_counter_mu() for i in range(n): + if add_delay: + delay(2*us) self.core_dma.playback_handle(handle) t2 = self.core.get_rtio_counter_mu() self.set_dataset("dma_playback_time", self.core.mu_to_seconds(t2 - t1)) @@ -718,13 +726,18 @@ class DMATest(ExperimentCase): self.device_mgr.get_desc("ad9914dds0") except KeyError: raise unittest.SkipTest("skipped on Kasli for now") + exp = self.create(_DMA) + is_zynq = exp.core.target_cls == CortexA9Target count = 20000 exp.record_many(40) - exp.playback_many(count) + exp.playback_many(count, is_zynq) dt = self.dataset_mgr.get("dma_playback_time") print("dt={}, dt/count={}".format(dt, dt/count)) - self.assertLess(dt/count, 4.5*us) + if is_zynq: + 
self.assertLess(dt/count, 6.2*us) + else: + self.assertLess(dt/count, 4.5*us) def test_dma_underflow(self): exp = self.create(_DMA)