diff --git a/artiq/test/coredevice/test_performance.py b/artiq/test/coredevice/test_performance.py index 2786dc551..f029549a2 100644 --- a/artiq/test/coredevice/test_performance.py +++ b/artiq/test/coredevice/test_performance.py @@ -1,44 +1,45 @@ import os import time import unittest +from typing import Literal + import numpy -from numpy import int32, float64 +from numpy import int32, float64, ndarray from artiq.experiment import * from artiq.test.hardware_testbench import ExperimentCase +from artiq.coredevice.core import Core + + +bool_list_large = [True] * (1 << 20) +bool_list_small = [True] * (1 << 10) # large: 1MB payload # small: 1KB payload -bytes_large = b"\x00" * (1 << 20) -bytes_small = b"\x00" * (1 << 10) - list_large = [123] * (1 << 18) list_small = [123] * (1 << 8) array_large = numpy.array(list_large, int32) array_small = numpy.array(list_small, int32) -byte_list_large = [True] * (1 << 20) -byte_list_small = [True] * (1 << 10) - received_bytes = 0 time_start = 0 time_end = 0 + +@nac3 class _Transfer(EnvExperiment): + core: KernelInvariant[Core] + count: KernelInvariant[int32] + h2d: Kernel[list[float]] + d2h: Kernel[list[float]] + def build(self): self.setattr_device("core") self.count = 10 self.h2d = [0.0] * self.count self.d2h = [0.0] * self.count - @rpc - def get_bytes(self, large: bool) -> bytes: - if large: - return bytes_large - else: - return bytes_small - @rpc def get_list(self, large: bool) -> list[int32]: if large: @@ -47,14 +48,14 @@ class _Transfer(EnvExperiment): return list_small @rpc - def get_byte_list(self, large: bool) -> list[bool]: + def get_bool_list(self, large: bool) -> list[bool]: if large: - return byte_list_large + return bool_list_large else: - return byte_list_small + return bool_list_small @rpc - def get_array(self, large: bool) -> numpy.ndarray: # NAC3TODO: [int32] + def get_array(self, large: bool) -> ndarray[int32, Literal[1]]: if large: return array_large else: @@ -65,11 +66,19 @@ class _Transfer(EnvExperiment): return string_list @rpc - def sink(self, data): + def sink_bool_list(self, data: list[bool]): pass - @rpc(flags={"async"}) - def sink_async(self, data): + @rpc + def sink_list(self, data: list[int32]): + pass + + @rpc + def sink_array(self, data: ndarray[int32, Literal[1]]): + pass + + @rpc # NAC3TODO (flags={"async"}) + def sink_async(self, data: list[int32]): global received_bytes, time_start, time_end if received_bytes == 0: time_start = time.time() @@ -81,73 +90,48 @@ class _Transfer(EnvExperiment): def get_async_throughput(self) -> float: return 128.0 / (time_end - time_start) + @kernel - def test_bytes(self, large): - def inner(): + def test_bool_list(self, large: bool): + for i in range(self.count): t0 = self.core.get_rtio_counter_mu() - data = self.get_bytes(large) + data = self.get_bool_list(large) t1 = self.core.get_rtio_counter_mu() - self.sink(data) + self.sink_bool_list(data) t2 = self.core.get_rtio_counter_mu() self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.d2h[i] = self.core.mu_to_seconds(t2 - t1) - for i in range(self.count): - inner() - return (self.h2d, self.d2h) - @kernel - def test_byte_list(self, large): - def inner(): - t0 = self.core.get_rtio_counter_mu() - data = self.get_byte_list(large) - t1 = self.core.get_rtio_counter_mu() - self.sink(data) - t2 = self.core.get_rtio_counter_mu() - self.h2d[i] = self.core.mu_to_seconds(t1 - t0) - self.d2h[i] = self.core.mu_to_seconds(t2 - t1) - + def test_list(self, large: bool): for i in range(self.count): - inner() - return (self.h2d, self.d2h) - - @kernel - def test_list(self, large): - def inner(): t0 = self.core.get_rtio_counter_mu() data = self.get_list(large) t1 = self.core.get_rtio_counter_mu() - self.sink(data) + self.sink_list(data) t2 = self.core.get_rtio_counter_mu() self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.d2h[i] = self.core.mu_to_seconds(t2 - t1) - for i in range(self.count): - inner() - return (self.h2d, self.d2h) - @kernel - def test_array(self, large): - def inner(): + def test_array(self, large: bool): + for i in range(self.count): t0 = self.core.get_rtio_counter_mu() data = self.get_array(large) t1 = self.core.get_rtio_counter_mu() - self.sink(data) + self.sink_array(data) t2 = self.core.get_rtio_counter_mu() self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.d2h[i] = self.core.mu_to_seconds(t2 - t1) - for i in range(self.count): - inner() - return (self.h2d, self.d2h) - @kernel - def test_async(self): - data = self.get_bytes(True) + def test_async(self) -> float: + data = self.get_list(True) for _ in range(128): self.sink_async(data) return self.get_async_throughput() + class TransferTest(ExperimentCase): @classmethod def setUpClass(self): @@ -169,59 +153,35 @@ class TransferTest(ExperimentCase): print("| {} | {:>12.2f} | {:>12.2f} |".format( pad(v[0]), v[1], v[2])) - def test_bytes_large(self): + def test_bool_list_large(self): exp = self.create(_Transfer) - results = exp.test_bytes(True) - host_to_device = (1 << 20) / numpy.array(results[0], float64) - device_to_host = (1 << 20) / numpy.array(results[1], float64) + exp.test_bool_list(True) + host_to_device = (1 << 20) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 20) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 - self.results.append(["Bytes (1MB) H2D", host_to_device.mean(), + self.results.append(["Bool List (1MB) H2D", host_to_device.mean(), host_to_device.std()]) - self.results.append(["Bytes (1MB) D2H", device_to_host.mean(), + self.results.append(["Bool List (1MB) D2H", device_to_host.mean(), device_to_host.std()]) - def test_bytes_small(self): + def test_bool_list_small(self): exp = self.create(_Transfer) - results = exp.test_bytes(False) - host_to_device = (1 << 10) / numpy.array(results[0], float64) - device_to_host = (1 << 10) / numpy.array(results[1], float64) + exp.test_bool_list(False) + host_to_device = (1 << 10) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 10) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 - self.results.append(["Bytes (1KB) H2D", host_to_device.mean(), + self.results.append(["Bool List (1KB) H2D", host_to_device.mean(), host_to_device.std()]) - self.results.append(["Bytes (1KB) D2H", device_to_host.mean(), - device_to_host.std()]) - - def test_byte_list_large(self): - exp = self.create(_Transfer) - results = exp.test_byte_list(True) - host_to_device = (1 << 20) / numpy.array(results[0], float64) - device_to_host = (1 << 20) / numpy.array(results[1], float64) - host_to_device /= 1024*1024 - device_to_host /= 1024*1024 - self.results.append(["Bytes List (1MB) H2D", host_to_device.mean(), - host_to_device.std()]) - self.results.append(["Bytes List (1MB) D2H", device_to_host.mean(), - device_to_host.std()]) - - def test_byte_list_small(self): - exp = self.create(_Transfer) - results = exp.test_byte_list(False) - host_to_device = (1 << 10) / numpy.array(results[0], float64) - device_to_host = (1 << 10) / numpy.array(results[1], float64) - host_to_device /= 1024*1024 - device_to_host /= 1024*1024 - self.results.append(["Bytes List (1KB) H2D", host_to_device.mean(), - host_to_device.std()]) - self.results.append(["Bytes List (1KB) D2H", device_to_host.mean(), + self.results.append(["Bool List (1KB) D2H", device_to_host.mean(), device_to_host.std()]) def test_list_large(self): exp = self.create(_Transfer) - results = exp.test_list(True) - host_to_device = (1 << 20) / numpy.array(results[0], float64) - device_to_host = (1 << 20) / numpy.array(results[1], float64) + exp.test_list(True) + host_to_device = (1 << 20) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 20) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 self.results.append(["I32 List (1MB) H2D", host_to_device.mean(), @@ -231,9 +191,9 @@ class TransferTest(ExperimentCase): def test_list_small(self): exp = self.create(_Transfer) - results = exp.test_list(False) - host_to_device = (1 << 10) / numpy.array(results[0], float64) - device_to_host = (1 << 10) / numpy.array(results[1], float64) + exp.test_list(False) + host_to_device = (1 << 10) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 10) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 self.results.append(["I32 List (1KB) H2D", host_to_device.mean(), @@ -243,9 +203,9 @@ class TransferTest(ExperimentCase): def test_array_large(self): exp = self.create(_Transfer) - results = exp.test_array(True) - host_to_device = (1 << 20) / numpy.array(results[0], float64) - device_to_host = (1 << 20) / numpy.array(results[1], float64) + exp.test_array(True) + host_to_device = (1 << 20) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 20) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 self.results.append(["I32 Array (1MB) H2D", host_to_device.mean(), @@ -255,9 +215,9 @@ class TransferTest(ExperimentCase): def test_array_small(self): exp = self.create(_Transfer) - results = exp.test_array(False) - host_to_device = (1 << 10) / numpy.array(results[0], float64) - device_to_host = (1 << 10) / numpy.array(results[1], float64) + exp.test_array(False) + host_to_device = (1 << 10) / numpy.array(exp.h2d, float64) + device_to_host = (1 << 10) / numpy.array(exp.d2h, float64) host_to_device /= 1024*1024 device_to_host /= 1024*1024 self.results.append(["I32 Array (1KB) H2D", host_to_device.mean(), @@ -265,12 +225,17 @@ class TransferTest(ExperimentCase): self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), device_to_host.std()]) + @unittest.skip("NAC3TODO https://git.m-labs.hk/M-Labs/nac3/issues/182") def test_async_throughput(self): exp = self.create(_Transfer) results = exp.test_async() print("Async throughput: {:>6.2f}MiB/s".format(results)) + +@nac3 class _KernelOverhead(EnvExperiment): + core: KernelInvariant[Core] + def build(self): self.setattr_device("core")