1
0
forked from M-Labs/artiq

test/performance: port to NAC3

This commit is contained in:
Sébastien Bourdeauducq 2024-08-30 13:21:27 +08:00
parent a7b0450593
commit 016691cee3

View File

@ -1,44 +1,45 @@
import os import os
import time import time
import unittest import unittest
from typing import Literal
import numpy import numpy
from numpy import int32, float64 from numpy import int32, float64, ndarray
from artiq.experiment import * from artiq.experiment import *
from artiq.test.hardware_testbench import ExperimentCase from artiq.test.hardware_testbench import ExperimentCase
from artiq.coredevice.core import Core
bool_list_large = [True] * (1 << 20)
bool_list_small = [True] * (1 << 10)
# large: 1MB payload # large: 1MB payload
# small: 1KB payload # small: 1KB payload
bytes_large = b"\x00" * (1 << 20)
bytes_small = b"\x00" * (1 << 10)
list_large = [123] * (1 << 18) list_large = [123] * (1 << 18)
list_small = [123] * (1 << 8) list_small = [123] * (1 << 8)
array_large = numpy.array(list_large, int32) array_large = numpy.array(list_large, int32)
array_small = numpy.array(list_small, int32) array_small = numpy.array(list_small, int32)
byte_list_large = [True] * (1 << 20)
byte_list_small = [True] * (1 << 10)
received_bytes = 0 received_bytes = 0
time_start = 0 time_start = 0
time_end = 0 time_end = 0
@nac3
class _Transfer(EnvExperiment): class _Transfer(EnvExperiment):
core: KernelInvariant[Core]
count: KernelInvariant[int32]
h2d: Kernel[list[float]]
d2h: Kernel[list[float]]
def build(self): def build(self):
self.setattr_device("core") self.setattr_device("core")
self.count = 10 self.count = 10
self.h2d = [0.0] * self.count self.h2d = [0.0] * self.count
self.d2h = [0.0] * self.count self.d2h = [0.0] * self.count
@rpc
def get_bytes(self, large: bool) -> bytes:
if large:
return bytes_large
else:
return bytes_small
@rpc @rpc
def get_list(self, large: bool) -> list[int32]: def get_list(self, large: bool) -> list[int32]:
if large: if large:
@ -47,14 +48,14 @@ class _Transfer(EnvExperiment):
return list_small return list_small
@rpc @rpc
def get_byte_list(self, large: bool) -> list[bool]: def get_bool_list(self, large: bool) -> list[bool]:
if large: if large:
return byte_list_large return bool_list_large
else: else:
return byte_list_small return bool_list_small
@rpc @rpc
def get_array(self, large: bool) -> numpy.ndarray: # NAC3TODO: [int32] def get_array(self, large: bool) -> ndarray[int32, Literal[1]]:
if large: if large:
return array_large return array_large
else: else:
@ -65,11 +66,19 @@ class _Transfer(EnvExperiment):
return string_list return string_list
@rpc @rpc
def sink(self, data): def sink_bool_list(self, data: list[bool]):
pass pass
@rpc(flags={"async"}) @rpc
def sink_async(self, data): def sink_list(self, data: list[int32]):
pass
@rpc
def sink_array(self, data: ndarray[int32, Literal[1]]):
pass
@rpc # NAC3TODO (flags={"async"})
def sink_async(self, data: list[int32]):
global received_bytes, time_start, time_end global received_bytes, time_start, time_end
if received_bytes == 0: if received_bytes == 0:
time_start = time.time() time_start = time.time()
@ -81,73 +90,48 @@ class _Transfer(EnvExperiment):
def get_async_throughput(self) -> float: def get_async_throughput(self) -> float:
return 128.0 / (time_end - time_start) return 128.0 / (time_end - time_start)
@kernel @kernel
def test_bytes(self, large): def test_bool_list(self, large: bool):
def inner(): for i in range(self.count):
t0 = self.core.get_rtio_counter_mu() t0 = self.core.get_rtio_counter_mu()
data = self.get_bytes(large) data = self.get_bool_list(large)
t1 = self.core.get_rtio_counter_mu() t1 = self.core.get_rtio_counter_mu()
self.sink(data) self.sink_bool_list(data)
t2 = self.core.get_rtio_counter_mu() t2 = self.core.get_rtio_counter_mu()
self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.h2d[i] = self.core.mu_to_seconds(t1 - t0)
self.d2h[i] = self.core.mu_to_seconds(t2 - t1) self.d2h[i] = self.core.mu_to_seconds(t2 - t1)
for i in range(self.count):
inner()
return (self.h2d, self.d2h)
@kernel @kernel
def test_byte_list(self, large): def test_list(self, large: bool):
def inner():
t0 = self.core.get_rtio_counter_mu()
data = self.get_byte_list(large)
t1 = self.core.get_rtio_counter_mu()
self.sink(data)
t2 = self.core.get_rtio_counter_mu()
self.h2d[i] = self.core.mu_to_seconds(t1 - t0)
self.d2h[i] = self.core.mu_to_seconds(t2 - t1)
for i in range(self.count): for i in range(self.count):
inner()
return (self.h2d, self.d2h)
@kernel
def test_list(self, large):
def inner():
t0 = self.core.get_rtio_counter_mu() t0 = self.core.get_rtio_counter_mu()
data = self.get_list(large) data = self.get_list(large)
t1 = self.core.get_rtio_counter_mu() t1 = self.core.get_rtio_counter_mu()
self.sink(data) self.sink_list(data)
t2 = self.core.get_rtio_counter_mu() t2 = self.core.get_rtio_counter_mu()
self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.h2d[i] = self.core.mu_to_seconds(t1 - t0)
self.d2h[i] = self.core.mu_to_seconds(t2 - t1) self.d2h[i] = self.core.mu_to_seconds(t2 - t1)
for i in range(self.count):
inner()
return (self.h2d, self.d2h)
@kernel @kernel
def test_array(self, large): def test_array(self, large: bool):
def inner(): for i in range(self.count):
t0 = self.core.get_rtio_counter_mu() t0 = self.core.get_rtio_counter_mu()
data = self.get_array(large) data = self.get_array(large)
t1 = self.core.get_rtio_counter_mu() t1 = self.core.get_rtio_counter_mu()
self.sink(data) self.sink_array(data)
t2 = self.core.get_rtio_counter_mu() t2 = self.core.get_rtio_counter_mu()
self.h2d[i] = self.core.mu_to_seconds(t1 - t0) self.h2d[i] = self.core.mu_to_seconds(t1 - t0)
self.d2h[i] = self.core.mu_to_seconds(t2 - t1) self.d2h[i] = self.core.mu_to_seconds(t2 - t1)
for i in range(self.count):
inner()
return (self.h2d, self.d2h)
@kernel @kernel
def test_async(self): def test_async(self) -> float:
data = self.get_bytes(True) data = self.get_list(True)
for _ in range(128): for _ in range(128):
self.sink_async(data) self.sink_async(data)
return self.get_async_throughput() return self.get_async_throughput()
class TransferTest(ExperimentCase): class TransferTest(ExperimentCase):
@classmethod @classmethod
def setUpClass(self): def setUpClass(self):
@ -169,59 +153,35 @@ class TransferTest(ExperimentCase):
print("| {} | {:>12.2f} | {:>12.2f} |".format( print("| {} | {:>12.2f} | {:>12.2f} |".format(
pad(v[0]), v[1], v[2])) pad(v[0]), v[1], v[2]))
def test_bytes_large(self): def test_bool_list_large(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_bytes(True) exp.test_bool_list(True)
host_to_device = (1 << 20) / numpy.array(results[0], float64) host_to_device = (1 << 20) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 20) / numpy.array(results[1], float64) device_to_host = (1 << 20) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["Bytes (1MB) H2D", host_to_device.mean(), self.results.append(["Bool List (1MB) H2D", host_to_device.mean(),
host_to_device.std()]) host_to_device.std()])
self.results.append(["Bytes (1MB) D2H", device_to_host.mean(), self.results.append(["Bool List (1MB) D2H", device_to_host.mean(),
device_to_host.std()]) device_to_host.std()])
def test_bytes_small(self): def test_bool_list_small(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_bytes(False) exp.test_bool_list(False)
host_to_device = (1 << 10) / numpy.array(results[0], float64) host_to_device = (1 << 10) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 10) / numpy.array(results[1], float64) device_to_host = (1 << 10) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["Bytes (1KB) H2D", host_to_device.mean(), self.results.append(["Bool List (1KB) H2D", host_to_device.mean(),
host_to_device.std()]) host_to_device.std()])
self.results.append(["Bytes (1KB) D2H", device_to_host.mean(), self.results.append(["Bool List (1KB) D2H", device_to_host.mean(),
device_to_host.std()])
def test_byte_list_large(self):
exp = self.create(_Transfer)
results = exp.test_byte_list(True)
host_to_device = (1 << 20) / numpy.array(results[0], float64)
device_to_host = (1 << 20) / numpy.array(results[1], float64)
host_to_device /= 1024*1024
device_to_host /= 1024*1024
self.results.append(["Bytes List (1MB) H2D", host_to_device.mean(),
host_to_device.std()])
self.results.append(["Bytes List (1MB) D2H", device_to_host.mean(),
device_to_host.std()])
def test_byte_list_small(self):
exp = self.create(_Transfer)
results = exp.test_byte_list(False)
host_to_device = (1 << 10) / numpy.array(results[0], float64)
device_to_host = (1 << 10) / numpy.array(results[1], float64)
host_to_device /= 1024*1024
device_to_host /= 1024*1024
self.results.append(["Bytes List (1KB) H2D", host_to_device.mean(),
host_to_device.std()])
self.results.append(["Bytes List (1KB) D2H", device_to_host.mean(),
device_to_host.std()]) device_to_host.std()])
def test_list_large(self): def test_list_large(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_list(True) exp.test_list(True)
host_to_device = (1 << 20) / numpy.array(results[0], float64) host_to_device = (1 << 20) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 20) / numpy.array(results[1], float64) device_to_host = (1 << 20) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["I32 List (1MB) H2D", host_to_device.mean(), self.results.append(["I32 List (1MB) H2D", host_to_device.mean(),
@ -231,9 +191,9 @@ class TransferTest(ExperimentCase):
def test_list_small(self): def test_list_small(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_list(False) exp.test_list(False)
host_to_device = (1 << 10) / numpy.array(results[0], float64) host_to_device = (1 << 10) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 10) / numpy.array(results[1], float64) device_to_host = (1 << 10) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["I32 List (1KB) H2D", host_to_device.mean(), self.results.append(["I32 List (1KB) H2D", host_to_device.mean(),
@ -243,9 +203,9 @@ class TransferTest(ExperimentCase):
def test_array_large(self): def test_array_large(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_array(True) exp.test_array(True)
host_to_device = (1 << 20) / numpy.array(results[0], float64) host_to_device = (1 << 20) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 20) / numpy.array(results[1], float64) device_to_host = (1 << 20) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["I32 Array (1MB) H2D", host_to_device.mean(), self.results.append(["I32 Array (1MB) H2D", host_to_device.mean(),
@ -255,9 +215,9 @@ class TransferTest(ExperimentCase):
def test_array_small(self): def test_array_small(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_array(False) exp.test_array(False)
host_to_device = (1 << 10) / numpy.array(results[0], float64) host_to_device = (1 << 10) / numpy.array(exp.h2d, float64)
device_to_host = (1 << 10) / numpy.array(results[1], float64) device_to_host = (1 << 10) / numpy.array(exp.d2h, float64)
host_to_device /= 1024*1024 host_to_device /= 1024*1024
device_to_host /= 1024*1024 device_to_host /= 1024*1024
self.results.append(["I32 Array (1KB) H2D", host_to_device.mean(), self.results.append(["I32 Array (1KB) H2D", host_to_device.mean(),
@ -265,12 +225,17 @@ class TransferTest(ExperimentCase):
self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(), self.results.append(["I32 Array (1KB) D2H", device_to_host.mean(),
device_to_host.std()]) device_to_host.std()])
@unittest.skip("NAC3TODO https://git.m-labs.hk/M-Labs/nac3/issues/182")
def test_async_throughput(self): def test_async_throughput(self):
exp = self.create(_Transfer) exp = self.create(_Transfer)
results = exp.test_async() results = exp.test_async()
print("Async throughput: {:>6.2f}MiB/s".format(results)) print("Async throughput: {:>6.2f}MiB/s".format(results))
@nac3
class _KernelOverhead(EnvExperiment): class _KernelOverhead(EnvExperiment):
core: KernelInvariant[Core]
def build(self): def build(self):
self.setattr_device("core") self.setattr_device("core")