From d8b1e59538822b3377a30ba21306073f1367f3d7 Mon Sep 17 00:00:00 2001 From: Etienne Wodey Date: Mon, 16 Nov 2020 21:10:56 +0100 Subject: [PATCH 1/3] datasets: allow passing options to HDF5 backend (e.g. compression) This breaks the internal dataset representation used by applets and when saving to disk (``dataset_db.pyon``). See ``test/test_dataset_db.py`` and ``test/test_datasets.py`` for examples. Signed-off-by: Etienne Wodey --- RELEASE_NOTES.rst | 8 +++ artiq/applets/big_number.py | 2 +- artiq/applets/image.py | 2 +- artiq/applets/plot_hist.py | 4 +- artiq/applets/plot_xy.py | 9 ++-- artiq/applets/plot_xy_hist.py | 6 +-- artiq/applets/simple.py | 3 +- artiq/browser/datasets.py | 4 +- artiq/dashboard/datasets.py | 18 +++---- artiq/language/environment.py | 13 ++++- artiq/master/databases.py | 39 +++++++++++--- artiq/master/worker_db.py | 38 +++++++++----- artiq/test/test_dataset_db.py | 99 +++++++++++++++++++++++++++++++++++ artiq/test/test_datasets.py | 43 +++++++++++++-- artiq/test/test_scheduler.py | 10 +++- 15 files changed, 248 insertions(+), 50 deletions(-) create mode 100644 artiq/test/test_dataset_db.py diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1c668fd3f..bbd2cd113 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -13,6 +13,10 @@ Highlights: - Improved documentation - Expose the DAC coarse mixer and sif_sync - Exposes upconverter calibration and enabling/disabling of upconverter LO & RF outputs. +* HDF5 options can now be passed when creating datasets with ``set_dataset``. This allows + in particular to use transparent compression filters as follows: + ``set_dataset(name, value, compression="gzip")``. + Breaking changes: * Updated Phaser-Upconverter default frequency 2.875 GHz. The new default uses the target PFD @@ -20,6 +24,9 @@ Breaking changes: * `Phaser.init()` now disables all Kasli-oscillators. This avoids full power RF output being generated for some configurations. 
* Phaser: fixed coarse mixer frequency configuration +* The internal dataset representation was changed to support tracking HDF5 options like e.g. + a compression method. This requires changes to code reading the dataset persistence file + (``dataset_db.pyon``) and to custom applets. ARTIQ-6 @@ -89,6 +96,7 @@ Breaking changes: * Experiment classes with underscore-prefixed names are now ignored when ``artiq_client`` determines which experiment to submit (consistent with ``artiq_run``). + ARTIQ-5 ------- diff --git a/artiq/applets/big_number.py b/artiq/applets/big_number.py index 62348c8cf..a0714b734 100755 --- a/artiq/applets/big_number.py +++ b/artiq/applets/big_number.py @@ -13,7 +13,7 @@ class NumberWidget(QtWidgets.QLCDNumber): def data_changed(self, data, mods): try: - n = float(data[self.dataset_name][1]) + n = float(data[self.dataset_name]["value"]) except (KeyError, ValueError, TypeError): n = "---" self.display(n) diff --git a/artiq/applets/image.py b/artiq/applets/image.py index b7d36c1a1..4bc6b5f86 100755 --- a/artiq/applets/image.py +++ b/artiq/applets/image.py @@ -13,7 +13,7 @@ class Image(pyqtgraph.ImageView): def data_changed(self, data, mods): try: - img = data[self.args.img][1] + img = data[self.args.img]["value"] except KeyError: return self.setImage(img) diff --git a/artiq/applets/plot_hist.py b/artiq/applets/plot_hist.py index dc46997b9..44a4d242e 100755 --- a/artiq/applets/plot_hist.py +++ b/artiq/applets/plot_hist.py @@ -13,11 +13,11 @@ class HistogramPlot(pyqtgraph.PlotWidget): def data_changed(self, data, mods, title): try: - y = data[self.args.y][1] + y = data[self.args.y]["value"] if self.args.x is None: x = None else: - x = data[self.args.x][1] + x = data[self.args.x]["value"] except KeyError: return if x is None: diff --git a/artiq/applets/plot_xy.py b/artiq/applets/plot_xy.py index da7f2197d..ae387cb75 100755 --- a/artiq/applets/plot_xy.py +++ b/artiq/applets/plot_xy.py @@ -5,6 +5,7 @@ import PyQt5 # make sure pyqtgraph imports Qt5 
import pyqtgraph from artiq.applets.simple import TitleApplet +from artiq.master.databases import make_dataset as empty_dataset class XYPlot(pyqtgraph.PlotWidget): @@ -14,14 +15,14 @@ class XYPlot(pyqtgraph.PlotWidget): def data_changed(self, data, mods, title): try: - y = data[self.args.y][1] + y = data[self.args.y]["value"] except KeyError: return - x = data.get(self.args.x, (False, None))[1] + x = data.get(self.args.x, empty_dataset())["value"] if x is None: x = np.arange(len(y)) - error = data.get(self.args.error, (False, None))[1] - fit = data.get(self.args.fit, (False, None))[1] + error = data.get(self.args.error, empty_dataset())["value"] + fit = data.get(self.args.fit, empty_dataset())["value"] if not len(y) or len(y) != len(x): return diff --git a/artiq/applets/plot_xy_hist.py b/artiq/applets/plot_xy_hist.py index 7b8135561..a3a35b4ad 100755 --- a/artiq/applets/plot_xy_hist.py +++ b/artiq/applets/plot_xy_hist.py @@ -112,9 +112,9 @@ class XYHistPlot(QtWidgets.QSplitter): def data_changed(self, data, mods): try: - xs = data[self.args.xs][1] - histogram_bins = data[self.args.histogram_bins][1] - histograms_counts = data[self.args.histograms_counts][1] + xs = data[self.args.xs]["value"] + histogram_bins = data[self.args.histogram_bins]["value"] + histograms_counts = data[self.args.histograms_counts]["value"] except KeyError: return if self._can_use_partial(mods): diff --git a/artiq/applets/simple.py b/artiq/applets/simple.py index e5776310a..db6f2334d 100644 --- a/artiq/applets/simple.py +++ b/artiq/applets/simple.py @@ -10,6 +10,7 @@ from sipyco.sync_struct import Subscriber, process_mod from sipyco import pyon from sipyco.pipe_ipc import AsyncioChildComm +from artiq.master.databases import make_dataset as empty_dataset logger = logging.getLogger(__name__) @@ -251,7 +252,7 @@ class TitleApplet(SimpleApplet): def emit_data_changed(self, data, mod_buffer): if self.args.title is not None: - title_values = {k.replace(".", "/"): data.get(k, (False, None))[1] + 
title_values = {k.replace(".", "/"): data.get(k, empty_dataset())["value"] for k in self.dataset_title} try: title = self.args.title.format(**title_values) diff --git a/artiq/browser/datasets.py b/artiq/browser/datasets.py index b66b18216..d3d8171ac 100644 --- a/artiq/browser/datasets.py +++ b/artiq/browser/datasets.py @@ -104,8 +104,8 @@ class DatasetsDock(QtWidgets.QDockWidget): idx = self.table_model_filter.mapToSource(idx[0]) key = self.table_model.index_to_key(idx) if key is not None: - persist, value = self.table_model.backing_store[key] - asyncio.ensure_future(self._upload_dataset(key, value)) + dataset = self.table_model.backing_store[key] + asyncio.ensure_future(self._upload_dataset(key, dataset["value"])) def save_state(self): return bytes(self.table.header().saveState()) diff --git a/artiq/dashboard/datasets.py b/artiq/dashboard/datasets.py index 5e353d4c9..e0c3a7981 100644 --- a/artiq/dashboard/datasets.py +++ b/artiq/dashboard/datasets.py @@ -83,16 +83,16 @@ class StringEditor(Editor): class Model(DictSyncTreeSepModel): - def __init__(self, init): - DictSyncTreeSepModel.__init__(self, ".", - ["Dataset", "Persistent", "Value"], - init) + def __init__(self, init): + DictSyncTreeSepModel.__init__( + self, ".", ["Dataset", "Persistent", "Value"], init + ) def convert(self, k, v, column): if column == 1: - return "Y" if v[0] else "N" + return "Y" if v["persist"] else "N" elif column == 2: - return short_format(v[1]) + return short_format(v["value"]) else: raise ValueError @@ -152,8 +152,8 @@ class DatasetsDock(QtWidgets.QDockWidget): idx = self.table_model_filter.mapToSource(idx[0]) key = self.table_model.index_to_key(idx) if key is not None: - persist, value = self.table_model.backing_store[key] - t = type(value) + dataset = self.table_model.backing_store[key] + t = type(dataset["value"]) if np.issubdtype(t, np.number): dialog_cls = NumberEditor elif np.issubdtype(t, np.bool_): @@ -164,7 +164,7 @@ class DatasetsDock(QtWidgets.QDockWidget): 
logger.error("Cannot edit dataset %s: " "type %s is not supported", key, t) return - dialog_cls(self, self.dataset_ctl, key, value).open() + dialog_cls(self, self.dataset_ctl, key, dataset["value"]).open() def delete_clicked(self): idx = self.table.selectedIndexes() diff --git a/artiq/language/environment.py b/artiq/language/environment.py index 7992fe3af..fa64c7906 100644 --- a/artiq/language/environment.py +++ b/artiq/language/environment.py @@ -331,7 +331,8 @@ class HasEnvironment: @rpc(flags={"async"}) def set_dataset(self, key, value, - broadcast=False, persist=False, archive=True, save=None): + broadcast=False, persist=False, archive=True, save=None, + **hdf5_options): """Sets the contents and handling modes of a dataset. Datasets must be scalars (``bool``, ``int``, ``float`` or NumPy scalar) @@ -344,12 +345,20 @@ class HasEnvironment: :param archive: the data is saved into the local storage of the current run (archived as a HDF5 file). :param save: deprecated. + :param hdf5_options: additional keyword arguments are passed to + :meth:`h5py.Group.create_dataset`. For example, pass ``compression="gzip"`` + to enable transparent zlib compression of this dataset in the HDF5 archive. + See the `h5py documentation `_ + for a list of valid options. 
""" if save is not None: warnings.warn("set_dataset save parameter is deprecated, " "use archive instead", FutureWarning) archive = save - self.__dataset_mgr.set(key, value, broadcast, persist, archive) + + self.__dataset_mgr.set( + key, value, broadcast, persist, archive, hdf5_options + ) @rpc(flags={"async"}) def mutate_dataset(self, key, index, value): diff --git a/artiq/master/databases.py b/artiq/master/databases.py index 14cfae4cd..fcf1ad31c 100644 --- a/artiq/master/databases.py +++ b/artiq/master/databases.py @@ -35,6 +35,15 @@ class DeviceDB: return desc +def make_dataset(*, persist=False, value=None, hdf5_options=None): + "PYON-serializable representation of a dataset in the DatasetDB" + return { + "persist": persist, + "value": value, + "hdf5_options": hdf5_options or {}, + } + + class DatasetDB(TaskObject): def __init__(self, persist_file, autosave_period=30): self.persist_file = persist_file @@ -44,10 +53,23 @@ class DatasetDB(TaskObject): file_data = pyon.load_file(self.persist_file) except FileNotFoundError: file_data = dict() - self.data = Notifier({k: (True, v) for k, v in file_data.items()}) + self.data = Notifier( + { + k: make_dataset( + persist=True, + value=v["value"], + hdf5_options=v["hdf5_options"] + ) + for k, v in file_data.items() + } + ) def save(self): - data = {k: v[1] for k, v in self.data.raw_view.items() if v[0]} + data = { + k: d + for k, d in self.data.raw_view.items() + if d["persist"] + } pyon.store_file(self.persist_file, data) async def _do(self): @@ -59,20 +81,23 @@ class DatasetDB(TaskObject): self.save() def get(self, key): - return self.data.raw_view[key][1] + return self.data.raw_view[key] def update(self, mod): process_mod(self.data, mod) # convenience functions (update() can be used instead) - def set(self, key, value, persist=None): + def set(self, key, value, persist=None, **hdf5_options): if persist is None: if key in self.data.raw_view: - persist = self.data.raw_view[key][0] + persist = 
self.data.raw_view[key].persist else: persist = False - self.data[key] = (persist, value) + self.data[key] = make_dataset( + persist=persist, + value=value, + hdf5_options=hdf5_options, + ) def delete(self, key): del self.data[key] - # diff --git a/artiq/master/worker_db.py b/artiq/master/worker_db.py index 172846145..8a2200e05 100644 --- a/artiq/master/worker_db.py +++ b/artiq/master/worker_db.py @@ -8,9 +8,12 @@ from operator import setitem import importlib import logging +import numpy as np + from sipyco.sync_struct import Notifier from sipyco.pc_rpc import AutoTarget, Client, BestEffortClient +from artiq.master.databases import make_dataset logger = logging.getLogger(__name__) @@ -115,7 +118,8 @@ class DatasetManager: self.ddb = ddb self._broadcaster.publish = ddb.update - def set(self, key, value, broadcast=False, persist=False, archive=True): + def set(self, key, value, broadcast=False, persist=False, archive=True, + hdf5_options=None): if key in self.archive: logger.warning("Modifying dataset '%s' which is in archive, " "archive will remain untouched", @@ -125,12 +129,20 @@ class DatasetManager: broadcast = True if broadcast: - self._broadcaster[key] = persist, value + self._broadcaster[key] = make_dataset( + persist=persist, + value=value, + hdf5_options=hdf5_options, + ) elif key in self._broadcaster.raw_view: del self._broadcaster[key] if archive: - self.local[key] = value + self.local[key] = make_dataset( + persist=persist, + value=value, + hdf5_options=hdf5_options, + ) elif key in self.local: del self.local[key] @@ -138,11 +150,11 @@ class DatasetManager: target = self.local.get(key, None) if key in self._broadcaster.raw_view: if target is not None: - assert target is self._broadcaster.raw_view[key][1] - return self._broadcaster[key][1] + assert target["value"] is self._broadcaster.raw_view[key]["value"] + return self._broadcaster[key]["value"] if target is None: raise KeyError("Cannot mutate nonexistent dataset '{}'".format(key)) - return target + 
return target["value"] def mutate(self, key, index, value): target = self._get_mutation_target(key) @@ -158,15 +170,15 @@ class DatasetManager: def get(self, key, archive=False): if key in self.local: - return self.local[key] - - data = self.ddb.get(key) + return self.local[key]["value"] + + dataset = self.ddb.get(key) if archive: if key in self.archive: logger.warning("Dataset '%s' is already in archive, " "overwriting", key, stack_info=True) - self.archive[key] = data - return data + self.archive[key] = dataset + return dataset["value"] def write_hdf5(self, f): datasets_group = f.create_group("datasets") @@ -182,7 +194,7 @@ def _write(group, k, v): # Add context to exception message when the user writes a dataset that is # not representable in HDF5. try: - group[k] = v + group.create_dataset(k, data=v["value"], **v["hdf5_options"]) except TypeError as e: raise TypeError("Error writing dataset '{}' of type '{}': {}".format( - k, type(v), e)) + k, type(v["value"]), e)) diff --git a/artiq/test/test_dataset_db.py b/artiq/test/test_dataset_db.py new file mode 100644 index 000000000..74aff8219 --- /dev/null +++ b/artiq/test/test_dataset_db.py @@ -0,0 +1,99 @@ +"""Test internal dataset representation (persistence, applets)""" +import unittest +import tempfile + +from artiq.master.databases import DatasetDB +from sipyco import pyon + +KEY1 = "key1" +KEY2 = "key2" +KEY3 = "key3" +DATA = list(range(10)) +COMP = "gzip" + + +class TestDatasetDB(unittest.TestCase): + def setUp(self): + # empty dataset persistence file + self.persist_file = tempfile.NamedTemporaryFile(mode="w+") + print("{}", file=self.persist_file, flush=True) + + self.ddb = DatasetDB(self.persist_file.name) + + self.ddb.set(KEY1, DATA, persist=True) + self.ddb.set(KEY2, DATA, persist=True, compression=COMP) + self.ddb.set(KEY3, DATA, shuffle=True) + + self.save_ddb_to_disk() + + def save_ddb_to_disk(self): + self.ddb.save() + self.persist_file.flush() + + def load_ddb_from_disk(self): + return
pyon.load_file(self.persist_file.name) + + def test_persist_format(self): + data = pyon.load_file(self.persist_file.name) + + for key in [KEY1, KEY2]: + self.assertTrue(data[key]["persist"]) + self.assertEqual(data[key]["value"], DATA) + + self.assertEqual(data[KEY2]["hdf5_options"]["compression"], COMP) + self.assertEqual(data[KEY1]["hdf5_options"], dict()) + + def test_only_persist_marked_datasets(self): + data = self.load_ddb_from_disk() + + with self.assertRaises(KeyError): + data[KEY3] + + def test_memory_format(self): + ds = self.ddb.get(KEY2) + self.assertTrue(ds["persist"]) + self.assertEqual(ds["value"], DATA) + self.assertEqual(ds["hdf5_options"]["compression"], COMP) + + ds = self.ddb.get(KEY3) + self.assertFalse(ds["persist"]) + self.assertEqual(ds["value"], DATA) + self.assertTrue(ds["hdf5_options"]["shuffle"]) + + def test_delete(self): + self.ddb.delete(KEY1) + self.save_ddb_to_disk() + + data = self.load_ddb_from_disk() + + with self.assertRaises(KeyError): + data[KEY1] + + self.assertTrue(data[KEY2]["persist"]) + + def test_update(self): + self.assertFalse(self.ddb.get(KEY3)["persist"]) + + mod = { + "action": "setitem", + "path": [KEY3], + "key": "persist", + "value": True, + } + + self.ddb.update(mod) + self.assertTrue(self.ddb.get(KEY3)["persist"]) + + def test_update_hdf5_options(self): + with self.assertRaises(KeyError): + self.ddb.get(KEY1)["hdf5_options"]["shuffle"] + + mod = { + "action": "setitem", + "path": [KEY1, "hdf5_options"], + "key": "shuffle", + "value": False, + } + + self.ddb.update(mod) + self.assertFalse(self.ddb.get(KEY1)["hdf5_options"]["shuffle"]) diff --git a/artiq/test/test_datasets.py b/artiq/test/test_datasets.py index 871568a2a..0d86a4b7c 100644 --- a/artiq/test/test_datasets.py +++ b/artiq/test/test_datasets.py @@ -3,6 +3,9 @@ import copy import unittest +import h5py +import numpy as np + from sipyco.sync_struct import process_mod from artiq.experiment import EnvExperiment @@ -14,7 +17,7 @@ class MockDatasetDB: 
self.data = dict() def get(self, key): - return self.data[key][1] + return self.data[key]["value"] def update(self, mod): # Copy mod before applying to avoid sharing references to objects @@ -82,9 +85,9 @@ class ExperimentDatasetCase(unittest.TestCase): def test_append_broadcast(self): self.exp.set(KEY, [], broadcast=True) self.exp.append(KEY, 0) - self.assertEqual(self.dataset_db.data[KEY][1], [0]) + self.assertEqual(self.dataset_db.data[KEY]["value"], [0]) self.exp.append(KEY, 1) - self.assertEqual(self.dataset_db.data[KEY][1], [0, 1]) + self.assertEqual(self.dataset_db.data[KEY]["value"], [0, 1]) def test_append_array(self): for broadcast in (True, False): @@ -103,3 +106,37 @@ class ExperimentDatasetCase(unittest.TestCase): with self.assertRaises(KeyError): self.exp.append(KEY, 0) + def test_write_hdf5_options(self): + data = np.random.randint(0, 1024, 1024) + self.exp.set(KEY, data, + compression="gzip", compression_opts=6, + shuffle=True, fletcher32=True) + + with h5py.File("test.h5", "a", "core", backing_store=False) as f: + self.dataset_mgr.write_hdf5(f) + + self.assertTrue(np.array_equal(f["datasets"][KEY][()], data)) + self.assertEqual(f["datasets"][KEY].compression, "gzip") + self.assertEqual(f["datasets"][KEY].compression_opts, 6) + self.assertTrue(f["datasets"][KEY].shuffle) + self.assertTrue(f["datasets"][KEY].fletcher32) + + def test_write_hdf5_no_options(self): + data = np.random.randint(0, 1024, 1024) + self.exp.set(KEY, data) + + with h5py.File("test.h5", "a", "core", backing_store=False) as f: + self.dataset_mgr.write_hdf5(f) + self.assertTrue(np.array_equal(f["datasets"][KEY][()], data)) + self.assertIsNone(f["datasets"][KEY].compression) + + def test_write_hdf5_invalid_type(self): + class CustomType: + def __init__(self, x): + self.x = x + + self.exp.set(KEY, CustomType(42)) + + with h5py.File("test.h5", "w", "core", backing_store=False) as f: + with self.assertRaisesRegex(TypeError, "CustomType"): + self.dataset_mgr.write_hdf5(f) diff --git 
a/artiq/test/test_scheduler.py b/artiq/test/test_scheduler.py index ad4f243bd..5a8cdb6bc 100644 --- a/artiq/test/test_scheduler.py +++ b/artiq/test/test_scheduler.py @@ -7,6 +7,7 @@ from time import time, sleep from artiq.experiment import * from artiq.master.scheduler import Scheduler +from artiq.master.databases import make_dataset class EmptyExperiment(EnvExperiment): @@ -291,8 +292,13 @@ class SchedulerCase(unittest.TestCase): nonlocal termination_ok self.assertEqual( mod, - {"action": "setitem", "key": "termination_ok", - "value": (False, True), "path": []}) + { + "action": "setitem", + "key": "termination_ok", + "value": make_dataset(value=True), + "path": [] + } + ) termination_ok = True handlers = { "update_dataset": check_termination From 12ef907f34ddcbd9ff0473aa4b34e78b657c659e Mon Sep 17 00:00:00 2001 From: Etienne Wodey Date: Thu, 17 Jun 2021 16:30:38 +0200 Subject: [PATCH 2/3] master/databases: fix AttributeError in DatasetDB.set() Add corresponding unit test. Signed-off-by: Etienne Wodey --- artiq/master/databases.py | 2 +- artiq/test/test_dataset_db.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/artiq/master/databases.py b/artiq/master/databases.py index fcf1ad31c..310b5caec 100644 --- a/artiq/master/databases.py +++ b/artiq/master/databases.py @@ -90,7 +90,7 @@ class DatasetDB(TaskObject): def set(self, key, value, persist=None, **hdf5_options): if persist is None: if key in self.data.raw_view: - persist = self.data.raw_view[key].persist + persist = self.data.raw_view[key]["persist"] else: persist = False self.data[key] = make_dataset( diff --git a/artiq/test/test_dataset_db.py b/artiq/test/test_dataset_db.py index 74aff8219..3fa4b1f8a 100644 --- a/artiq/test/test_dataset_db.py +++ b/artiq/test/test_dataset_db.py @@ -97,3 +97,15 @@ class TestDatasetDB(unittest.TestCase): self.ddb.update(mod) self.assertFalse(self.ddb.get(KEY1)["hdf5_options"]["shuffle"]) + + def test_reset_copies_persist(self): + 
self.assertTrue(self.ddb.get(KEY1)["persist"]) + self.ddb.set(KEY1, DATA) + self.assertTrue(self.ddb.get(KEY1)["persist"]) + + self.assertFalse(self.ddb.get(KEY3)["persist"]) + self.ddb.set(KEY3, DATA) + self.assertFalse(self.ddb.get(KEY3)["persist"]) + + self.ddb.set(KEY3, DATA, persist=True) + self.assertTrue(self.ddb.get(KEY3)["persist"]) From 8bedf278f0457b570dedc6aa16dd0533c1baefb0 Mon Sep 17 00:00:00 2001 From: Etienne Wodey Date: Thu, 17 Jun 2021 16:43:05 +0200 Subject: [PATCH 3/3] set_dataset: pass HDF5 options as a dict, not as loose kwargs Signed-off-by: Etienne Wodey --- RELEASE_NOTES.rst | 2 +- artiq/language/environment.py | 6 +++--- artiq/master/databases.py | 2 +- artiq/test/test_dataset_db.py | 4 ++-- artiq/test/test_datasets.py | 13 ++++++++++--- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index bbd2cd113..c0cd659a5 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -15,7 +15,7 @@ Highlights: - Exposes upconverter calibration and enabling/disabling of upconverter LO & RF outputs. * HDF5 options can now be passed when creating datasets with ``set_dataset``. This allows in particular to use transparent compression filters as follows: - ``set_dataset(name, value, compression="gzip")``. + ``set_dataset(name, value, hdf5_options={"compression": "gzip"})``. Breaking changes: diff --git a/artiq/language/environment.py b/artiq/language/environment.py index fa64c7906..9647325cb 100644 --- a/artiq/language/environment.py +++ b/artiq/language/environment.py @@ -332,7 +332,7 @@ class HasEnvironment: @rpc(flags={"async"}) def set_dataset(self, key, value, broadcast=False, persist=False, archive=True, save=None, - **hdf5_options): + hdf5_options=None): """Sets the contents and handling modes of a dataset. 
Datasets must be scalars (``bool``, ``int``, ``float`` or NumPy scalar) @@ -345,8 +345,8 @@ class HasEnvironment: :param archive: the data is saved into the local storage of the current run (archived as a HDF5 file). :param save: deprecated. - :param hdf5_options: additional keyword arguments are passed to - :meth:`h5py.Group.create_dataset`. For example, pass ``compression="gzip"`` + :param hdf5_options: dict of keyword arguments to pass to + :meth:`h5py.Group.create_dataset`. For example, pass ``{"compression": "gzip"}`` to enable transparent zlib compression of this dataset in the HDF5 archive. See the `h5py documentation `_ for a list of valid options. diff --git a/artiq/master/databases.py b/artiq/master/databases.py index 310b5caec..8ef71c6a2 100644 --- a/artiq/master/databases.py +++ b/artiq/master/databases.py @@ -87,7 +87,7 @@ class DatasetDB(TaskObject): process_mod(self.data, mod) # convenience functions (update() can be used instead) - def set(self, key, value, persist=None, **hdf5_options): + def set(self, key, value, persist=None, hdf5_options=None): if persist is None: if key in self.data.raw_view: persist = self.data.raw_view[key]["persist"] diff --git a/artiq/test/test_dataset_db.py b/artiq/test/test_dataset_db.py index 3fa4b1f8a..3d087a806 100644 --- a/artiq/test/test_dataset_db.py +++ b/artiq/test/test_dataset_db.py @@ -21,8 +21,8 @@ class TestDatasetDB(unittest.TestCase): self.ddb = DatasetDB(self.persist_file.name) self.ddb.set(KEY1, DATA, persist=True) - self.ddb.set(KEY2, DATA, persist=True, compression=COMP) - self.ddb.set(KEY3, DATA, shuffle=True) + self.ddb.set(KEY2, DATA, persist=True, hdf5_options=dict(compression=COMP)) + self.ddb.set(KEY3, DATA, hdf5_options=dict(shuffle=True)) self.save_ddb_to_disk() diff --git a/artiq/test/test_datasets.py b/artiq/test/test_datasets.py index 0d86a4b7c..3fa6d6bb7 100644 --- a/artiq/test/test_datasets.py +++ b/artiq/test/test_datasets.py @@ -108,9 +108,16 @@ class 
ExperimentDatasetCase(unittest.TestCase): def test_write_hdf5_options(self): data = np.random.randint(0, 1024, 1024) - self.exp.set(KEY, data, - compression="gzip", compression_opts=6, - shuffle=True, fletcher32=True) + self.exp.set( + KEY, + data, + hdf5_options=dict( + compression="gzip", + compression_opts=6, + shuffle=True, + fletcher32=True + ), + ) with h5py.File("test.h5", "a", "core", backing_store=False) as f: self.dataset_mgr.write_hdf5(f)