master: archive input datasets. Closes #587

This commit is contained in:
Sebastien Bourdeauducq 2016-10-18 17:08:36 +08:00
parent ed2624545f
commit 69d96b0158
6 changed files with 45 additions and 11 deletions

View File

@@ -13,6 +13,9 @@ Release notes
* Dynamic __getattr__'s returning RPC target methods are not supported anymore. * Dynamic __getattr__'s returning RPC target methods are not supported anymore.
Controller driver classes must define all their methods intended for RPC as Controller driver classes must define all their methods intended for RPC as
members. members.
* Datasets requested by experiments are by default archived into their HDF5
output. If this behavior is undesirable, turn it off by passing
``archive=False`` to ``get_dataset``.
2.0 2.0

View File

@@ -187,8 +187,16 @@ class FilesDock(QtWidgets.QDockWidget):
except: except:
logger.warning("unable to read metadata from %s", logger.warning("unable to read metadata from %s",
info.filePath(), exc_info=True) info.filePath(), exc_info=True)
rd = dict()
if "archive" in f:
rd = {k: (True, v.value) for k, v in f["archive"].items()}
if "datasets" in f: if "datasets" in f:
rd = {k: (True, v.value) for k, v in f["datasets"].items()} for k, v in f["datasets"].items():
if k in rd:
logger.warning("dataset '%s' is both in archive and "
"outputs", k)
rd[k] = (True, v.value)
if rd:
self.datasets.init(rd) self.datasets.init(rd)
self.dataset_changed.emit(info.filePath()) self.dataset_changed.emit(info.filePath())

View File

@@ -58,7 +58,7 @@ class FloppingF(EnvExperiment):
# Use get_dataset so that analyze can be run stand-alone. # Use get_dataset so that analyze can be run stand-alone.
brightness = self.get_dataset("flopping_f_brightness") brightness = self.get_dataset("flopping_f_brightness")
try: try:
frequency = self.get_dataset("flopping_f_frequency") frequency = self.get_dataset("flopping_f_frequency", archive=False)
except KeyError: except KeyError:
# Since flopping_f_frequency is not saved, it is missing if # Since flopping_f_frequency is not saved, it is missing if
# analyze() is run on HDF5 data. But assuming that the arguments # analyze() is run on HDF5 data. But assuming that the arguments
@@ -68,7 +68,8 @@ class FloppingF(EnvExperiment):
self.set_dataset("flopping_f_frequency", frequency, self.set_dataset("flopping_f_frequency", frequency,
broadcast=True, save=False) broadcast=True, save=False)
popt, pcov = curve_fit(model, frequency, brightness, popt, pcov = curve_fit(model, frequency, brightness,
p0=[self.get_dataset("flopping_freq", 1500.0)]) p0=[self.get_dataset("flopping_freq", 1500.0,
archive=False)])
perr = np.sqrt(np.diag(pcov)) perr = np.sqrt(np.diag(pcov))
if perr < 0.1: if perr < 0.1:
F0 = float(popt) F0 = float(popt)

View File

@@ -303,7 +303,7 @@ class HasEnvironment:
as ``slice(*sub_tuple)`` (multi-dimensional slicing).""" as ``slice(*sub_tuple)`` (multi-dimensional slicing)."""
self.__dataset_mgr.mutate(key, index, value) self.__dataset_mgr.mutate(key, index, value)
def get_dataset(self, key, default=NoDefault): def get_dataset(self, key, default=NoDefault, archive=True):
"""Returns the contents of a dataset. """Returns the contents of a dataset.
The local storage is searched first, followed by the master storage The local storage is searched first, followed by the master storage
@@ -312,19 +312,25 @@ class HasEnvironment:
If the dataset does not exist, returns the default value. If no default If the dataset does not exist, returns the default value. If no default
is provided, raises ``KeyError``. is provided, raises ``KeyError``.
By default, datasets obtained by this method are archived into the output
HDF5 file of the experiment. If an archived dataset is requested more
than one time (and therefore its value has potentially changed) or is
modified, a warning is emitted. Archival can be turned off by setting
the ``archive`` argument to ``False``.
""" """
try: try:
return self.__dataset_mgr.get(key) return self.__dataset_mgr.get(key, archive)
except KeyError: except KeyError:
if default is NoDefault: if default is NoDefault:
raise raise
else: else:
return default return default
def setattr_dataset(self, key, default=NoDefault): def setattr_dataset(self, key, default=NoDefault, archive=True):
"""Sets the contents of a dataset as attribute. The names of the """Sets the contents of a dataset as attribute. The names of the
dataset and of the attribute are the same.""" dataset and of the attribute are the same."""
setattr(self, key, self.get_dataset(key, default)) setattr(self, key, self.get_dataset(key, default, archive))
class Experiment: class Experiment:

View File

@@ -181,11 +181,17 @@ class DatasetManager:
def __init__(self, ddb): def __init__(self, ddb):
self.broadcast = Notifier(dict()) self.broadcast = Notifier(dict())
self.local = dict() self.local = dict()
self.archive = dict()
self.ddb = ddb self.ddb = ddb
self.broadcast.publish = ddb.update self.broadcast.publish = ddb.update
def set(self, key, value, broadcast=False, persist=False, save=True): def set(self, key, value, broadcast=False, persist=False, save=True):
if key in self.archive:
logger.warning("Modifying dataset '%s' which is in archive, "
"archive will remain untouched",
key, stack_info=True)
if persist: if persist:
broadcast = True broadcast = True
if broadcast: if broadcast:
@@ -211,12 +217,22 @@ class DatasetManager:
index = slice(*index) index = slice(*index)
setitem(target, index, value) setitem(target, index, value)
def get(self, key): def get(self, key, archive):
if key in self.local: if key in self.local:
return self.local[key] return self.local[key]
else: else:
return self.ddb.get(key) data = self.ddb.get(key)
if archive:
if key in self.archive:
logger.warning("Dataset '%s' is already in archive, "
"overwriting", key, stack_info=True)
self.archive[key] = data
return data
def write_hdf5(self, f): def write_hdf5(self, f):
datasets_group = f.create_group("datasets")
for k, v in self.local.items(): for k, v in self.local.items():
f[k] = v datasets_group[k] = v
archive_group = f.create_group("archive")
for k, v in self.archive.items():
archive_group[k] = v

View File

@@ -245,7 +245,7 @@ def main():
elif action == "write_results": elif action == "write_results":
filename = "{:09}-{}.h5".format(rid, exp.__name__) filename = "{:09}-{}.h5".format(rid, exp.__name__)
with h5py.File(filename, "w") as f: with h5py.File(filename, "w") as f:
dataset_mgr.write_hdf5(f.create_group("datasets")) dataset_mgr.write_hdf5(f)
f["artiq_version"] = artiq_version f["artiq_version"] = artiq_version
f["rid"] = rid f["rid"] = rid
f["start_time"] = int(time.mktime(start_time)) f["start_time"] = int(time.mktime(start_time))