2018-03-09 05:50:48 +08:00
|
|
|
"""Worker process implementation.
|
|
|
|
|
|
|
|
This module contains the worker process main() function and the glue code
necessary to connect the global artefacts used from experiment code (scheduler,
device database, etc.) to their actual implementation in the parent master
process via IPC.
|
|
|
|
"""
|
|
|
|
|
2014-10-05 16:25:31 +08:00
|
|
|
import sys
|
2015-02-22 04:42:26 +08:00
|
|
|
import time
|
2015-08-07 15:51:56 +08:00
|
|
|
import os
|
2015-10-20 18:11:50 +08:00
|
|
|
import logging
|
2016-01-16 09:58:45 +08:00
|
|
|
import traceback
|
2015-12-06 17:27:15 +08:00
|
|
|
from collections import OrderedDict
|
2014-10-05 16:25:31 +08:00
|
|
|
|
2016-04-08 10:21:26 +08:00
|
|
|
import h5py
|
|
|
|
|
2016-01-16 09:28:26 +08:00
|
|
|
import artiq
|
2016-01-26 21:59:36 +08:00
|
|
|
from artiq.protocols import pipe_ipc, pyon
|
2016-04-04 22:02:42 +08:00
|
|
|
from artiq.protocols.packed_exceptions import raise_packed_exc
|
2016-01-27 04:59:37 +08:00
|
|
|
from artiq.tools import multiline_log_config, file_import
|
2016-07-09 22:53:28 +08:00
|
|
|
from artiq.master.worker_db import DeviceManager, DatasetManager, DummyDevice
|
2016-04-16 19:31:07 +08:00
|
|
|
from artiq.language.environment import (is_experiment, TraceArgumentManager,
|
|
|
|
ProcessArgumentManager)
|
2015-10-06 13:50:00 +08:00
|
|
|
from artiq.language.core import set_watchdog_factory, TerminationRequested
|
2016-06-27 14:37:29 +08:00
|
|
|
from artiq.language.types import TBool
|
2016-08-06 12:01:49 +08:00
|
|
|
from artiq.compiler import import_cache
|
2016-01-16 09:28:26 +08:00
|
|
|
from artiq.coredevice.core import CompileError, host_only, _render_diagnostic
|
2015-11-09 12:32:29 +08:00
|
|
|
from artiq import __version__ as artiq_version
|
2014-12-08 19:22:02 +08:00
|
|
|
|
|
|
|
|
2016-01-26 21:59:36 +08:00
|
|
|
# IPC channel to the parent master process. main() rebinds this to a
# pipe_ipc.ChildComm instance; get_object() and put_object() read and write it.
ipc = None
|
|
|
|
|
2016-04-05 15:38:49 +08:00
|
|
|
|
2014-12-31 17:41:22 +08:00
|
|
|
def get_object():
    """Read one line from the IPC channel and return it PYON-decoded."""
    return pyon.decode(ipc.readline().decode())
|
|
|
|
|
|
|
|
|
2014-10-05 16:25:31 +08:00
|
|
|
def put_object(obj):
    """PYON-encode ``obj`` and write it to the IPC channel as one line."""
    encoded_line = pyon.encode(obj) + "\n"
    ipc.write(encoded_line.encode())
|
2014-10-05 16:25:31 +08:00
|
|
|
|
|
|
|
|
2016-04-04 22:02:42 +08:00
|
|
|
def make_parent_action(action):
    """Build a function that forwards its call to the parent over IPC.

    The returned function sends a request holding ``action`` plus the
    positional and keyword arguments it was called with, then reads one
    reply.  It returns ``reply["data"]`` when the reply status is "ok" and
    otherwise hands ``reply["exception"]`` to ``raise_packed_exc``.
    """
    def parent_action(*args, **kwargs):
        put_object({"action": action, "args": args, "kwargs": kwargs})
        reply = get_object()

        # A reply carrying an "action" key is an instruction rather than a
        # result; "terminate" is the only one understood here.
        if "action" in reply:
            if reply["action"] != "terminate":
                raise ValueError
            sys.exit()

        if reply["status"] == "ok":
            return reply["data"]
        raise_packed_exc(reply["exception"])
    return parent_action
|
|
|
|
|
|
|
|
|
2015-10-12 17:18:23 +08:00
|
|
|
class ParentDeviceDB:
    """Device database whose lookups are requests sent to the parent."""
    # Each attribute is a plain function produced by make_parent_action();
    # main() passes this class itself (not an instance) to DeviceManager.
    get = make_parent_action("get_device")
    get_device_db = make_parent_action("get_device_db")
|
2015-01-12 18:51:23 +08:00
|
|
|
|
|
|
|
|
2015-10-12 17:18:23 +08:00
|
|
|
class ParentDatasetDB:
    """Dataset database whose operations are requests sent to the parent."""
    # Each attribute is a plain function produced by make_parent_action();
    # main() passes this class itself (not an instance) to DatasetManager.
    update = make_parent_action("update_dataset")
    get = make_parent_action("get_dataset")
|
2015-01-13 19:12:19 +08:00
|
|
|
|
|
|
|
|
2015-03-11 23:43:07 +08:00
|
|
|
class Watchdog:
    """Context manager backed by watchdog requests to the parent.

    Entering the context sends "create_watchdog" with ``t`` and stores the
    returned identifier; leaving it sends "delete_watchdog" with that
    identifier.  Exceptions raised inside the context are not suppressed.
    """
    _create = make_parent_action("create_watchdog")
    _delete = make_parent_action("delete_watchdog")

    def __init__(self, t):
        # Passed through unchanged to the parent on __enter__
        # (presumably a timeout - confirm against the master side).
        self.t = t

    def __enter__(self):
        watchdog_id = Watchdog._create(self.t)
        self.wid = watchdog_id

    def __exit__(self, type, value, traceback):
        Watchdog._delete(self.wid)
|
|
|
|
|
|
|
|
|
2015-04-28 23:23:59 +08:00
|
|
|
# Register the IPC-backed Watchdog class as the watchdog factory of
# artiq.language.core (done once, at import time).
set_watchdog_factory(Watchdog)
|
|
|
|
|
|
|
|
|
2015-02-20 03:09:11 +08:00
|
|
|
class Scheduler:
    """Virtual "scheduler" device whose operations are parent requests.

    main() installs an instance under the "scheduler" key of the device
    manager's virtual devices and calls set_run_info() on it during "build".
    """

    def set_run_info(self, rid, pipeline_name, expid, priority):
        """Remember the parameters of the current run.

        check_pause() and submit() fall back to these values for arguments
        that are left as None.
        """
        self.rid, self.pipeline_name = rid, pipeline_name
        self.expid, self.priority = expid, priority

    pause_noexc = staticmethod(make_parent_action("pause"))

    @host_only
    def pause(self):
        """Send a "pause" request and raise if its result is truthy."""
        must_terminate = self.pause_noexc()
        if must_terminate:
            raise TerminationRequested

    _check_pause = staticmethod(make_parent_action("scheduler_check_pause"))

    def check_pause(self, rid=None) -> TBool:
        """Send "scheduler_check_pause" for ``rid`` (default: current run)."""
        return self._check_pause(self.rid if rid is None else rid)

    _submit = staticmethod(make_parent_action("scheduler_submit"))

    def submit(self, pipeline_name=None, expid=None, priority=None,
               due_date=None, flush=False):
        """Send a "scheduler_submit" request and return its result.

        ``pipeline_name``, ``expid`` and ``priority`` default to the values
        recorded by set_run_info() when they are None.
        """
        return self._submit(
            self.pipeline_name if pipeline_name is None else pipeline_name,
            self.expid if expid is None else expid,
            self.priority if priority is None else priority,
            due_date, flush)

    # The remaining operations are forwarded to the parent unchanged.
    delete = staticmethod(make_parent_action("scheduler_delete"))
    request_termination = staticmethod(
        make_parent_action("scheduler_request_termination"))
    get_status = staticmethod(make_parent_action("scheduler_get_status"))
|
|
|
|
|
2015-02-20 03:09:11 +08:00
|
|
|
|
2016-09-05 00:53:44 +08:00
|
|
|
class CCB:
    """Virtual "ccb" device; issue() is sent as a "ccb_issue" request."""
    # staticmethod keeps the instance from being passed as first argument.
    issue = staticmethod(make_parent_action("ccb_issue"))
|
|
|
|
|
|
|
|
|
2015-07-15 17:08:12 +08:00
|
|
|
def get_exp(file, class_name):
    """Import ``file`` and return the experiment class to use.

    With a ``class_name`` the attribute of that name is returned from the
    imported module.  With ``class_name`` None the module must contain
    exactly one object accepted by ``is_experiment``; otherwise ValueError
    is raised.
    """
    module = file_import(file, prefix="artiq_worker_")
    if class_name is not None:
        return getattr(module, class_name)

    candidates = [value for value in module.__dict__.values()
                  if is_experiment(value)]
    if len(candidates) != 1:
        raise ValueError("Found {} experiments in module"
                         .format(len(candidates)))
    return candidates[0]
|
2015-01-13 19:12:19 +08:00
|
|
|
|
|
|
|
|
2015-10-28 17:35:57 +08:00
|
|
|
# Sends a "register_experiment" request to the parent; called by examine()
# once per experiment class it finds.
register_experiment = make_parent_action("register_experiment")
|
2015-07-15 16:54:44 +08:00
|
|
|
|
|
|
|
|
2015-10-12 17:18:23 +08:00
|
|
|
class ExamineDeviceMgr:
    """Device manager stand-in that main() passes to examine().

    The device database is still fetched from the parent, but get() never
    creates a real device.
    """
    get_device_db = make_parent_action("get_device_db")

    @staticmethod
    def get(name):
        # The requested name is ignored; every call yields a new DummyDevice.
        placeholder = DummyDevice()
        return placeholder
|
2015-07-15 16:54:44 +08:00
|
|
|
|
|
|
|
|
2017-01-07 23:20:17 +08:00
|
|
|
class ExamineDatasetMgr:
    """Dataset manager stand-in that main() passes to examine().

    Reads are forwarded to ParentDatasetDB; updates are discarded.
    """

    @staticmethod
    def get(key, archive=False):
        # ``archive`` is accepted but not forwarded to the parent.
        value = ParentDatasetDB.get(key)
        return value

    @staticmethod
    def update(self, mod):
        # NOTE(review): declared static yet takes ``self``, so callers must
        # supply two positional arguments - confirm this is intentional.
        return None
|
|
|
|
|
|
|
|
|
2015-10-12 17:18:23 +08:00
|
|
|
def examine(device_mgr, dataset_mgr, file):
    """Register every public experiment class found in ``file``.

    The file is imported, and each module attribute whose name does not
    start with "_" and that ``is_experiment`` accepts is instantiated with a
    TraceArgumentManager, then reported through ``register_experiment``
    together with its display name, argument descriptions and scheduler
    defaults.

    The display name is the first line of the class docstring (leading and
    trailing blank lines ignored) without a trailing period.  The class name
    is used when there is no docstring or it contains only whitespace.

    Entries added to ``sys.modules`` while examining are removed again
    before returning, whether or not an exception occurred.
    """
    previous_keys = set(sys.modules.keys())
    try:
        module = file_import(file)
        for class_name, exp_class in module.__dict__.items():
            if class_name.startswith("_"):
                continue
            if not is_experiment(exp_class):
                continue

            # Strip the whole docstring before taking its first line so that
            # docstrings opening with a newline (or empty ones) no longer
            # raise IndexError.
            doc = exp_class.__doc__
            summary = "" if doc is None else doc.strip()
            if summary:
                name = summary.splitlines()[0].strip()
                if name.endswith("."):
                    name = name[:-1]
            else:
                name = class_name

            argument_mgr = TraceArgumentManager()
            scheduler_defaults = {}
            # Only the side effects of construction are needed: afterwards
            # argument_mgr.requested_args and scheduler_defaults are read
            # (presumably filled in by the experiment's build step).
            exp_class((device_mgr, dataset_mgr, argument_mgr,
                       scheduler_defaults))
            arginfo = OrderedDict(
                (k, (proc.describe(), group, tooltip))
                for k, (proc, group, tooltip)
                in argument_mgr.requested_args.items())
            register_experiment(class_name, name, arginfo, scheduler_defaults)
    finally:
        new_keys = set(sys.modules.keys())
        for key in new_keys - previous_keys:
            del sys.modules[key]
|
2015-07-15 16:54:44 +08:00
|
|
|
|
|
|
|
|
2016-01-16 09:28:26 +08:00
|
|
|
def setup_diagnostics(experiment_file, repository_path):
    """Route compiler diagnostics through the ``logging`` module.

    Replaces ``_DiagnosticEngine.render_diagnostic`` with a function that
    renders the diagnostic uncoloured, prefixes it with
    "While compiling <experiment_file>", substitutes "<repository>" for
    every occurrence of ``repository_path`` (when it is not None), and logs
    the result as a warning for "warning"-level diagnostics or as an error
    for all other levels.
    """
    def render_diagnostic(self, diagnostic):
        message = ("While compiling {}\n".format(experiment_file)
                   + _render_diagnostic(diagnostic, colored=False))
        if repository_path is not None:
            message = message.replace(repository_path, "<repository>")

        if diagnostic.level == "warning":
            # logging.warning() replaces the deprecated logging.warn() alias.
            logging.warning(message)
        else:
            logging.error(message)

    # This is kind of gross, but 1) we do not have any explicit connection
    # between the worker and a coredevice.core.Core instance at all,
    # and 2) the diagnostic engine really ought to be per-Core, since
    # that's what uses it and the repository path is per-Core.
    # So I don't know how to implement this properly for now.
    #
    # This hack is as good or bad as any other solution that involves
    # putting inherently local objects (the diagnostic engine) into
    # global slots, and there isn't any point in making it prettier by
    # wrapping it in layers of indirection.
    artiq.coredevice.core._DiagnosticEngine.render_diagnostic = \
        render_diagnostic
|
2016-01-16 09:28:26 +08:00
|
|
|
|
2017-03-27 17:53:07 +08:00
|
|
|
def put_exception_report():
    """Log the exception being handled and send an "exception" message.

    Must be called from an ``except`` block.  Nothing is logged for
    CompileError; the "exception" message is sent to the parent in every
    case.
    """
    exc = sys.exc_info()[1]
    # When we get CompileError, a more suitable diagnostic has already
    # been printed.
    if not isinstance(exc, CompileError):
        summary = type(exc).__name__
        text = str(exc)
        if text:
            # Only the first line of the message goes into the headline.
            summary = summary + ": " + text.splitlines()[0]

        report = ["Terminating with exception (" + summary + ")\n"]
        if hasattr(exc, "artiq_core_exception"):
            report.append(str(exc.artiq_core_exception))

        has_parent_traceback = hasattr(exc, "parent_traceback")
        if has_parent_traceback:
            report.extend(exc.parent_traceback)
            report.extend(traceback.format_exception_only(type(exc), exc))

        # The local traceback is attached only when the exception does not
        # carry a parent_traceback of its own.
        logging.error("".join(report).rstrip(),
                      exc_info=not has_parent_traceback)
    put_object({"action": "exception"})
|
|
|
|
|
2016-01-17 01:47:35 +08:00
|
|
|
|
2015-03-10 06:34:09 +08:00
|
|
|
def main():
    """Run the worker: serve requests from the parent until told to stop.

    ``sys.argv[1]`` is handed to ``pipe_ipc.ChildComm`` to open the IPC
    channel and ``sys.argv[2]`` is the integer logging level.  Every request
    is an object with an "action" key; the recognised actions are "build",
    "prepare", "run", "analyze", "write_results", "examine" and "terminate".
    A request with any other action is ignored and gets no reply.
    """
    global ipc

    multiline_log_config(level=int(sys.argv[2]))
    ipc = pipe_ipc.ChildComm(sys.argv[1])

    # State shared between successive requests.
    start_time = None
    run_time = None
    rid = None
    expid = None
    exp = None
    exp_inst = None
    repository_path = None

    virtual_devices = {"scheduler": Scheduler(),
                       "ccb": CCB()}
    device_mgr = DeviceManager(ParentDeviceDB,
                               virtual_devices=virtual_devices)
    dataset_mgr = DatasetManager(ParentDatasetDB)

    import_cache.install_hook()

    try:
        while True:
            request = get_object()
            action = request["action"]
            if action == "build":
                start_time = time.time()
                rid = request["rid"]
                expid = request["expid"]
                repository_path = request["wd"]
                if repository_path is not None:
                    # Using repository
                    experiment_file = os.path.join(repository_path,
                                                   expid["file"])
                else:
                    experiment_file = expid["file"]
                setup_diagnostics(experiment_file, repository_path)
                exp = get_exp(experiment_file, expid["class_name"])
                device_mgr.virtual_devices["scheduler"].set_run_info(
                    rid, request["pipeline_name"], expid,
                    request["priority"])

                # Results are written under results/<date>/<hour>; the
                # worker changes into that directory before building.
                start_local_time = time.localtime(start_time)
                results_dir = os.path.join(
                    "results",
                    time.strftime("%Y-%m-%d", start_local_time),
                    time.strftime("%H", start_local_time))
                os.makedirs(results_dir, exist_ok=True)
                os.chdir(results_dir)

                argument_mgr = ProcessArgumentManager(expid["arguments"])
                exp_inst = exp((device_mgr, dataset_mgr, argument_mgr, {}))
                put_object({"action": "completed"})
            elif action == "prepare":
                exp_inst.prepare()
                put_object({"action": "completed"})
            elif action == "run":
                run_time = time.time()
                exp_inst.run()
                put_object({"action": "completed"})
            elif action == "analyze":
                try:
                    exp_inst.analyze()
                except:
                    # make analyze failure non-fatal, as we may still want to
                    # write results afterwards
                    put_exception_report()
                else:
                    put_object({"action": "completed"})
            elif action == "write_results":
                filename = "{:09}-{}.h5".format(rid, exp.__name__)
                with h5py.File(filename, "w") as h5file:
                    dataset_mgr.write_hdf5(h5file)
                    h5file["artiq_version"] = artiq_version
                    h5file["rid"] = rid
                    h5file["start_time"] = start_time
                    h5file["run_time"] = run_time
                    h5file["expid"] = pyon.encode(expid)
                put_object({"action": "completed"})
            elif action == "examine":
                examine(ExamineDeviceMgr, ExamineDatasetMgr, request["file"])
                put_object({"action": "completed"})
            elif action == "terminate":
                break
    except:
        # Deliberately left bare: this also reports non-Exception exits such
        # as the SystemExit raised by sys.exit().
        put_exception_report()
    finally:
        device_mgr.close_devices()
        ipc.close()
|
2014-10-05 16:25:31 +08:00
|
|
|
|
2015-10-20 18:11:50 +08:00
|
|
|
|
2014-10-05 16:25:31 +08:00
|
|
|
# Start the worker loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
|