forked from M-Labs/artiq
master: still save results when analyze fails. Closes #684
This commit is contained in:
parent
70343b244d
commit
432c6b99e2
@ -28,6 +28,7 @@ Release notes
|
|||||||
and ``Comm`` has been renamed ``CommKernel``.
|
and ``Comm`` has been renamed ``CommKernel``.
|
||||||
* The "collision" and "busy" RTIO errors are reported through the log instead of
|
* The "collision" and "busy" RTIO errors are reported through the log instead of
|
||||||
raising exceptions.
|
raising exceptions.
|
||||||
|
* Results are still saved when ``analyze`` raises an exception.
|
||||||
|
|
||||||
|
|
||||||
2.2
|
2.2
|
||||||
|
@ -305,14 +305,16 @@ class AnalyzeStage(TaskObject):
|
|||||||
run.status = RunStatus.analyzing
|
run.status = RunStatus.analyzing
|
||||||
try:
|
try:
|
||||||
await run.analyze()
|
await run.analyze()
|
||||||
|
except:
|
||||||
|
logger.error("got worker exception in analyze stage of RID %d."
|
||||||
|
" Results will still be saved.", run.rid)
|
||||||
|
log_worker_exception()
|
||||||
|
try:
|
||||||
await run.write_results()
|
await run.write_results()
|
||||||
except:
|
except:
|
||||||
logger.error("got worker exception in analyze stage, "
|
logger.error("failed to write results of RID %d.", run.rid)
|
||||||
"deleting RID %d", run.rid)
|
|
||||||
log_worker_exception()
|
log_worker_exception()
|
||||||
self.delete_cb(run.rid)
|
self.delete_cb(run.rid)
|
||||||
else:
|
|
||||||
self.delete_cb(run.rid)
|
|
||||||
|
|
||||||
|
|
||||||
class Pipeline:
|
class Pipeline:
|
||||||
|
@ -196,6 +196,25 @@ def setup_diagnostics(experiment_file, repository_path):
|
|||||||
artiq.coredevice.core._DiagnosticEngine.render_diagnostic = \
|
artiq.coredevice.core._DiagnosticEngine.render_diagnostic = \
|
||||||
render_diagnostic
|
render_diagnostic
|
||||||
|
|
||||||
|
def put_exception_report():
    """Log the exception currently being handled, then emit an exception action.

    The exception is taken from ``sys.exc_info()``, so this is meant to be
    called from inside an ``except`` clause.  Unless the exception is a
    ``CompileError``, a report is sent to ``logging.error``.  In every case
    ``{"action": "exception"}`` is passed to ``put_object`` afterwards.
    """
    # NOTE(review): block nesting was reconstructed from a diff view that
    # carried no indentation -- confirm against the real file.
    exc = sys.exc_info()[1]
    # A CompileError has already had a more suitable diagnostic printed,
    # so nothing further is logged for it.
    if not isinstance(exc, CompileError):
        # One-line summary: exception type name, plus the first line of its
        # message when the message is non-empty.
        summary = type(exc).__name__
        message = str(exc)
        if message:
            summary += ": " + message.splitlines()[0]
        report = ["Terminating with exception ("+summary+")\n"]
        if hasattr(exc, "artiq_core_exception"):
            report.append(str(exc.artiq_core_exception))
        has_parent_tb = hasattr(exc, "parent_traceback")
        if has_parent_tb:
            report += exc.parent_traceback
            report += traceback.format_exception_only(type(exc), exc)
        # When the exception carries its own parent traceback, it has been
        # appended above; otherwise let logging attach the current traceback.
        logging.error("".join(report).rstrip(), exc_info=not has_parent_tb)
    put_object({"action": "exception"})
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
global ipc
|
global ipc
|
||||||
@ -251,8 +270,14 @@ def main():
|
|||||||
exp_inst.run()
|
exp_inst.run()
|
||||||
put_object({"action": "completed"})
|
put_object({"action": "completed"})
|
||||||
elif action == "analyze":
|
elif action == "analyze":
|
||||||
exp_inst.analyze()
|
try:
|
||||||
put_object({"action": "completed"})
|
exp_inst.analyze()
|
||||||
|
except:
|
||||||
|
# make analyze failure non-fatal, as we may still want to
|
||||||
|
# write results afterwards
|
||||||
|
put_exception_report()
|
||||||
|
else:
|
||||||
|
put_object({"action": "completed"})
|
||||||
elif action == "write_results":
|
elif action == "write_results":
|
||||||
filename = "{:09}-{}.h5".format(rid, exp.__name__)
|
filename = "{:09}-{}.h5".format(rid, exp.__name__)
|
||||||
with h5py.File(filename, "w") as f:
|
with h5py.File(filename, "w") as f:
|
||||||
@ -267,23 +292,8 @@ def main():
|
|||||||
put_object({"action": "completed"})
|
put_object({"action": "completed"})
|
||||||
elif action == "terminate":
|
elif action == "terminate":
|
||||||
break
|
break
|
||||||
except Exception as exc:
|
except:
|
||||||
# When we get CompileError, a more suitable diagnostic has already
|
put_exception_report()
|
||||||
# been printed.
|
|
||||||
if not isinstance(exc, CompileError):
|
|
||||||
short_exc_info = type(exc).__name__
|
|
||||||
exc_str = str(exc)
|
|
||||||
if exc_str:
|
|
||||||
short_exc_info += ": " + exc_str.splitlines()[0]
|
|
||||||
lines = ["Terminating with exception ("+short_exc_info+")\n"]
|
|
||||||
if hasattr(exc, "artiq_core_exception"):
|
|
||||||
lines.append(str(exc.artiq_core_exception))
|
|
||||||
if hasattr(exc, "parent_traceback"):
|
|
||||||
lines += exc.parent_traceback
|
|
||||||
lines += traceback.format_exception_only(type(exc), exc)
|
|
||||||
logging.error("".join(lines).rstrip(),
|
|
||||||
exc_info=not hasattr(exc, "parent_traceback"))
|
|
||||||
put_object({"action": "exception"})
|
|
||||||
finally:
|
finally:
|
||||||
device_mgr.close_devices()
|
device_mgr.close_devices()
|
||||||
ipc.close()
|
ipc.close()
|
||||||
|
Loading…
Reference in New Issue
Block a user