forked from M-Labs/artiq
1
0
Fork 0

master: still save results when analyze fails. Closes #684

This commit is contained in:
Sebastien Bourdeauducq 2017-03-27 17:53:07 +08:00
parent 70343b244d
commit 432c6b99e2
3 changed files with 37 additions and 24 deletions

View File

@ -28,6 +28,7 @@ Release notes
and ``Comm`` has been renamed ``CommKernel``. and ``Comm`` has been renamed ``CommKernel``.
* The "collision" and "busy" RTIO errors are reported through the log instead of * The "collision" and "busy" RTIO errors are reported through the log instead of
raising exceptions. raising exceptions.
* Results are still saved when ``analyze`` raises an exception.
2.2 2.2

View File

@ -305,14 +305,16 @@ class AnalyzeStage(TaskObject):
run.status = RunStatus.analyzing run.status = RunStatus.analyzing
try: try:
await run.analyze() await run.analyze()
except:
logger.error("got worker exception in analyze stage of RID %d."
" Results will still be saved.", run.rid)
log_worker_exception()
try:
await run.write_results() await run.write_results()
except: except:
logger.error("got worker exception in analyze stage, " logger.error("failed to write results of RID %d.", run.rid)
"deleting RID %d", run.rid)
log_worker_exception() log_worker_exception()
self.delete_cb(run.rid) self.delete_cb(run.rid)
else:
self.delete_cb(run.rid)
class Pipeline: class Pipeline:

View File

@ -196,6 +196,25 @@ def setup_diagnostics(experiment_file, repository_path):
artiq.coredevice.core._DiagnosticEngine.render_diagnostic = \ artiq.coredevice.core._DiagnosticEngine.render_diagnostic = \
render_diagnostic render_diagnostic
def put_exception_report():
    """Log the exception currently being handled, then report it.

    Must be called from inside an ``except`` block: the exception is taken
    from ``sys.exc_info()``.  Every exception except ``CompileError`` is
    logged through ``logging.error``; in all cases an
    ``{"action": "exception"}`` object is then passed to ``put_object``.
    """
    exc = sys.exc_info()[1]
    # A CompileError has already had a more suitable diagnostic printed,
    # so it is not logged a second time here.
    if not isinstance(exc, CompileError):
        # One-line summary: exception class name, plus the first line of
        # its message when it has one.
        summary = type(exc).__name__
        message = str(exc)
        if message:
            summary = summary + ": " + message.splitlines()[0]

        report = ["Terminating with exception (" + summary + ")\n"]
        if hasattr(exc, "artiq_core_exception"):
            report.append(str(exc.artiq_core_exception))

        has_parent_traceback = hasattr(exc, "parent_traceback")
        if has_parent_traceback:
            # The exception carries its own pre-formatted traceback lines;
            # use them and append the final "Type: message" line by hand.
            report.extend(exc.parent_traceback)
            report.extend(traceback.format_exception_only(type(exc), exc))

        # Without a parent traceback, let logging attach the current
        # traceback itself through exc_info.
        logging.error("".join(report).rstrip(),
                      exc_info=not has_parent_traceback)
    put_object({"action": "exception"})
def main(): def main():
global ipc global ipc
@ -251,8 +270,14 @@ def main():
exp_inst.run() exp_inst.run()
put_object({"action": "completed"}) put_object({"action": "completed"})
elif action == "analyze": elif action == "analyze":
exp_inst.analyze() try:
put_object({"action": "completed"}) exp_inst.analyze()
except:
# make analyze failure non-fatal, as we may still want to
# write results afterwards
put_exception_report()
else:
put_object({"action": "completed"})
elif action == "write_results": elif action == "write_results":
filename = "{:09}-{}.h5".format(rid, exp.__name__) filename = "{:09}-{}.h5".format(rid, exp.__name__)
with h5py.File(filename, "w") as f: with h5py.File(filename, "w") as f:
@ -267,23 +292,8 @@ def main():
put_object({"action": "completed"}) put_object({"action": "completed"})
elif action == "terminate": elif action == "terminate":
break break
except Exception as exc: except:
# When we get CompileError, a more suitable diagnostic has already put_exception_report()
# been printed.
if not isinstance(exc, CompileError):
short_exc_info = type(exc).__name__
exc_str = str(exc)
if exc_str:
short_exc_info += ": " + exc_str.splitlines()[0]
lines = ["Terminating with exception ("+short_exc_info+")\n"]
if hasattr(exc, "artiq_core_exception"):
lines.append(str(exc.artiq_core_exception))
if hasattr(exc, "parent_traceback"):
lines += exc.parent_traceback
lines += traceback.format_exception_only(type(exc), exc)
logging.error("".join(lines).rstrip(),
exc_info=not hasattr(exc, "parent_traceback"))
put_object({"action": "exception"})
finally: finally:
device_mgr.close_devices() device_mgr.close_devices()
ipc.close() ipc.close()