From cc6b808bf81affdf4977e020f2ff1c53763c87e6 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sat, 23 Jan 2016 21:23:02 -0500 Subject: [PATCH] master: finer control of worker exception reporting. Closes #233 --- artiq/master/experiments.py | 11 ++++++++--- artiq/master/scheduler.py | 8 ++++---- artiq/master/worker.py | 16 ++++++++++++++++ artiq/master/worker_impl.py | 18 ++++++++++-------- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/artiq/master/experiments.py b/artiq/master/experiments.py index e1fe67ee8..851d5ac66 100644 --- a/artiq/master/experiments.py +++ b/artiq/master/experiments.py @@ -6,7 +6,8 @@ import logging from functools import partial from artiq.protocols.sync_struct import Notifier -from artiq.master.worker import Worker +from artiq.master.worker import (Worker, WorkerInternalException, + log_worker_exception) from artiq.tools import get_windows_drives, exc_to_warning @@ -21,6 +22,9 @@ async def _get_repository_entries(entry_dict, }) try: description = await worker.examine(os.path.join(root, filename)) + except: + log_worker_exception() + raise finally: await worker.close() for class_name, class_desc in description.items(): @@ -55,8 +59,9 @@ async def _scan_experiments(root, get_device_db, log, subdir=""): try: await _get_repository_entries( entry_dict, root, filename, get_device_db, log) - except: - logger.warning("Skipping file '%s'", filename, exc_info=True) + except Exception as exc: + logger.warning("Skipping file '%s'", filename, + exc_info=not isinstance(exc, WorkerInternalException)) if de.is_dir(): subentries = await _scan_experiments( root, get_device_db, log, diff --git a/artiq/master/scheduler.py b/artiq/master/scheduler.py index 9d14dc2f4..2e375a344 100644 --- a/artiq/master/scheduler.py +++ b/artiq/master/scheduler.py @@ -3,7 +3,7 @@ import logging from enum import Enum from time import time -from artiq.master.worker import Worker +from artiq.master.worker import Worker, log_worker_exception from artiq.tools import asyncio_wait_or_cancel, TaskObject, Condition from artiq.protocols.sync_struct import Notifier @@ -231,7 +231,7 @@ class PrepareStage(TaskObject): except: logger.error("got worker exception in prepare stage, " "deleting RID %d", run.rid) - logger.error("worker exception details", exc_info=True) + log_worker_exception() self.delete_cb(run.rid) else: run.status = RunStatus.prepare_done @@ -281,7 +281,7 @@ class RunStage(TaskObject): except: logger.error("got worker exception in run stage, " "deleting RID %d", run.rid) - logger.error("worker exception details", exc_info=True) + log_worker_exception() self.delete_cb(run.rid) else: if completed: @@ -319,7 +319,7 @@ class AnalyzeStage(TaskObject): except: logger.error("got worker exception in analyze stage, " "deleting RID %d", run.rid) - logger.error("worker exception details", exc_info=True) + log_worker_exception() self.delete_cb(run.rid) else: self.delete_cb(run.rid) diff --git a/artiq/master/worker.py b/artiq/master/worker.py index db8a2ca64..b5877df0f 100644 --- a/artiq/master/worker.py +++ b/artiq/master/worker.py @@ -25,6 +25,20 @@ class WorkerError(Exception): pass +class WorkerInternalException(Exception): + """Exception raised inside the worker, information has been printed + through logging.""" + pass + + +def log_worker_exception(): + exc, _, _ = sys.exc_info() + if exc is WorkerInternalException: + logger.debug("worker exception details", exc_info=True) + else: + logger.error("worker exception details", exc_info=True) + + class Worker: def __init__(self, handlers=dict(), send_timeout=0.5): self.handlers = handlers @@ -167,6 +181,8 @@ class Worker: return True elif action == "pause": return False + elif action == "exception": + raise WorkerInternalException elif action == "create_watchdog": func = self.create_watchdog elif action == "delete_watchdog": diff --git a/artiq/master/worker_impl.py b/artiq/master/worker_impl.py index a7b1323d1..d8bf5cb82 100644 --- a/artiq/master/worker_impl.py +++ b/artiq/master/worker_impl.py @@ -264,15 +264,17 @@ def main(): put_object({"action": "completed"}) elif action == "terminate": break - except CompileError: - pass except Exception as exc: - lines = ["Terminating with exception\n"] - lines += traceback.format_exception_only(type(exc), exc) - if hasattr(exc, "parent_traceback"): - lines += exc.parent_traceback - logging.error("".join(lines).rstrip(), - exc_info=not hasattr(exc, "parent_traceback")) + # When we get CompileError, a more suitable diagnostic has already + # been printed. + if not isinstance(exc, CompileError): + lines = ["Terminating with exception\n"] + lines += traceback.format_exception_only(type(exc), exc) + if hasattr(exc, "parent_traceback"): + lines += exc.parent_traceback + logging.error("".join(lines).rstrip(), + exc_info=not hasattr(exc, "parent_traceback")) + put_object({"action": "exception"}) finally: device_mgr.close_devices()