forked from M-Labs/artiq
1
0
Fork 0

dashboard: reconnect to core moninj

* handle disconnects like core device address changes and do a
  disconnect/connect iteration
* after connection failure wait 10 seconds and try again
* this addresses the slight regression from release-2
  to release-3 where the moninj protocol was made stateful
  (#838 and #1125)
* it would be much better to fix smoltcp/runtime to not lose the
  connection under pressure (#1125)
* the crashes reported in #838 look more like a race condition
* master disconnects still require dashboard restarts

Signed-off-by: Robert Jördens <rj@quartiq.de>
This commit is contained in:
Robert Jördens 2019-02-06 19:08:27 +01:00 committed by Sébastien Bourdeauducq
parent 7994c294af
commit 2de1eaa521
1 changed files with 11 additions and 5 deletions

View File

@ -243,7 +243,7 @@ def setup_from_ddb(ddb):
class _DeviceManager: class _DeviceManager:
def __init__(self): def __init__(self):
self.core_addr = None self.core_addr = None
self.new_core_addr = asyncio.Event() self.reconnect_core = asyncio.Event()
self.core_connection = None self.core_connection = None
self.core_connector_task = asyncio.ensure_future(self.core_connector()) self.core_connector_task = asyncio.ensure_future(self.core_connector())
@ -268,7 +268,7 @@ class _DeviceManager:
if core_addr != self.core_addr: if core_addr != self.core_addr:
self.core_addr = core_addr self.core_addr = core_addr
self.new_core_addr.set() self.reconnect_core.set()
self.dds_sysclk = dds_sysclk self.dds_sysclk = dds_sysclk
@ -383,19 +383,25 @@ class _DeviceManager:
widget.cur_override_level = bool(value) widget.cur_override_level = bool(value)
widget.refresh_display() widget.refresh_display()
def disconnect_cb(self):
# Callback handed to CommMonInj as its third argument (presumably invoked
# when the moninj connection drops -- confirm against CommMonInj).
# Logs the loss, then sets the reconnect_core event so that the
# core_connector loop wakes up, closes the stale connection and
# attempts a fresh connect.
logger.error("lost connection to core device moninj")
self.reconnect_core.set()
async def core_connector(self): async def core_connector(self):
while True: while True:
await self.new_core_addr.wait() await self.reconnect_core.wait()
self.new_core_addr.clear() self.reconnect_core.clear()
if self.core_connection is not None: if self.core_connection is not None:
await self.core_connection.close() await self.core_connection.close()
self.core_connection = None self.core_connection = None
new_core_connection = CommMonInj(self.monitor_cb, self.injection_status_cb, new_core_connection = CommMonInj(self.monitor_cb, self.injection_status_cb,
lambda: logger.error("lost connection to core device moninj")) self.disconnect_cb)
try: try:
await new_core_connection.connect(self.core_addr, 1383) await new_core_connection.connect(self.core_addr, 1383)
except: except:
logger.error("failed to connect to core device moninj", exc_info=True) logger.error("failed to connect to core device moninj", exc_info=True)
await asyncio.sleep(10.)
self.reconnect_core.set()
else: else:
self.core_connection = new_core_connection self.core_connection = new_core_connection
for ttl_channel in self.ttl_widgets.keys(): for ttl_channel in self.ttl_widgets.keys():