mirror of https://github.com/m-labs/artiq.git
ctlmgr: graceful controller termination
This commit is contained in:
parent
4b195663f6
commit
54b11a392a
|
@ -4,7 +4,6 @@ import asyncio
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import signal
|
|
||||||
import shlex
|
import shlex
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
@ -46,6 +45,7 @@ class Controller:
|
||||||
self.port = ddb_entry["port"]
|
self.port = ddb_entry["port"]
|
||||||
self.ping_timer = ddb_entry.get("ping_timer", 30)
|
self.ping_timer = ddb_entry.get("ping_timer", 30)
|
||||||
self.ping_timeout = ddb_entry.get("ping_timeout", 30)
|
self.ping_timeout = ddb_entry.get("ping_timeout", 30)
|
||||||
|
self.term_timeout = ddb_entry.get("term_timeout", 30)
|
||||||
|
|
||||||
self.retry_timer_cur = self.retry_timer
|
self.retry_timer_cur = self.retry_timer
|
||||||
self.process = None
|
self.process = None
|
||||||
|
@ -57,21 +57,21 @@ class Controller:
|
||||||
yield from asyncio.wait_for(self.launch_task, None)
|
yield from asyncio.wait_for(self.launch_task, None)
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def _ping_notimeout(self):
|
def _call_controller(self, method):
|
||||||
remote = AsyncioClient()
|
remote = AsyncioClient()
|
||||||
yield from remote.connect_rpc(self.host, self.port, None)
|
yield from remote.connect_rpc(self.host, self.port, None)
|
||||||
try:
|
try:
|
||||||
targets, _ = remote.get_rpc_id()
|
targets, _ = remote.get_rpc_id()
|
||||||
remote.select_rpc_target(targets[0])
|
remote.select_rpc_target(targets[0])
|
||||||
ok = yield from remote.ping()
|
r = yield from getattr(remote, method)()
|
||||||
finally:
|
finally:
|
||||||
remote.close_rpc()
|
remote.close_rpc()
|
||||||
return ok
|
return r
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def _ping(self):
|
def _ping(self):
|
||||||
try:
|
try:
|
||||||
ok = yield from asyncio.wait_for(self._ping_notimeout(),
|
ok = yield from asyncio.wait_for(self._call_controller("ping"),
|
||||||
self.ping_timeout)
|
self.ping_timeout)
|
||||||
if ok:
|
if ok:
|
||||||
self.retry_timer_cur = self.retry_timer
|
self.retry_timer_cur = self.retry_timer
|
||||||
|
@ -92,6 +92,8 @@ class Controller:
|
||||||
logger.warning("Controller %s ping failed", self.name)
|
logger.warning("Controller %s ping failed", self.name)
|
||||||
yield from self._terminate()
|
yield from self._terminate()
|
||||||
return
|
return
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def launcher(self):
|
def launcher(self):
|
||||||
|
@ -117,14 +119,22 @@ class Controller:
|
||||||
def _terminate(self):
|
def _terminate(self):
|
||||||
logger.info("Terminating controller %s", self.name)
|
logger.info("Terminating controller %s", self.name)
|
||||||
if self.process is not None and self.process.returncode is None:
|
if self.process is not None and self.process.returncode is None:
|
||||||
self.process.send_signal(signal.SIGTERM)
|
|
||||||
logger.debug("Signal sent")
|
|
||||||
try:
|
try:
|
||||||
yield from asyncio_process_wait_timeout(self.process, 5.0)
|
yield from asyncio.wait_for(self._call_controller("terminate"),
|
||||||
except asyncio.TimeoutError:
|
self.term_timeout)
|
||||||
logger.warning("Controller %s did not respond to SIGTERM",
|
except:
|
||||||
|
logger.warning("Controller %s did not respond to terminate "
|
||||||
|
"command, killing", self.name)
|
||||||
|
self.process.kill()
|
||||||
|
try:
|
||||||
|
yield from asyncio_process_wait_timeout(self.process,
|
||||||
|
self.term_timeout)
|
||||||
|
except:
|
||||||
|
logger.warning("Controller %s failed to exit, killing",
|
||||||
self.name)
|
self.name)
|
||||||
self.process.send_signal(signal.SIGKILL)
|
self.process.kill()
|
||||||
|
yield from self.process.wait()
|
||||||
|
logger.debug("Controller %s terminated", self.name)
|
||||||
|
|
||||||
|
|
||||||
def get_ip_addresses(host):
|
def get_ip_addresses(host):
|
||||||
|
|
Loading…
Reference in New Issue