forked from M-Labs/artiq
coredevice: Re-enable TCP keepalive
This partially reverts commit b5e1bd3fa2,
which had removed keepalive. That removal, however, led to experiments
hanging forever if the core device had dropped the connection
(e.g. due to a kernel CPU panic, or the device being rebooted).
The chosen keepalive settings are fairly conservative (with the
10 s timeout) to avoid any possible interaction with smoltcp's
3 s ARP try interval (see GitHub issue #1150), even though this
should be a non-issue now due to the larger ARP cache.
This commit is contained in:
parent
8148fdb8a7
commit
f1fd42ea98
28
artiq/coredevice/comm.py
Normal file
28
artiq/coredevice/comm.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import sys
|
||||||
|
import socket
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def set_keepalive(sock, after_idle, interval, max_fails):
    """Turn on TCP keepalive probing for ``sock``.

    The mechanism depends on ``sys.platform``:

    * Linux: ``SO_KEEPALIVE`` is enabled and ``TCP_KEEPIDLE``,
      ``TCP_KEEPINTVL`` and ``TCP_KEEPCNT`` are set from the arguments.
    * Windows / Cygwin: ``SIO_KEEPALIVE_VALS`` is issued through
      ``sock.ioctl``; ``max_fails`` is not used there.
    * Anything else: a warning is logged and the socket is left untouched.

    :param sock: socket object to configure.
    :param after_idle: idle time before the first probe, in seconds.
    :param interval: time between successive probes, in seconds.
    :param max_fails: number of failed probes passed as ``TCP_KEEPCNT``
        (Linux only).
    """
    platform = sys.platform

    if platform.startswith("linux"):
        linux_options = (
            (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
            (socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, after_idle),
            (socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval),
            (socket.IPPROTO_TCP, socket.TCP_KEEPCNT, max_fails),
        )
        for level, option, value in linux_options:
            sock.setsockopt(level, option, value)
        return

    if platform.startswith(("win", "cygwin")):
        # setting max_fails is not supported, typically ends up being 5 or 10
        # depending on Windows version
        # The ioctl takes milliseconds, hence the conversion from seconds.
        keepalive_vals = (1, after_idle * 1000, interval * 1000)
        sock.ioctl(socket.SIO_KEEPALIVE_VALS, keepalive_vals)
        return

    logger.warning("TCP keepalive not supported on platform '%s', ignored",
                   platform)
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_connection(host, port):
    """Open a TCP connection to ``host``:``port`` and enable keepalive on it.

    Keepalive is configured via :func:`set_keepalive` with a 10 s idle time,
    a 10 s probe interval and 3 allowed failures.

    :param host: host name or address to connect to.
    :param port: TCP port number to connect to.
    :return: the connected socket object.
    :raises OSError: if the connection cannot be established; errors raised
        while configuring keepalive are propagated after the socket has been
        closed.
    """
    sock = socket.create_connection((host, port))
    try:
        set_keepalive(sock, 10, 10, 3)
    except BaseException:
        # Do not leak the already-connected socket if configuring keepalive
        # fails; the original exception is re-raised unchanged.
        sock.close()
        raise
    logger.debug("connected to %s:%d", host, port)
    return sock
|
@ -8,6 +8,7 @@ from fractions import Fraction
|
|||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
from artiq.coredevice import exceptions
|
from artiq.coredevice import exceptions
|
||||||
|
from artiq.coredevice.comm import initialize_connection
|
||||||
from artiq import __version__ as software_version
|
from artiq import __version__ as software_version
|
||||||
|
|
||||||
|
|
||||||
@ -184,8 +185,7 @@ class CommKernel:
|
|||||||
def open(self):
|
def open(self):
|
||||||
if hasattr(self, "socket"):
|
if hasattr(self, "socket"):
|
||||||
return
|
return
|
||||||
self.socket = socket.create_connection((self.host, self.port))
|
self.socket = initialize_connection(self.host, self.port)
|
||||||
logger.debug("connected to %s:%d", self.host, self.port)
|
|
||||||
self.socket.sendall(b"ARTIQ coredev\n")
|
self.socket.sendall(b"ARTIQ coredev\n")
|
||||||
endian = self._read(1)
|
endian = self._read(1)
|
||||||
if endian == b"e":
|
if endian == b"e":
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
import logging
|
import logging
|
||||||
import socket
|
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
|
from artiq.coredevice.comm import initialize_connection
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -60,8 +61,7 @@ class CommMgmt:
|
|||||||
def open(self):
|
def open(self):
|
||||||
if hasattr(self, "socket"):
|
if hasattr(self, "socket"):
|
||||||
return
|
return
|
||||||
self.socket = socket.create_connection((self.host, self.port))
|
self.socket = initialize_connection(self.host, self.port)
|
||||||
logger.debug("connected to %s:%d", self.host, self.port)
|
|
||||||
self.socket.sendall(b"ARTIQ management\n")
|
self.socket.sendall(b"ARTIQ management\n")
|
||||||
endian = self._read(1)
|
endian = self._read(1)
|
||||||
if endian == b"e":
|
if endian == b"e":
|
||||||
|
Loading…
Reference in New Issue
Block a user