forked from M-Labs/artiq
coredevice: Re-enable TCP keepalive
This partially reverts commit b5e1bd3fa2
,
which had removed keepalive. This, however, led to experiments
hanging forever if the core device had dropped the connection
(e.g. to a kernel CPU panic, or the device being rebooted).
The chosen keepalive settings are fairly conservative (with the
10 s timeout) to avoid any possible interaction with smoltcp's
3 s ARP try interval (see GitHub issue #1150), even though this
should be a non-issue now due to the larger ARP cache.
This commit is contained in:
parent
8148fdb8a7
commit
f1fd42ea98
28
artiq/coredevice/comm.py
Normal file
28
artiq/coredevice/comm.py
Normal file
@ -0,0 +1,28 @@
|
||||
import sys
|
||||
import socket
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def set_keepalive(sock, after_idle, interval, max_fails):
|
||||
if sys.platform.startswith("linux"):
|
||||
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
|
||||
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, after_idle)
|
||||
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval)
|
||||
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, max_fails)
|
||||
elif sys.platform.startswith("win") or sys.platform.startswith("cygwin"):
|
||||
# setting max_fails is not supported, typically ends up being 5 or 10
|
||||
# depending on Windows version
|
||||
sock.ioctl(socket.SIO_KEEPALIVE_VALS,
|
||||
(1, after_idle * 1000, interval * 1000))
|
||||
else:
|
||||
logger.warning("TCP keepalive not supported on platform '%s', ignored",
|
||||
sys.platform)
|
||||
|
||||
|
||||
def initialize_connection(host, port):
|
||||
sock = socket.create_connection((host, port))
|
||||
set_keepalive(sock, 10, 10, 3)
|
||||
logger.debug("connected to %s:%d", host, port)
|
||||
return sock
|
@ -8,6 +8,7 @@ from fractions import Fraction
|
||||
from collections import namedtuple
|
||||
|
||||
from artiq.coredevice import exceptions
|
||||
from artiq.coredevice.comm import initialize_connection
|
||||
from artiq import __version__ as software_version
|
||||
|
||||
|
||||
@ -184,8 +185,7 @@ class CommKernel:
|
||||
def open(self):
|
||||
if hasattr(self, "socket"):
|
||||
return
|
||||
self.socket = socket.create_connection((self.host, self.port))
|
||||
logger.debug("connected to %s:%d", self.host, self.port)
|
||||
self.socket = initialize_connection(self.host, self.port)
|
||||
self.socket.sendall(b"ARTIQ coredev\n")
|
||||
endian = self._read(1)
|
||||
if endian == b"e":
|
||||
|
@ -1,8 +1,9 @@
|
||||
from enum import Enum
|
||||
import logging
|
||||
import socket
|
||||
import struct
|
||||
|
||||
from artiq.coredevice.comm import initialize_connection
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -60,8 +61,7 @@ class CommMgmt:
|
||||
def open(self):
|
||||
if hasattr(self, "socket"):
|
||||
return
|
||||
self.socket = socket.create_connection((self.host, self.port))
|
||||
logger.debug("connected to %s:%d", self.host, self.port)
|
||||
self.socket = initialize_connection(self.host, self.port)
|
||||
self.socket.sendall(b"ARTIQ management\n")
|
||||
endian = self._read(1)
|
||||
if endian == b"e":
|
||||
|
Loading…
Reference in New Issue
Block a user