coredevice: Re-enable TCP keepalive

This partially reverts commit b5e1bd3fa2,
which had removed keepalive. This, however, led to experiments
hanging forever if the core device had dropped the connection
(e.g. to a kernel CPU panic, or the device being rebooted).

The chosen keepalive settings are fairly conservative (with the
10 s timeout) to avoid any possible interaction with smoltcp's
3 s ARP try interval (see GitHub issue #1150), even though this
should be a non-issue now due to the larger ARP cache.
This commit is contained in:
David Nadlinger 2021-01-22 12:19:41 +01:00 committed by Sébastien Bourdeauducq
parent 8148fdb8a7
commit f1fd42ea98
3 changed files with 33 additions and 5 deletions

28
artiq/coredevice/comm.py Normal file
View File

@ -0,0 +1,28 @@
import sys
import socket
import logging
logger = logging.getLogger(__name__)
def set_keepalive(sock, after_idle, interval, max_fails):
if sys.platform.startswith("linux"):
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, after_idle)
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval)
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, max_fails)
elif sys.platform.startswith("win") or sys.platform.startswith("cygwin"):
# setting max_fails is not supported, typically ends up being 5 or 10
# depending on Windows version
sock.ioctl(socket.SIO_KEEPALIVE_VALS,
(1, after_idle * 1000, interval * 1000))
else:
logger.warning("TCP keepalive not supported on platform '%s', ignored",
sys.platform)
def initialize_connection(host, port):
sock = socket.create_connection((host, port))
set_keepalive(sock, 10, 10, 3)
logger.debug("connected to %s:%d", host, port)
return sock

View File

@ -8,6 +8,7 @@ from fractions import Fraction
from collections import namedtuple
from artiq.coredevice import exceptions
from artiq.coredevice.comm import initialize_connection
from artiq import __version__ as software_version
@ -184,8 +185,7 @@ class CommKernel:
def open(self):
if hasattr(self, "socket"):
return
self.socket = socket.create_connection((self.host, self.port))
logger.debug("connected to %s:%d", self.host, self.port)
self.socket = initialize_connection(self.host, self.port)
self.socket.sendall(b"ARTIQ coredev\n")
endian = self._read(1)
if endian == b"e":

View File

@ -1,8 +1,9 @@
from enum import Enum
import logging
import socket
import struct
from artiq.coredevice.comm import initialize_connection
logger = logging.getLogger(__name__)
@ -60,8 +61,7 @@ class CommMgmt:
def open(self):
if hasattr(self, "socket"):
return
self.socket = socket.create_connection((self.host, self.port))
logger.debug("connected to %s:%d", self.host, self.port)
self.socket = initialize_connection(self.host, self.port)
self.socket.sendall(b"ARTIQ management\n")
endian = self._read(1)
if endian == b"e":