From feed91d8b2bb73b49ce89e52678d19e1988254c7 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sat, 12 Nov 2016 23:06:33 +0000 Subject: [PATCH] runtime: buffer RPC send packets. This brings mean RPC time from ~45ms to ~2ms. The cause of the slowness without buffering is, primarily, that lwip is severely pessimized by small writes, whether with Nagle on or off. (In fact, disabling Nagle makes it function *better* on many small writes, which begs the question of what's the point of having Nagle there in the first place.) In practical terms, the slowness appears only when writing a 4-byte buffer (the synchronization segment); writing buffers of other sizes does not trigger the problem. This all is extremely confusing and the fix is partly palliative, but since it seems to work reliably and we're migrating off lwip I think it is unwise to spend any more time debugging this. --- artiq/runtime.rs/src/session.rs | 7 ++++--- artiq/test/coredevice/test_rtio.py | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/artiq/runtime.rs/src/session.rs b/artiq/runtime.rs/src/session.rs index 8ad957a61..8589971a0 100644 --- a/artiq/runtime.rs/src/session.rs +++ b/artiq/runtime.rs/src/session.rs @@ -114,7 +114,7 @@ fn host_read(stream: &mut TcpStream) -> io::Result { Ok(request) } -fn host_write(stream: &mut TcpStream, reply: host::Reply) -> io::Result<()> { +fn host_write(stream: &mut Write, reply: host::Reply) -> io::Result<()> { trace!("comm->host {:?}", reply); reply.write_to(stream) } @@ -389,8 +389,9 @@ fn process_kern_message(waiter: Waiter, match stream { None => unexpected!("unexpected RPC in flash kernel"), Some(ref mut stream) => { - try!(host_write(stream, host::Reply::RpcRequest { async: async })); - try!(rpc::send_args(&mut BufWriter::new(stream), service, tag, data)); + let writer = &mut BufWriter::new(stream); + try!(host_write(writer, host::Reply::RpcRequest { async: async })); + try!(rpc::send_args(writer, service, tag, data)); if !async { session.kernel_state = KernelState::RpcWait } diff --git a/artiq/test/coredevice/test_rtio.py b/artiq/test/coredevice/test_rtio.py index f60abd643..061d5fc3c 100644 --- a/artiq/test/coredevice/test_rtio.py +++ b/artiq/test/coredevice/test_rtio.py @@ -450,6 +450,8 @@ class RPCTest(ExperimentCase): "timings are dependent on CPU load and network conditions") def test_rpc_timing(self): self.execute(RPCTiming) - self.assertGreater(self.dataset_mgr.get("rpc_time_mean"), 100*ns) - self.assertLess(self.dataset_mgr.get("rpc_time_mean"), 15*ms) - self.assertLess(self.dataset_mgr.get("rpc_time_stddev"), 2*ms) + rpc_time_mean = self.dataset_mgr.get("rpc_time_mean") + print(rpc_time_mean) + self.assertGreater(rpc_time_mean, 100*ns) + self.assertLess(rpc_time_mean, 2*ms) + self.assertLess(self.dataset_mgr.get("rpc_time_stddev"), 1*ms)