forked from M-Labs/artiq
1
0
Fork 0

runtime: buffer RPC send packets.

This brings mean RPC time from ~45ms to ~2ms.

The cause of the slowness without buffering is, primarily, that lwip
is severely pessimized by small writes, whether with Nagle on or off.
(In fact, disabling Nagle makes it function *better* on many small
writes, which raises the question of what the point of having Nagle
there is in the first place.) In practical terms, the slowness appears
only when writing a 4-byte buffer (the synchronization segment);
writing buffers of other sizes does not trigger the problem.

All of this is extremely confusing and the fix is partly palliative,
but since it seems to work reliably and we're migrating off lwip,
I think it is unwise to spend any more time debugging this.
This commit is contained in:
whitequark 2016-11-12 23:06:33 +00:00
parent 3ce1826891
commit feed91d8b2
2 changed files with 9 additions and 6 deletions

View File

@ -114,7 +114,7 @@ fn host_read(stream: &mut TcpStream) -> io::Result<host::Request> {
Ok(request) Ok(request)
} }
fn host_write(stream: &mut TcpStream, reply: host::Reply) -> io::Result<()> { fn host_write(stream: &mut Write, reply: host::Reply) -> io::Result<()> {
trace!("comm->host {:?}", reply); trace!("comm->host {:?}", reply);
reply.write_to(stream) reply.write_to(stream)
} }
@ -389,8 +389,9 @@ fn process_kern_message(waiter: Waiter,
match stream { match stream {
None => unexpected!("unexpected RPC in flash kernel"), None => unexpected!("unexpected RPC in flash kernel"),
Some(ref mut stream) => { Some(ref mut stream) => {
try!(host_write(stream, host::Reply::RpcRequest { async: async })); let writer = &mut BufWriter::new(stream);
try!(rpc::send_args(&mut BufWriter::new(stream), service, tag, data)); try!(host_write(writer, host::Reply::RpcRequest { async: async }));
try!(rpc::send_args(writer, service, tag, data));
if !async { if !async {
session.kernel_state = KernelState::RpcWait session.kernel_state = KernelState::RpcWait
} }

View File

@ -450,6 +450,8 @@ class RPCTest(ExperimentCase):
"timings are dependent on CPU load and network conditions") "timings are dependent on CPU load and network conditions")
def test_rpc_timing(self): def test_rpc_timing(self):
self.execute(RPCTiming) self.execute(RPCTiming)
self.assertGreater(self.dataset_mgr.get("rpc_time_mean"), 100*ns) rpc_time_mean = self.dataset_mgr.get("rpc_time_mean")
self.assertLess(self.dataset_mgr.get("rpc_time_mean"), 15*ms) print(rpc_time_mean)
self.assertLess(self.dataset_mgr.get("rpc_time_stddev"), 2*ms) self.assertGreater(rpc_time_mean, 100*ns)
self.assertLess(rpc_time_mean, 2*ms)
self.assertLess(self.dataset_mgr.get("rpc_time_stddev"), 1*ms)