Host report for async error upon kernel termination (#1791)

Closes #1644
This commit is contained in:
Steve Fan 2021-12-04 13:33:24 +08:00 committed by GitHub
parent 9bbf7eb485
commit 4a6bea479a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 7 deletions

View File

@ -31,6 +31,9 @@ Highlights:
* The configuration entry ``rtio_clock`` supports multiple clocking settings, deprecating the usage
of compile-time options.
* DRTIO: added support for 100MHz clock.
* Previously detected RTIO async errors are reported to the host after each kernel terminates, and a
warning is logged. This warning is in addition to the one already printed in the core device log
when the error is detected.
Breaking changes:
@ -44,7 +47,6 @@ Breaking changes:
* DRTIO: Changed message alignment from 32-bits to 64-bits.
* The deprecated ``set_dataset(..., save=...)`` is no longer supported.
ARTIQ-6
-------

View File

@ -621,6 +621,7 @@ class CommKernel:
function = self._read_string()
backtrace = [self._read_int32() for _ in range(self._read_int32())]
self._process_async_error()
traceback = list(reversed(symbolizer(backtrace))) + \
[(filename, line, column, *demangler([function]), None)]
@ -635,6 +636,16 @@ class CommKernel:
python_exn.artiq_core_exception = core_exn
raise python_exn
def _process_async_error(self):
errors = self._read_int8()
if errors > 0:
map_name = lambda y, z: [f"{y}(s)"] if z else []
errors = map_name("collision", errors & 2 ** 0) + \
map_name("busy error", errors & 2 ** 1) + \
map_name("sequence error", errors & 2 ** 2)
logger.warning(f"{(', '.join(errors[:-1]) + ' and ') if len(errors) > 1 else ''}{errors[-1]} "
f"reported during kernel execution")
def serve(self, embedding_map, symbolizer, demangler):
while True:
self._read_header()
@ -646,4 +657,5 @@ class CommKernel:
raise exceptions.ClockFailure
else:
self._read_expect(Reply.KernelFinished)
self._process_async_error()
return

View File

@ -90,7 +90,9 @@ pub enum Reply<'a> {
LoadCompleted,
LoadFailed(&'a str),
KernelFinished,
KernelFinished {
async_errors: u8
},
KernelStartupFailed,
KernelException {
name: &'a str,
@ -100,7 +102,8 @@ pub enum Reply<'a> {
line: u32,
column: u32,
function: &'a str,
backtrace: &'a [usize]
backtrace: &'a [usize],
async_errors: u8
},
RpcRequest { async: bool },
@ -160,14 +163,16 @@ impl<'a> Reply<'a> {
writer.write_string(reason)?;
},
Reply::KernelFinished => {
Reply::KernelFinished { async_errors } => {
writer.write_u8(7)?;
writer.write_u8(async_errors)?;
},
Reply::KernelStartupFailed => {
writer.write_u8(8)?;
},
Reply::KernelException {
name, message, param, file, line, column, function, backtrace
name, message, param, file, line, column, function, backtrace,
async_errors
} => {
writer.write_u8(9)?;
writer.write_string(name)?;
@ -183,6 +188,7 @@ impl<'a> Reply<'a> {
for &addr in backtrace {
writer.write_u32(addr as u32)?
}
writer.write_u8(async_errors)?;
},
Reply::RpcRequest { async } => {

View File

@ -326,6 +326,14 @@ pub mod drtio {
pub fn reset(_io: &Io, _aux_mutex: &Mutex) {}
}
/// RTIO async error flags most recently recorded by `async_error_thread`.
///
/// Zero means nothing is pending. The value is reset to zero each time it is
/// fetched through `get_async_errors`.
static mut SEEN_ASYNC_ERRORS: u8 = 0;

/// Returns the pending RTIO async error flags and clears them.
///
/// A second call with no new error recorded in between returns `0`.
///
/// # Safety
///
/// This reads and writes a `static mut` without any synchronization. The
/// caller must ensure nothing else accesses `SEEN_ASYNC_ERRORS` concurrently.
pub unsafe fn get_async_errors() -> u8 {
    // Plain value read followed by a store: no reference to the `static mut`
    // is ever created.
    let errors = SEEN_ASYNC_ERRORS;
    SEEN_ASYNC_ERRORS = 0;
    errors
}
fn async_error_thread(io: Io) {
loop {
unsafe {
@ -343,6 +351,7 @@ fn async_error_thread(io: Io) {
error!("RTIO sequence error involving channel {}",
csr::rtio_core::sequence_error_channel_read());
}
SEEN_ASYNC_ERRORS = errors;
csr::rtio_core::async_error_write(errors);
}
}

View File

@ -9,6 +9,7 @@ use urc::Urc;
use sched::{ThreadHandle, Io, Mutex, TcpListener, TcpStream, Error as SchedError};
use rtio_clocking;
use rtio_dma::Manager as DmaManager;
use rtio_mgt::get_async_errors;
use cache::Cache;
use kern_hwreq;
use board_artiq::drtio_routing;
@ -431,7 +432,9 @@ fn process_kern_message(io: &Io, aux_mutex: &Mutex,
match stream {
None => return Ok(true),
Some(ref mut stream) =>
host_write(stream, host::Reply::KernelFinished).map_err(|e| e.into())
host_write(stream, host::Reply::KernelFinished {
async_errors: unsafe { get_async_errors() }
}).map_err(|e| e.into())
}
}
&kern::RunException {
@ -458,7 +461,8 @@ fn process_kern_message(io: &Io, aux_mutex: &Mutex,
line: line,
column: column,
function: function,
backtrace: backtrace
backtrace: backtrace,
async_errors: unsafe { get_async_errors() }
}).map_err(|e| e.into())
}
}