forked from M-Labs/artiq
Host report for async error upon kernel termination (#1791)
Closes #1644
This commit is contained in:
parent
9bbf7eb485
commit
4a6bea479a
|
@ -31,6 +31,9 @@ Highlights:
|
||||||
* The configuration entry ``rtio_clock`` supports multiple clocking settings, deprecating the usage
|
* The configuration entry ``rtio_clock`` supports multiple clocking settings, deprecating the usage
|
||||||
of compile-time options.
|
of compile-time options.
|
||||||
* DRTIO: added support for 100MHz clock.
|
* DRTIO: added support for 100MHz clock.
|
||||||
|
* Previously detected RTIO async errors are reported to the host after each kernel terminates and a
|
||||||
|
warning is logged. The warning is additional to the one already printed in the core device log upon
|
||||||
|
detection of the error.
|
||||||
|
|
||||||
Breaking changes:
|
Breaking changes:
|
||||||
|
|
||||||
|
@ -44,7 +47,6 @@ Breaking changes:
|
||||||
* DRTIO: Changed message alignment from 32-bits to 64-bits.
|
* DRTIO: Changed message alignment from 32-bits to 64-bits.
|
||||||
* The deprecated ``set_dataset(..., save=...)`` is no longer supported.
|
* The deprecated ``set_dataset(..., save=...)`` is no longer supported.
|
||||||
|
|
||||||
|
|
||||||
ARTIQ-6
|
ARTIQ-6
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
|
|
@ -621,6 +621,7 @@ class CommKernel:
|
||||||
function = self._read_string()
|
function = self._read_string()
|
||||||
|
|
||||||
backtrace = [self._read_int32() for _ in range(self._read_int32())]
|
backtrace = [self._read_int32() for _ in range(self._read_int32())]
|
||||||
|
self._process_async_error()
|
||||||
|
|
||||||
traceback = list(reversed(symbolizer(backtrace))) + \
|
traceback = list(reversed(symbolizer(backtrace))) + \
|
||||||
[(filename, line, column, *demangler([function]), None)]
|
[(filename, line, column, *demangler([function]), None)]
|
||||||
|
@ -635,6 +636,16 @@ class CommKernel:
|
||||||
python_exn.artiq_core_exception = core_exn
|
python_exn.artiq_core_exception = core_exn
|
||||||
raise python_exn
|
raise python_exn
|
||||||
|
|
||||||
|
def _process_async_error(self):
|
||||||
|
errors = self._read_int8()
|
||||||
|
if errors > 0:
|
||||||
|
map_name = lambda y, z: [f"{y}(s)"] if z else []
|
||||||
|
errors = map_name("collision", errors & 2 ** 0) + \
|
||||||
|
map_name("busy error", errors & 2 ** 1) + \
|
||||||
|
map_name("sequence error", errors & 2 ** 2)
|
||||||
|
logger.warning(f"{(', '.join(errors[:-1]) + ' and ') if len(errors) > 1 else ''}{errors[-1]} "
|
||||||
|
f"reported during kernel execution")
|
||||||
|
|
||||||
def serve(self, embedding_map, symbolizer, demangler):
|
def serve(self, embedding_map, symbolizer, demangler):
|
||||||
while True:
|
while True:
|
||||||
self._read_header()
|
self._read_header()
|
||||||
|
@ -646,4 +657,5 @@ class CommKernel:
|
||||||
raise exceptions.ClockFailure
|
raise exceptions.ClockFailure
|
||||||
else:
|
else:
|
||||||
self._read_expect(Reply.KernelFinished)
|
self._read_expect(Reply.KernelFinished)
|
||||||
|
self._process_async_error()
|
||||||
return
|
return
|
||||||
|
|
|
@ -90,7 +90,9 @@ pub enum Reply<'a> {
|
||||||
LoadCompleted,
|
LoadCompleted,
|
||||||
LoadFailed(&'a str),
|
LoadFailed(&'a str),
|
||||||
|
|
||||||
KernelFinished,
|
KernelFinished {
|
||||||
|
async_errors: u8
|
||||||
|
},
|
||||||
KernelStartupFailed,
|
KernelStartupFailed,
|
||||||
KernelException {
|
KernelException {
|
||||||
name: &'a str,
|
name: &'a str,
|
||||||
|
@ -100,7 +102,8 @@ pub enum Reply<'a> {
|
||||||
line: u32,
|
line: u32,
|
||||||
column: u32,
|
column: u32,
|
||||||
function: &'a str,
|
function: &'a str,
|
||||||
backtrace: &'a [usize]
|
backtrace: &'a [usize],
|
||||||
|
async_errors: u8
|
||||||
},
|
},
|
||||||
|
|
||||||
RpcRequest { async: bool },
|
RpcRequest { async: bool },
|
||||||
|
@ -160,14 +163,16 @@ impl<'a> Reply<'a> {
|
||||||
writer.write_string(reason)?;
|
writer.write_string(reason)?;
|
||||||
},
|
},
|
||||||
|
|
||||||
Reply::KernelFinished => {
|
Reply::KernelFinished { async_errors } => {
|
||||||
writer.write_u8(7)?;
|
writer.write_u8(7)?;
|
||||||
|
writer.write_u8(async_errors)?;
|
||||||
},
|
},
|
||||||
Reply::KernelStartupFailed => {
|
Reply::KernelStartupFailed => {
|
||||||
writer.write_u8(8)?;
|
writer.write_u8(8)?;
|
||||||
},
|
},
|
||||||
Reply::KernelException {
|
Reply::KernelException {
|
||||||
name, message, param, file, line, column, function, backtrace
|
name, message, param, file, line, column, function, backtrace,
|
||||||
|
async_errors
|
||||||
} => {
|
} => {
|
||||||
writer.write_u8(9)?;
|
writer.write_u8(9)?;
|
||||||
writer.write_string(name)?;
|
writer.write_string(name)?;
|
||||||
|
@ -183,6 +188,7 @@ impl<'a> Reply<'a> {
|
||||||
for &addr in backtrace {
|
for &addr in backtrace {
|
||||||
writer.write_u32(addr as u32)?
|
writer.write_u32(addr as u32)?
|
||||||
}
|
}
|
||||||
|
writer.write_u8(async_errors)?;
|
||||||
},
|
},
|
||||||
|
|
||||||
Reply::RpcRequest { async } => {
|
Reply::RpcRequest { async } => {
|
||||||
|
|
|
@ -326,6 +326,14 @@ pub mod drtio {
|
||||||
pub fn reset(_io: &Io, _aux_mutex: &Mutex) {}
|
pub fn reset(_io: &Io, _aux_mutex: &Mutex) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static mut SEEN_ASYNC_ERRORS: u8 = 0;
|
||||||
|
|
||||||
|
pub unsafe fn get_async_errors() -> u8 {
|
||||||
|
let mut errors = SEEN_ASYNC_ERRORS;
|
||||||
|
SEEN_ASYNC_ERRORS = 0;
|
||||||
|
errors
|
||||||
|
}
|
||||||
|
|
||||||
fn async_error_thread(io: Io) {
|
fn async_error_thread(io: Io) {
|
||||||
loop {
|
loop {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -343,6 +351,7 @@ fn async_error_thread(io: Io) {
|
||||||
error!("RTIO sequence error involving channel {}",
|
error!("RTIO sequence error involving channel {}",
|
||||||
csr::rtio_core::sequence_error_channel_read());
|
csr::rtio_core::sequence_error_channel_read());
|
||||||
}
|
}
|
||||||
|
SEEN_ASYNC_ERRORS = errors;
|
||||||
csr::rtio_core::async_error_write(errors);
|
csr::rtio_core::async_error_write(errors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@ use urc::Urc;
|
||||||
use sched::{ThreadHandle, Io, Mutex, TcpListener, TcpStream, Error as SchedError};
|
use sched::{ThreadHandle, Io, Mutex, TcpListener, TcpStream, Error as SchedError};
|
||||||
use rtio_clocking;
|
use rtio_clocking;
|
||||||
use rtio_dma::Manager as DmaManager;
|
use rtio_dma::Manager as DmaManager;
|
||||||
|
use rtio_mgt::get_async_errors;
|
||||||
use cache::Cache;
|
use cache::Cache;
|
||||||
use kern_hwreq;
|
use kern_hwreq;
|
||||||
use board_artiq::drtio_routing;
|
use board_artiq::drtio_routing;
|
||||||
|
@ -431,7 +432,9 @@ fn process_kern_message(io: &Io, aux_mutex: &Mutex,
|
||||||
match stream {
|
match stream {
|
||||||
None => return Ok(true),
|
None => return Ok(true),
|
||||||
Some(ref mut stream) =>
|
Some(ref mut stream) =>
|
||||||
host_write(stream, host::Reply::KernelFinished).map_err(|e| e.into())
|
host_write(stream, host::Reply::KernelFinished {
|
||||||
|
async_errors: unsafe { get_async_errors() }
|
||||||
|
}).map_err(|e| e.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
&kern::RunException {
|
&kern::RunException {
|
||||||
|
@ -458,7 +461,8 @@ fn process_kern_message(io: &Io, aux_mutex: &Mutex,
|
||||||
line: line,
|
line: line,
|
||||||
column: column,
|
column: column,
|
||||||
function: function,
|
function: function,
|
||||||
backtrace: backtrace
|
backtrace: backtrace,
|
||||||
|
async_errors: unsafe { get_async_errors() }
|
||||||
}).map_err(|e| e.into())
|
}).map_err(|e| e.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue