mirror of https://github.com/m-labs/artiq.git
firmware: implement an edge profiler.
It doesn't work extremely well, and the results can be somewhat confusing, and it needs a libunwind patch, but it's the best I could do and it seems still useful.
This commit is contained in:
parent
99f7672c79
commit
31c6c79204
|
@ -3,11 +3,15 @@ import subprocess
|
|||
|
||||
|
||||
class Symbolizer:
|
||||
def __init__(self, binary, triple):
|
||||
self._addr2line = subprocess.Popen([
|
||||
def __init__(self, binary, triple, demangle=True):
|
||||
cmdline = [
|
||||
triple + "-addr2line", "--exe=" + binary,
|
||||
"--addresses", "--demangle=rust", "--functions", "--inlines"
|
||||
], stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)
|
||||
"--addresses", "--functions", "--inlines"
|
||||
]
|
||||
if demangle:
|
||||
cmdline.append("--demangle=rust")
|
||||
self._addr2line = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
universal_newlines=True)
|
||||
|
||||
def symbolize(self, addr):
|
||||
self._addr2line.stdin.write("0x{:08x}\n0\n".format(addr))
|
||||
|
@ -26,17 +30,17 @@ class Symbolizer:
|
|||
|
||||
file, line = self._addr2line.stdout.readline().rstrip().split(":")
|
||||
|
||||
result.append((function, file, line))
|
||||
result.append((function, file, line, addr))
|
||||
|
||||
|
||||
class CallgrindWriter:
|
||||
def __init__(self, output, binary, triple, compression=True):
|
||||
def __init__(self, output, binary, triple, compression=True, demangle=True):
|
||||
self._output = output
|
||||
self._binary = binary
|
||||
self._current = defaultdict(lambda: None)
|
||||
self._ids = defaultdict(lambda: {})
|
||||
self._compression = compression
|
||||
self._symbolizer = Symbolizer(binary, triple)
|
||||
self._symbolizer = Symbolizer(binary, triple, demangle=demangle)
|
||||
|
||||
def _write(self, fmt, *args, **kwargs):
|
||||
self._output.write(fmt.format(*args, **kwargs))
|
||||
|
@ -69,18 +73,20 @@ class CallgrindWriter:
|
|||
self._spec("cob", self._binary)
|
||||
|
||||
def hit(self, addr, count):
|
||||
for function, file, line in self._symbolizer.symbolize(addr):
|
||||
self._spec("fn", function)
|
||||
for function, file, line, addr in self._symbolizer.symbolize(addr):
|
||||
self._spec("fl", file)
|
||||
self._spec("fn", function)
|
||||
self._write("0x{:08x} {} {}", addr, line, count)
|
||||
|
||||
def edge(self, caller, callee, count):
|
||||
function, file, line = next(self._symbolizer.symbolize(callee))
|
||||
self._spec("cfn", function)
|
||||
edges = self._symbolizer.symbolize(callee) + self._symbolizer.symbolize(caller)
|
||||
for (callee, caller) in zip(edges, edges[1:]):
|
||||
function, file, line, addr = callee
|
||||
self._spec("cfl", file)
|
||||
self._write("calls={} 0x{:08x} {}", count, callee, line)
|
||||
self._spec("cfn", function)
|
||||
self._write("calls={} 0x{:08x} {}", count, addr, line)
|
||||
|
||||
function, file, line = next(self._symbolizer.symbolize(caller))
|
||||
self._spec("fn", function)
|
||||
function, file, line, addr = caller
|
||||
self._spec("fl", file)
|
||||
self._write("0x{:08x} {} {}", caller, line, count)
|
||||
self._spec("fn", function)
|
||||
self._write("0x{:08x} {} {}", addr, line, count)
|
||||
|
|
|
@ -21,6 +21,12 @@ void send_to_rtio_log(long long int timestamp, struct slice data);
|
|||
|
||||
FILE *stderr;
|
||||
|
||||
/* called by libunwind */
|
||||
char *getenv(const char *var)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* called by libunwind */
|
||||
int fprintf(FILE *stream, const char *fmt, ...)
|
||||
{
|
||||
|
|
|
@ -171,35 +171,70 @@ _crt0:
|
|||
l.nop
|
||||
|
||||
_exception_handler:
|
||||
.cfi_startproc
|
||||
.cfi_return_column 32
|
||||
.cfi_signal_frame
|
||||
.cfi_def_cfa_offset EXCEPTION_STACK_SIZE
|
||||
l.sw 0x00(r1), r2
|
||||
.cfi_offset 2, 0x00-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x04(r1), r3
|
||||
.cfi_offset 3, 0x04-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x08(r1), r4
|
||||
.cfi_offset 4, 0x08-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x0c(r1), r5
|
||||
.cfi_offset 5, 0x0c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x10(r1), r6
|
||||
.cfi_offset 6, 0x10-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x14(r1), r7
|
||||
.cfi_offset 7, 0x14-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x18(r1), r8
|
||||
.cfi_offset 8, 0x18-EXCEPTION_STACK_SIZE
|
||||
/* r9 saved in HANDLE_EXCEPTION */
|
||||
.cfi_offset 9, 0x1c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x20(r1), r10
|
||||
.cfi_offset 10, 0x20-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x24(r1), r11
|
||||
.cfi_offset 11, 0x24-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x28(r1), r12
|
||||
.cfi_offset 12, 0x28-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x2c(r1), r13
|
||||
.cfi_offset 13, 0x2c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x30(r1), r14
|
||||
.cfi_offset 14, 0x30-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x34(r1), r15
|
||||
.cfi_offset 15, 0x34-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x38(r1), r16
|
||||
.cfi_offset 16, 0x38-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x3c(r1), r17
|
||||
.cfi_offset 17, 0x3c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x40(r1), r18
|
||||
.cfi_offset 18, 0x40-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x44(r1), r19
|
||||
.cfi_offset 19, 0x44-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x48(r1), r20
|
||||
.cfi_offset 20, 0x48-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x4c(r1), r21
|
||||
.cfi_offset 21, 0x4c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x50(r1), r22
|
||||
.cfi_offset 22, 0x50-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x54(r1), r23
|
||||
.cfi_offset 23, 0x54-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x58(r1), r24
|
||||
.cfi_offset 24, 0x58-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x5c(r1), r25
|
||||
.cfi_offset 25, 0x5c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x60(r1), r26
|
||||
.cfi_offset 26, 0x60-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x64(r1), r27
|
||||
.cfi_offset 27, 0x64-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x68(r1), r28
|
||||
.cfi_offset 28, 0x68-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x6c(r1), r29
|
||||
.cfi_offset 29, 0x6c-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x70(r1), r30
|
||||
.cfi_offset 30, 0x70-EXCEPTION_STACK_SIZE
|
||||
l.sw 0x74(r1), r31
|
||||
.cfi_offset 31, 0x74-EXCEPTION_STACK_SIZE
|
||||
|
||||
/* Save return address */
|
||||
l.or r14, r0, r9
|
||||
|
@ -210,6 +245,9 @@ _exception_handler:
|
|||
l.or r4, r0, r1
|
||||
/* Extract exception PC */
|
||||
l.mfspr r5, r0, SPR_EPCR_BASE
|
||||
/* Tell exception PC to the unwinder */
|
||||
l.sw 0x78(r1), r5
|
||||
.cfi_offset 32, 0x78-EXCEPTION_STACK_SIZE
|
||||
/* Extract exception effective address */
|
||||
l.mfspr r6, r0, SPR_EEAR_BASE
|
||||
/* Extract exception SR */
|
||||
|
@ -252,6 +290,7 @@ _exception_handler:
|
|||
l.lwz r31, 0x74(r1)
|
||||
l.jr r9
|
||||
l.nop
|
||||
.cfi_endproc
|
||||
|
||||
.global _cache_init
|
||||
_cache_init:
|
||||
|
|
|
@ -5,24 +5,43 @@ extern crate unwind;
|
|||
extern crate libc;
|
||||
|
||||
use unwind as uw;
|
||||
use libc::c_void;
|
||||
use libc::{c_void, c_int};
|
||||
|
||||
pub fn backtrace<F>(mut f: F) -> Result<(), uw::_Unwind_Reason_Code>
|
||||
const UW_REG_SP: c_int = -2;
|
||||
|
||||
pub fn backtrace<F>(f: F) -> Result<(), uw::_Unwind_Reason_Code>
|
||||
where F: FnMut(usize) -> ()
|
||||
{
|
||||
struct TraceContext<F> {
|
||||
step_fn: F,
|
||||
prev_sp: uw::_Unwind_Word
|
||||
}
|
||||
|
||||
extern fn trace<F>(context: *mut uw::_Unwind_Context, arg: *mut c_void)
|
||||
-> uw::_Unwind_Reason_Code
|
||||
where F: FnMut(usize) -> ()
|
||||
{
|
||||
unsafe {
|
||||
let step_fn = &mut *(arg as *mut F);
|
||||
step_fn(uw::_Unwind_GetIP(context));
|
||||
let trace_context = &mut *(arg as *mut TraceContext<F>);
|
||||
|
||||
// Detect the root of a libfringe thread
|
||||
let cur_sp = uw::_Unwind_GetGR(context, UW_REG_SP);
|
||||
if cur_sp == trace_context.prev_sp {
|
||||
return uw::_URC_END_OF_STACK
|
||||
} else {
|
||||
trace_context.prev_sp = cur_sp;
|
||||
}
|
||||
|
||||
// GetIP gives us the return address, i.e. the address after the delay slot,
|
||||
// but we're interested in the call instruction.
|
||||
(trace_context.step_fn)(uw::_Unwind_GetIP(context) - 2 * 4);
|
||||
uw::_URC_NO_REASON
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
match uw::_Unwind_Backtrace(trace::<F>, &mut f as *mut _ as *mut c_void) {
|
||||
let mut trace_context = TraceContext { step_fn: f, prev_sp: 0 };
|
||||
match uw::_Unwind_Backtrace(trace::<F>, &mut trace_context as *mut _ as *mut c_void) {
|
||||
uw::_URC_NO_REASON => Ok(()),
|
||||
err => Err(err)
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ mod moninj;
|
|||
mod analyzer;
|
||||
|
||||
fn startup() {
|
||||
irq::set_mask(0);
|
||||
irq::set_ie(true);
|
||||
clock::init();
|
||||
info!("ARTIQ runtime starting...");
|
||||
|
@ -305,6 +306,8 @@ pub extern fn abort() {
|
|||
#[lang = "panic_fmt"]
|
||||
pub extern fn panic_fmt(args: core::fmt::Arguments, file: &'static str,
|
||||
line: u32, column: u32) -> ! {
|
||||
irq::set_ie(false);
|
||||
|
||||
println!("panic at {}:{}:{}: {}", file, line, column, args);
|
||||
|
||||
println!("backtrace for software version {}:",
|
||||
|
|
|
@ -10,8 +10,8 @@ use managed::ManagedMap;
|
|||
pub struct Address(NonZeroUsize);
|
||||
|
||||
impl Address {
|
||||
pub fn new(raw: usize) -> Option<Address> {
|
||||
NonZeroUsize::new(raw).map(Address)
|
||||
pub fn new(raw: usize) -> Address {
|
||||
Address(NonZeroUsize::new(raw).expect("null address"))
|
||||
}
|
||||
|
||||
pub fn as_raw(&self) -> usize {
|
||||
|
@ -50,6 +50,10 @@ impl Profile {
|
|||
self.edges.capacity() * edge_size
|
||||
}
|
||||
|
||||
pub fn has_edges(&self) -> bool {
|
||||
self.edges.is_empty()
|
||||
}
|
||||
|
||||
pub fn hits<'a>(&'a mut self) -> ManagedMap<'a, Address, u32> {
|
||||
ManagedMap::Borrowed(&mut self.hits[..])
|
||||
}
|
||||
|
@ -88,6 +92,7 @@ impl Profile {
|
|||
|
||||
#[cfg(has_timer1)]
|
||||
mod imp {
|
||||
use unwind_backtrace::backtrace;
|
||||
use board_misoc::{csr, irq};
|
||||
use super::{Address, Profile};
|
||||
|
||||
|
@ -192,22 +197,46 @@ mod imp {
|
|||
}
|
||||
}
|
||||
|
||||
// Skip frames: ::profiler::sample, ::exception, exception vector.
|
||||
const SKIP_FRAMES: i32 = 3;
|
||||
|
||||
#[inline(always)] // make the top of backtrace predictable
|
||||
fn record(profile: &mut Profile, pc: usize) -> Result<(), ()> {
|
||||
let callee = Address::new(pc).expect("null code address");
|
||||
profile.record_hit(callee)?;
|
||||
let mut result = Ok(());
|
||||
let mut frame = -SKIP_FRAMES;
|
||||
|
||||
// TODO: record edges
|
||||
// If we have storage for edges, use the DWARF unwinder.
|
||||
// Otherwise, don't bother and use a much faster path that just looks at EPCR.
|
||||
// Also, acquiring a meaningful backtrace requires libunwind
|
||||
// with the https://reviews.llvm.org/D46971 patch applied.
|
||||
if profile.has_edges() {
|
||||
let mut prev_pc = 0;
|
||||
let _ = backtrace(|pc| {
|
||||
if frame == 0 {
|
||||
result = result.and_then(|()|
|
||||
profile.record_hit(Address::new(pc)));
|
||||
prev_pc = pc;
|
||||
} else if frame > 0 {
|
||||
result = result.and_then(|()|
|
||||
profile.record_edge(Address::new(pc),
|
||||
Address::new(prev_pc)));
|
||||
}
|
||||
prev_pc = pc;
|
||||
frame += 1;
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
// If we couldn't get anything useful out of a backtrace, at least
|
||||
// record a hit at the exception PC.
|
||||
if frame <= 0 {
|
||||
result = profile.record_hit(Address::new(pc));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[inline(never)] // see above
|
||||
pub fn sample(pc: usize) {
|
||||
unsafe {
|
||||
csr::timer1::ev_pending_write(1);
|
||||
}
|
||||
|
||||
let result = {
|
||||
let mut profile = Lock::take().expect("cannot lock");
|
||||
record(profile.as_mut().expect("profiler not running"), pc)
|
||||
|
@ -216,6 +245,10 @@ mod imp {
|
|||
if result.is_err() {
|
||||
warn!("out of space");
|
||||
stop();
|
||||
} else {
|
||||
unsafe {
|
||||
csr::timer1::ev_pending_write(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,8 +98,8 @@ def get_argparser():
|
|||
help="sampling interval, in microseconds")
|
||||
p_start.add_argument("--hits-size", metavar="ENTRIES", type=int, default=8192,
|
||||
help="hit buffer size")
|
||||
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=0,
|
||||
help="edge buffer size (edge profiling not implemented)")
|
||||
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=8192,
|
||||
help="edge buffer size")
|
||||
|
||||
p_stop = subparsers.add_parser("stop",
|
||||
help="stop profiling")
|
||||
|
@ -113,6 +113,9 @@ def get_argparser():
|
|||
p_save.add_argument("--no-compression",
|
||||
dest="compression", default=True, action="store_false",
|
||||
help="disable profile compression")
|
||||
p_save.add_argument("--no-demangle",
|
||||
dest="demangle", default=True, action="store_false",
|
||||
help="disable symbol demangling")
|
||||
|
||||
# misc debug
|
||||
t_debug = tools.add_parser("debug",
|
||||
|
@ -178,8 +181,8 @@ def main():
|
|||
mgmt.stop_profiler()
|
||||
elif args.action == "save":
|
||||
hits, edges = mgmt.get_profile()
|
||||
writer = CallgrindWriter(args.output, args.firmware,
|
||||
"or1k-linux", args.compression)
|
||||
writer = CallgrindWriter(args.output, args.firmware, "or1k-linux",
|
||||
args.compression, args.demangle)
|
||||
writer.header()
|
||||
for addr, count in hits.items():
|
||||
writer.hit(addr, count)
|
||||
|
|
Loading…
Reference in New Issue