firmware: implement an edge profiler.

It doesn't work extremely well, and the results can be somewhat
confusing, and it needs a libunwind patch, but it's the best I could
do and it seems still useful.
pull/1017/head
whitequark 2018-05-16 21:50:55 +00:00
parent 99f7672c79
commit 31c6c79204
7 changed files with 145 additions and 36 deletions

View File

@ -3,11 +3,15 @@ import subprocess
class Symbolizer:
def __init__(self, binary, triple):
self._addr2line = subprocess.Popen([
def __init__(self, binary, triple, demangle=True):
cmdline = [
triple + "-addr2line", "--exe=" + binary,
"--addresses", "--demangle=rust", "--functions", "--inlines"
], stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)
"--addresses", "--functions", "--inlines"
]
if demangle:
cmdline.append("--demangle=rust")
self._addr2line = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
universal_newlines=True)
def symbolize(self, addr):
self._addr2line.stdin.write("0x{:08x}\n0\n".format(addr))
@ -26,17 +30,17 @@ class Symbolizer:
file, line = self._addr2line.stdout.readline().rstrip().split(":")
result.append((function, file, line))
result.append((function, file, line, addr))
class CallgrindWriter:
def __init__(self, output, binary, triple, compression=True):
def __init__(self, output, binary, triple, compression=True, demangle=True):
self._output = output
self._binary = binary
self._current = defaultdict(lambda: None)
self._ids = defaultdict(lambda: {})
self._compression = compression
self._symbolizer = Symbolizer(binary, triple)
self._symbolizer = Symbolizer(binary, triple, demangle=demangle)
def _write(self, fmt, *args, **kwargs):
self._output.write(fmt.format(*args, **kwargs))
@ -69,18 +73,20 @@ class CallgrindWriter:
self._spec("cob", self._binary)
def hit(self, addr, count):
for function, file, line in self._symbolizer.symbolize(addr):
self._spec("fn", function)
for function, file, line, addr in self._symbolizer.symbolize(addr):
self._spec("fl", file)
self._spec("fn", function)
self._write("0x{:08x} {} {}", addr, line, count)
def edge(self, caller, callee, count):
function, file, line = next(self._symbolizer.symbolize(callee))
self._spec("cfn", function)
self._spec("cfl", file)
self._write("calls={} 0x{:08x} {}", count, callee, line)
edges = self._symbolizer.symbolize(callee) + self._symbolizer.symbolize(caller)
for (callee, caller) in zip(edges, edges[1:]):
function, file, line, addr = callee
self._spec("cfl", file)
self._spec("cfn", function)
self._write("calls={} 0x{:08x} {}", count, addr, line)
function, file, line = next(self._symbolizer.symbolize(caller))
self._spec("fn", function)
self._spec("fl", file)
self._write("0x{:08x} {} {}", caller, line, count)
function, file, line, addr = caller
self._spec("fl", file)
self._spec("fn", function)
self._write("0x{:08x} {} {}", addr, line, count)

View File

@ -21,6 +21,12 @@ void send_to_rtio_log(long long int timestamp, struct slice data);
FILE *stderr;
/* called by libunwind */
char *getenv(const char *var)
{
return NULL;
}
/* called by libunwind */
int fprintf(FILE *stream, const char *fmt, ...)
{

View File

@ -171,35 +171,70 @@ _crt0:
l.nop
_exception_handler:
.cfi_startproc
.cfi_return_column 32
.cfi_signal_frame
.cfi_def_cfa_offset EXCEPTION_STACK_SIZE
l.sw 0x00(r1), r2
.cfi_offset 2, 0x00-EXCEPTION_STACK_SIZE
l.sw 0x04(r1), r3
.cfi_offset 3, 0x04-EXCEPTION_STACK_SIZE
l.sw 0x08(r1), r4
.cfi_offset 4, 0x08-EXCEPTION_STACK_SIZE
l.sw 0x0c(r1), r5
.cfi_offset 5, 0x0c-EXCEPTION_STACK_SIZE
l.sw 0x10(r1), r6
.cfi_offset 6, 0x10-EXCEPTION_STACK_SIZE
l.sw 0x14(r1), r7
.cfi_offset 7, 0x14-EXCEPTION_STACK_SIZE
l.sw 0x18(r1), r8
.cfi_offset 8, 0x18-EXCEPTION_STACK_SIZE
/* r9 saved in HANDLE_EXCEPTION */
.cfi_offset 9, 0x1c-EXCEPTION_STACK_SIZE
l.sw 0x20(r1), r10
.cfi_offset 10, 0x20-EXCEPTION_STACK_SIZE
l.sw 0x24(r1), r11
.cfi_offset 11, 0x24-EXCEPTION_STACK_SIZE
l.sw 0x28(r1), r12
.cfi_offset 12, 0x28-EXCEPTION_STACK_SIZE
l.sw 0x2c(r1), r13
.cfi_offset 13, 0x2c-EXCEPTION_STACK_SIZE
l.sw 0x30(r1), r14
.cfi_offset 14, 0x30-EXCEPTION_STACK_SIZE
l.sw 0x34(r1), r15
.cfi_offset 15, 0x34-EXCEPTION_STACK_SIZE
l.sw 0x38(r1), r16
.cfi_offset 16, 0x38-EXCEPTION_STACK_SIZE
l.sw 0x3c(r1), r17
.cfi_offset 17, 0x3c-EXCEPTION_STACK_SIZE
l.sw 0x40(r1), r18
.cfi_offset 18, 0x40-EXCEPTION_STACK_SIZE
l.sw 0x44(r1), r19
.cfi_offset 19, 0x44-EXCEPTION_STACK_SIZE
l.sw 0x48(r1), r20
.cfi_offset 20, 0x48-EXCEPTION_STACK_SIZE
l.sw 0x4c(r1), r21
.cfi_offset 21, 0x4c-EXCEPTION_STACK_SIZE
l.sw 0x50(r1), r22
.cfi_offset 22, 0x50-EXCEPTION_STACK_SIZE
l.sw 0x54(r1), r23
.cfi_offset 23, 0x54-EXCEPTION_STACK_SIZE
l.sw 0x58(r1), r24
.cfi_offset 24, 0x58-EXCEPTION_STACK_SIZE
l.sw 0x5c(r1), r25
.cfi_offset 25, 0x5c-EXCEPTION_STACK_SIZE
l.sw 0x60(r1), r26
.cfi_offset 26, 0x60-EXCEPTION_STACK_SIZE
l.sw 0x64(r1), r27
.cfi_offset 27, 0x64-EXCEPTION_STACK_SIZE
l.sw 0x68(r1), r28
.cfi_offset 28, 0x68-EXCEPTION_STACK_SIZE
l.sw 0x6c(r1), r29
.cfi_offset 29, 0x6c-EXCEPTION_STACK_SIZE
l.sw 0x70(r1), r30
.cfi_offset 30, 0x70-EXCEPTION_STACK_SIZE
l.sw 0x74(r1), r31
.cfi_offset 31, 0x74-EXCEPTION_STACK_SIZE
/* Save return address */
l.or r14, r0, r9
@ -210,6 +245,9 @@ _exception_handler:
l.or r4, r0, r1
/* Extract exception PC */
l.mfspr r5, r0, SPR_EPCR_BASE
/* Tell exception PC to the unwinder */
l.sw 0x78(r1), r5
.cfi_offset 32, 0x78-EXCEPTION_STACK_SIZE
/* Extract exception effective address */
l.mfspr r6, r0, SPR_EEAR_BASE
/* Extract exception SR */
@ -252,6 +290,7 @@ _exception_handler:
l.lwz r31, 0x74(r1)
l.jr r9
l.nop
.cfi_endproc
.global _cache_init
_cache_init:

View File

@ -5,24 +5,43 @@ extern crate unwind;
extern crate libc;
use unwind as uw;
use libc::c_void;
use libc::{c_void, c_int};
pub fn backtrace<F>(mut f: F) -> Result<(), uw::_Unwind_Reason_Code>
const UW_REG_SP: c_int = -2;
pub fn backtrace<F>(f: F) -> Result<(), uw::_Unwind_Reason_Code>
where F: FnMut(usize) -> ()
{
struct TraceContext<F> {
step_fn: F,
prev_sp: uw::_Unwind_Word
}
extern fn trace<F>(context: *mut uw::_Unwind_Context, arg: *mut c_void)
-> uw::_Unwind_Reason_Code
where F: FnMut(usize) -> ()
{
unsafe {
let step_fn = &mut *(arg as *mut F);
step_fn(uw::_Unwind_GetIP(context));
let trace_context = &mut *(arg as *mut TraceContext<F>);
// Detect the root of a libfringe thread
let cur_sp = uw::_Unwind_GetGR(context, UW_REG_SP);
if cur_sp == trace_context.prev_sp {
return uw::_URC_END_OF_STACK
} else {
trace_context.prev_sp = cur_sp;
}
// GetIP gives us the return address, i.e. the address after the delay slot,
// but we're interested in the call instruction.
(trace_context.step_fn)(uw::_Unwind_GetIP(context) - 2 * 4);
uw::_URC_NO_REASON
}
}
unsafe {
match uw::_Unwind_Backtrace(trace::<F>, &mut f as *mut _ as *mut c_void) {
let mut trace_context = TraceContext { step_fn: f, prev_sp: 0 };
match uw::_Unwind_Backtrace(trace::<F>, &mut trace_context as *mut _ as *mut c_void) {
uw::_URC_NO_REASON => Ok(()),
err => Err(err)
}

View File

@ -57,6 +57,7 @@ mod moninj;
mod analyzer;
fn startup() {
irq::set_mask(0);
irq::set_ie(true);
clock::init();
info!("ARTIQ runtime starting...");
@ -305,6 +306,8 @@ pub extern fn abort() {
#[lang = "panic_fmt"]
pub extern fn panic_fmt(args: core::fmt::Arguments, file: &'static str,
line: u32, column: u32) -> ! {
irq::set_ie(false);
println!("panic at {}:{}:{}: {}", file, line, column, args);
println!("backtrace for software version {}:",

View File

@ -10,8 +10,8 @@ use managed::ManagedMap;
pub struct Address(NonZeroUsize);
impl Address {
pub fn new(raw: usize) -> Option<Address> {
NonZeroUsize::new(raw).map(Address)
pub fn new(raw: usize) -> Address {
Address(NonZeroUsize::new(raw).expect("null address"))
}
pub fn as_raw(&self) -> usize {
@ -50,6 +50,10 @@ impl Profile {
self.edges.capacity() * edge_size
}
pub fn has_edges(&self) -> bool {
self.edges.is_empty()
}
pub fn hits<'a>(&'a mut self) -> ManagedMap<'a, Address, u32> {
ManagedMap::Borrowed(&mut self.hits[..])
}
@ -88,6 +92,7 @@ impl Profile {
#[cfg(has_timer1)]
mod imp {
use unwind_backtrace::backtrace;
use board_misoc::{csr, irq};
use super::{Address, Profile};
@ -192,22 +197,46 @@ mod imp {
}
}
// Skip frames: ::profiler::sample, ::exception, exception vector.
const SKIP_FRAMES: i32 = 3;
#[inline(always)] // make the top of backtrace predictable
fn record(profile: &mut Profile, pc: usize) -> Result<(), ()> {
let callee = Address::new(pc).expect("null code address");
profile.record_hit(callee)?;
let mut result = Ok(());
let mut frame = -SKIP_FRAMES;
// TODO: record edges
// If we have storage for edges, use the DWARF unwinder.
// Otherwise, don't bother and use a much faster path that just looks at EPCR.
// Also, acquiring a meaningful backtrace requires libunwind
// with the https://reviews.llvm.org/D46971 patch applied.
if profile.has_edges() {
let mut prev_pc = 0;
let _ = backtrace(|pc| {
if frame == 0 {
result = result.and_then(|()|
profile.record_hit(Address::new(pc)));
prev_pc = pc;
} else if frame > 0 {
result = result.and_then(|()|
profile.record_edge(Address::new(pc),
Address::new(prev_pc)));
}
prev_pc = pc;
frame += 1;
});
}
Ok(())
// If we couldn't get anything useful out of a backtrace, at least
// record a hit at the exception PC.
if frame <= 0 {
result = profile.record_hit(Address::new(pc));
}
result
}
#[inline(never)] // see above
pub fn sample(pc: usize) {
unsafe {
csr::timer1::ev_pending_write(1);
}
let result = {
let mut profile = Lock::take().expect("cannot lock");
record(profile.as_mut().expect("profiler not running"), pc)
@ -216,6 +245,10 @@ mod imp {
if result.is_err() {
warn!("out of space");
stop();
} else {
unsafe {
csr::timer1::ev_pending_write(1);
}
}
}
}

View File

@ -98,8 +98,8 @@ def get_argparser():
help="sampling interval, in microseconds")
p_start.add_argument("--hits-size", metavar="ENTRIES", type=int, default=8192,
help="hit buffer size")
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=0,
help="edge buffer size (edge profiling not implemented)")
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=8192,
help="edge buffer size")
p_stop = subparsers.add_parser("stop",
help="stop profiling")
@ -113,6 +113,9 @@ def get_argparser():
p_save.add_argument("--no-compression",
dest="compression", default=True, action="store_false",
help="disable profile compression")
p_save.add_argument("--no-demangle",
dest="demangle", default=True, action="store_false",
help="disable symbol demangling")
# misc debug
t_debug = tools.add_parser("debug",
@ -178,8 +181,8 @@ def main():
mgmt.stop_profiler()
elif args.action == "save":
hits, edges = mgmt.get_profile()
writer = CallgrindWriter(args.output, args.firmware,
"or1k-linux", args.compression)
writer = CallgrindWriter(args.output, args.firmware, "or1k-linux",
args.compression, args.demangle)
writer.header()
for addr, count in hits.items():
writer.hit(addr, count)