remove profiler

This commit is contained in:
Sebastien Bourdeauducq 2021-08-12 11:43:19 +08:00
parent e3edb505e3
commit 477b1516d3
7 changed files with 1 additions and 531 deletions

View File

@ -20,10 +20,6 @@ class Request(Enum):
ConfigRemove = 14 ConfigRemove = 14
ConfigErase = 15 ConfigErase = 15
StartProfiler = 9
StopProfiler = 10
GetProfile = 11
Reboot = 5 Reboot = 5
DebugAllocator = 8 DebugAllocator = 8
@ -38,8 +34,6 @@ class Reply(Enum):
ConfigData = 7 ConfigData = 7
Profile = 5
RebootImminent = 3 RebootImminent = 3
@ -189,40 +183,6 @@ class CommMgmt:
self._write_header(Request.ConfigErase) self._write_header(Request.ConfigErase)
self._read_expect(Reply.Success) self._read_expect(Reply.Success)
def start_profiler(self, interval, edges_size, hits_size):
self._write_header(Request.StartProfiler)
self._write_int32(interval)
self._write_int32(edges_size)
self._write_int32(hits_size)
self._read_expect(Reply.Success)
def stop_profiler(self):
self._write_header(Request.StopProfiler)
self._read_expect(Reply.Success)
def stop_profiler(self):
self._write_header(Request.StopProfiler)
self._read_expect(Reply.Success)
def get_profile(self):
self._write_header(Request.GetProfile)
self._read_expect(Reply.Profile)
hits = {}
for _ in range(self._read_int32()):
addr = self._read_int32()
count = self._read_int32()
hits[addr] = count
edges = {}
for _ in range(self._read_int32()):
caller = self._read_int32()
callee = self._read_int32()
count = self._read_int32()
edges[(caller, callee)] = count
return hits, edges
def reboot(self): def reboot(self):
self._write_header(Request.Reboot) self._write_header(Request.Reboot)
self._read_expect(Reply.RebootImminent) self._read_expect(Reply.RebootImminent)

View File

@ -1,92 +0,0 @@
from collections import defaultdict
import subprocess
class Symbolizer:
def __init__(self, binary, triple, demangle=True):
cmdline = [
triple + "-addr2line", "--exe=" + binary,
"--addresses", "--functions", "--inlines"
]
if demangle:
cmdline.append("--demangle=rust")
self._addr2line = subprocess.Popen(cmdline, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
universal_newlines=True)
def symbolize(self, addr):
self._addr2line.stdin.write("0x{:08x}\n0\n".format(addr))
self._addr2line.stdin.flush()
self._addr2line.stdout.readline() # 0x[addr]
result = []
while True:
function = self._addr2line.stdout.readline().rstrip()
# check for end marker
if function == "0x00000000": # 0x00000000
self._addr2line.stdout.readline() # ??
self._addr2line.stdout.readline() # ??:0
return result
file, line = self._addr2line.stdout.readline().rstrip().split(":")
result.append((function, file, line, addr))
class CallgrindWriter:
def __init__(self, output, binary, triple, compression=True, demangle=True):
self._output = output
self._binary = binary
self._current = defaultdict(lambda: None)
self._ids = defaultdict(lambda: {})
self._compression = compression
self._symbolizer = Symbolizer(binary, triple, demangle=demangle)
def _write(self, fmt, *args, **kwargs):
self._output.write(fmt.format(*args, **kwargs))
self._output.write("\n")
def _spec(self, spec, value):
if self._current[spec] == value:
return
self._current[spec] = value
if not self._compression or value == "??":
self._write("{}={}", spec, value)
return
spec_ids = self._ids[spec]
if value in spec_ids:
self._write("{}=({})", spec, spec_ids[value])
else:
spec_ids[value] = len(spec_ids) + 1
self._write("{}=({}) {}", spec, spec_ids[value], value)
def header(self):
self._write("# callgrind format")
self._write("version: 1")
self._write("creator: ARTIQ")
self._write("positions: instr line")
self._write("events: Hits")
self._write("")
self._spec("ob", self._binary)
self._spec("cob", self._binary)
def hit(self, addr, count):
for function, file, line, addr in self._symbolizer.symbolize(addr):
self._spec("fl", file)
self._spec("fn", function)
self._write("0x{:08x} {} {}", addr, line, count)
def edge(self, caller, callee, count):
edges = self._symbolizer.symbolize(callee) + self._symbolizer.symbolize(caller)
for (callee, caller) in zip(edges, edges[1:]):
function, file, line, addr = callee
self._spec("cfl", file)
self._spec("cfn", function)
self._write("calls={} 0x{:08x} {}", count, addr, line)
function, file, line, addr = caller
self._spec("fl", file)
self._spec("fn", function)
self._write("0x{:08x} {} {}", addr, line, count)

View File

@ -63,14 +63,6 @@ pub enum Request {
ConfigRemove { key: String }, ConfigRemove { key: String },
ConfigErase, ConfigErase,
StartProfiler {
interval_us: u32,
hits_size: u32,
edges_size: u32,
},
StopProfiler,
GetProfile,
Reboot, Reboot,
DebugAllocator, DebugAllocator,
@ -129,14 +121,6 @@ impl Request {
}, },
15 => Request::ConfigErase, 15 => Request::ConfigErase,
9 => Request::StartProfiler {
interval_us: reader.read_u32()?,
hits_size: reader.read_u32()?,
edges_size: reader.read_u32()?,
},
10 => Request::StopProfiler,
11 => Request::GetProfile,
5 => Request::Reboot, 5 => Request::Reboot,
8 => Request::DebugAllocator, 8 => Request::DebugAllocator,
@ -172,11 +156,6 @@ impl<'a> Reply<'a> {
writer.write_bytes(bytes)?; writer.write_bytes(bytes)?;
}, },
Reply::Profile => {
writer.write_u8(5)?;
// profile data follows
}
Reply::RebootImminent => { Reply::RebootImminent => {
writer.write_u8(3)?; writer.write_u8(3)?;
} }

View File

@ -50,7 +50,6 @@ mod cache;
mod rtio_dma; mod rtio_dma;
mod mgmt; mod mgmt;
mod profiler;
mod kernel; mod kernel;
mod kern_hwreq; mod kern_hwreq;
mod session; mod session;
@ -263,14 +262,7 @@ pub extern fn exception(vect: u32, _regs: *const u32, pc: u32, ea: u32) {
let vect = irq::Exception::try_from(vect).expect("unknown exception"); let vect = irq::Exception::try_from(vect).expect("unknown exception");
match vect { match vect {
irq::Exception::Interrupt => irq::Exception::Interrupt =>
while irq::pending_mask() != 0 { panic!("spurious irq {}", irq::pending_mask().trailing_zeros())
match () {
#[cfg(has_timer1)]
() if irq::is_pending(csr::TIMER1_INTERRUPT) =>
profiler::sample(pc as usize),
_ => panic!("spurious irq {}", irq::pending_mask().trailing_zeros())
}
},
_ => { _ => {
fn hexdump(addr: u32) { fn hexdump(addr: u32) {
let addr = (addr - addr % 4) as *const u32; let addr = (addr - addr % 4) as *const u32;

View File

@ -5,7 +5,6 @@ use board_misoc::{config, spiflash};
use logger_artiq::BufferLogger; use logger_artiq::BufferLogger;
use mgmt_proto::*; use mgmt_proto::*;
use sched::{Io, TcpListener, TcpStream, Error as SchedError}; use sched::{Io, TcpListener, TcpStream, Error as SchedError};
use profiler;
impl From<SchedError> for Error<SchedError> { impl From<SchedError> for Error<SchedError> {
fn from(value: SchedError) -> Error<SchedError> { fn from(value: SchedError) -> Error<SchedError> {
@ -103,53 +102,11 @@ fn worker(io: &Io, stream: &mut TcpStream) -> Result<(), Error<SchedError>> {
}?; }?;
} }
Request::StartProfiler { interval_us, hits_size, edges_size } => {
match profiler::start(interval_us as u64,
hits_size as usize, edges_size as usize) {
Ok(()) => Reply::Success.write_to(stream)?,
Err(()) => Reply::Unavailable.write_to(stream)?
}
}
Request::StopProfiler => {
profiler::stop();
Reply::Success.write_to(stream)?;
}
Request::GetProfile => {
profiler::pause(|profile| {
let profile = match profile {
None => return Reply::Unavailable.write_to(stream),
Some(profile) => profile
};
Reply::Profile.write_to(stream)?;
{
let hits = profile.hits();
stream.write_u32(hits.len() as u32)?;
for (&addr, &count) in hits.iter() {
stream.write_u32(addr.as_raw() as u32)?;
stream.write_u32(count)?;
}
}
{
let edges = profile.edges();
stream.write_u32(edges.len() as u32)?;
for (&(caller, callee), &count) in edges.iter() {
stream.write_u32(caller.as_raw() as u32)?;
stream.write_u32(callee.as_raw() as u32)?;
stream.write_u32(count)?;
}
}
Ok(())
})?;
}
Request::Reboot => { Request::Reboot => {
Reply::RebootImminent.write_to(stream)?; Reply::RebootImminent.write_to(stream)?;
stream.close()?; stream.close()?;
stream.flush()?; stream.flush()?;
profiler::stop();
warn!("restarting"); warn!("restarting");
unsafe { spiflash::reload(); } unsafe { spiflash::reload(); }
} }

View File

@ -1,278 +0,0 @@
#![cfg_attr(not(has_timer1), allow(dead_code))]
use core::mem;
use core::fmt;
use core::num::NonZeroUsize;
use alloc::Vec;
use managed::ManagedMap;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Address(NonZeroUsize);
impl Address {
pub fn new(raw: usize) -> Address {
Address(NonZeroUsize::new(raw).expect("null address"))
}
pub fn as_raw(&self) -> usize {
self.0.get()
}
}
pub struct Profile {
hits: Vec<Option<(Address, u32)>>,
edges: Vec<Option<((Address, Address), u32)>>,
}
impl fmt::Debug for Profile {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Profile {{ hits: vec![...; {}], edges: vec![...; {}] }}",
self.hits.len(), self.edges.len())
}
}
impl Profile {
pub fn new(hits_size: usize, edges_size: usize) -> Profile {
let mut hits = vec![None; hits_size];
hits.shrink_to_fit();
let mut edges = vec![None; edges_size];
edges.shrink_to_fit();
Profile {
hits: hits.into(),
edges: edges.into(),
}
}
pub fn overhead(&self) -> usize {
let hit_size = mem::size_of::<Option<(Address, u32)>>();
let edge_size = mem::size_of::<Option<(Address, u32)>>();
self.hits.capacity() * hit_size +
self.edges.capacity() * edge_size
}
pub fn has_edges(&self) -> bool {
self.edges.is_empty()
}
pub fn hits<'a>(&'a mut self) -> ManagedMap<'a, Address, u32> {
ManagedMap::Borrowed(&mut self.hits[..])
}
pub fn edges<'a>(&'a mut self) -> ManagedMap<'a, (Address, Address), u32> {
ManagedMap::Borrowed(&mut self.edges[..])
}
pub fn record_hit(&mut self, addr: Address) -> Result<(), ()> {
let mut hits = self.hits();
if let Some(count) = hits.get_mut(&addr) {
return Ok(*count = count.saturating_add(1))
}
if let Err(_) = hits.insert(addr, 1) {
return Err(())
}
return Ok(())
}
#[allow(dead_code)]
pub fn record_edge(&mut self, caller: Address, callee: Address) -> Result<(), ()> {
let mut edges = self.edges();
if let Some(count) = edges.get_mut(&(caller, callee)) {
return Ok(*count = count.saturating_add(1))
}
if let Err(_) = edges.insert((caller, callee), 1) {
return Err(())
}
Ok(())
}
}
#[cfg(has_timer1)]
mod imp {
use unwind_backtrace::backtrace;
use board_misoc::{csr, irq};
use super::{Address, Profile};
static mut PROFILE: Option<Profile> = None;
mod lock {
use core::ops::{Deref, DerefMut};
use core::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT};
static LOCKED: AtomicUsize = ATOMIC_USIZE_INIT;
pub struct Lock;
impl Lock {
pub fn take() -> Result<Lock, ()> {
if LOCKED.swap(1, Ordering::SeqCst) != 0 {
Err(())
} else {
Ok(Lock)
}
}
}
impl Deref for Lock {
type Target = Option<super::Profile>;
fn deref(&self) -> &Option<super::Profile> {
unsafe { &super::PROFILE }
}
}
impl DerefMut for Lock {
fn deref_mut(&mut self) -> &mut Option<super::Profile> {
unsafe { &mut super::PROFILE }
}
}
impl Drop for Lock {
fn drop(&mut self) {
LOCKED.store(0, Ordering::SeqCst)
}
}
}
use self::lock::Lock;
pub fn start(interval_us: u64, hits_size: usize, edges_size: usize) -> Result<(), ()> {
stop();
let profile = Profile::new(hits_size, edges_size);
info!("starting at {}us interval using {} heap bytes",
interval_us, profile.overhead());
*Lock::take().expect("cannot lock") = Some(profile);
unsafe {
let reload = csr::CONFIG_CLOCK_FREQUENCY as u64 * interval_us / 1_000_000;
csr::timer1::load_write(reload);
csr::timer1::reload_write(reload);
csr::timer1::ev_pending_write(1);
csr::timer1::ev_enable_write(1);
irq::enable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(1);
}
Ok(())
}
pub fn stop() {
unsafe {
if csr::timer1::en_read() == 0 || csr::timer1::ev_enable_read() == 0 {
return
}
irq::disable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(0);
*Lock::take().expect("cannot lock") = None;
info!("stopped");
}
}
pub fn pause<F: FnOnce(Option<&mut Profile>) -> R, R>(f: F) -> R {
unsafe {
if csr::timer1::en_read() == 0 {
return f(None)
}
irq::disable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(0);
let result = {
let mut profile = Lock::take().expect("cannot lock");
f(profile.as_mut())
};
irq::enable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(1);
result
}
}
// Skip frames: ::profiler::sample, ::exception, exception vector.
const SKIP_FRAMES: i32 = 3;
#[inline(always)] // make the top of backtrace predictable
fn record(profile: &mut Profile, exn_pc: usize) -> Result<(), ()> {
let mut result = Ok(());
let mut frame = -SKIP_FRAMES;
// If we have storage for edges, use the DWARF unwinder.
// Otherwise, don't bother and use a much faster path that just looks at EPCR.
// Also, acquiring a meaningful backtrace requires libunwind
// with the https://reviews.llvm.org/D46971 patch applied.
if profile.has_edges() {
let mut prev_pc = 0;
let _ = backtrace(|pc| {
// Backtrace gives us the return address, i.e. the address after the delay slot,
// but we're interested in the call instruction, *except* when going through
// the frame directly below the exception frame, which has the address that's
// being executed.
let pc = if pc != exn_pc { pc - 2 * 4 } else { pc };
if frame == 0 {
result = result.and_then(|()|
profile.record_hit(Address::new(pc)));
prev_pc = pc;
} else if frame > 0 {
result = result.and_then(|()|
profile.record_edge(Address::new(pc),
Address::new(prev_pc)));
}
prev_pc = pc;
frame += 1;
});
}
// If we couldn't get anything useful out of a backtrace, at least
// record a hit at the exception PC.
if frame <= 0 {
result = profile.record_hit(Address::new(exn_pc));
}
result
}
#[inline(never)] // see above
pub fn sample(pc: usize) {
let result = {
let mut profile = Lock::take().expect("cannot lock");
record(profile.as_mut().expect("profiler not running"), pc)
};
if result.is_err() {
warn!("out of space");
stop();
} else {
unsafe {
csr::timer1::ev_pending_write(1);
}
}
}
}
#[cfg(not(has_timer1))]
mod imp {
#![allow(dead_code)]
pub fn start(_interval_us: u64, _hits_size: usize, _edges_size: usize) -> Result<(), ()> {
error!("timer not available");
Err(())
}
pub fn stop() {}
pub fn pause<F: FnOnce(Option<&mut super::Profile>) -> R, R>(f: F) -> R {
f(None)
}
pub fn sample(_pc: usize) {}
}
pub use self::imp::*;

View File

@ -9,7 +9,6 @@ from artiq import __version__ as artiq_version
from artiq.master.databases import DeviceDB from artiq.master.databases import DeviceDB
from artiq.coredevice.comm_kernel import CommKernel from artiq.coredevice.comm_kernel import CommKernel
from artiq.coredevice.comm_mgmt import CommMgmt from artiq.coredevice.comm_mgmt import CommMgmt
from artiq.coredevice.profiler import CallgrindWriter
def get_argparser(): def get_argparser():
@ -86,38 +85,6 @@ def get_argparser():
t_boot = tools.add_parser("reboot", t_boot = tools.add_parser("reboot",
help="reboot the running system") help="reboot the running system")
# profiling
t_profile = tools.add_parser("profile",
help="account for communications CPU time")
subparsers = t_profile.add_subparsers(dest="action")
subparsers.required = True
p_start = subparsers.add_parser("start",
help="start profiling")
p_start.add_argument("--interval", metavar="MICROS", type=int, default=2000,
help="sampling interval, in microseconds")
p_start.add_argument("--hits-size", metavar="ENTRIES", type=int, default=8192,
help="hit buffer size")
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=8192,
help="edge buffer size")
p_stop = subparsers.add_parser("stop",
help="stop profiling")
p_save = subparsers.add_parser("save",
help="save profile")
p_save.add_argument("output", metavar="OUTPUT", type=argparse.FileType("w"),
help="file to save profile to, in Callgrind format")
p_save.add_argument("firmware", metavar="FIRMWARE", type=str,
help="path to firmware ELF file")
p_save.add_argument("--no-compression",
dest="compression", default=True, action="store_false",
help="disable profile compression")
p_save.add_argument("--no-demangle",
dest="demangle", default=True, action="store_false",
help="disable symbol demangling")
# misc debug # misc debug
t_debug = tools.add_parser("debug", t_debug = tools.add_parser("debug",
help="specialized debug functions") help="specialized debug functions")
@ -174,21 +141,6 @@ def main():
if args.tool == "reboot": if args.tool == "reboot":
mgmt.reboot() mgmt.reboot()
if args.tool == "profile":
if args.action == "start":
mgmt.start_profiler(args.interval, args.hits_size, args.edges_size)
elif args.action == "stop":
mgmt.stop_profiler()
elif args.action == "save":
hits, edges = mgmt.get_profile()
writer = CallgrindWriter(args.output, args.firmware, "or1k-linux",
args.compression, args.demangle)
writer.header()
for addr, count in hits.items():
writer.hit(addr, count)
for (caller, callee), count in edges.items():
writer.edge(caller, callee, count)
if args.tool == "debug": if args.tool == "debug":
if args.action == "allocator": if args.action == "allocator":
mgmt.debug_allocator() mgmt.debug_allocator()