firmware: implement a sampling profiler.

Does not yet support constructing call graphs.
pull/1017/head
whitequark 2018-05-04 23:44:51 +00:00
parent 9857dfcd7d
commit b1d349cc1b
11 changed files with 554 additions and 13 deletions

1
.gitignore vendored
View File

@ -7,6 +7,7 @@ __pycache__/
*.elf
*.fbi
*.pcap
*.prof
.ipynb_checkpoints
/doc/manual/_build
/build

View File

@ -14,6 +14,10 @@ class Request(Enum):
SetLogFilter = 3
SetUartLogFilter = 6
StartProfiler = 9
StopProfiler = 10
GetProfile = 11
Hotswap = 4
Reboot = 5
@ -22,9 +26,12 @@ class Request(Enum):
class Reply(Enum):
Success = 1
Unavailable = 4
LogContent = 2
Profile = 5
RebootImminent = 3
@ -145,6 +152,40 @@ class CommMgmt:
self._write_int8(getattr(LogLevel, level).value)
self._read_expect(Reply.Success)
def start_profiler(self, interval, edges_size, hits_size):
self._write_header(Request.StartProfiler)
self._write_int32(interval)
self._write_int32(edges_size)
self._write_int32(hits_size)
self._read_expect(Reply.Success)
def stop_profiler(self):
self._write_header(Request.StopProfiler)
self._read_expect(Reply.Success)
def stop_profiler(self):
self._write_header(Request.StopProfiler)
self._read_expect(Reply.Success)
def get_profile(self):
self._write_header(Request.GetProfile)
self._read_expect(Reply.Profile)
hits = {}
for _ in range(self._read_int32()):
addr = self._read_int32()
count = self._read_int32()
hits[addr] = count
edges = {}
for _ in range(self._read_int32()):
caller = self._read_int32()
callee = self._read_int32()
count = self._read_int32()
edges[(caller, callee)] = count
return hits, edges
def hotswap(self, firmware):
self._write_header(Request.Hotswap)
self._write_bytes(firmware)

View File

@ -193,6 +193,11 @@ name = "managed"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "managed"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proto"
version = "0.0.0"
@ -221,6 +226,7 @@ dependencies = [
"fringe 1.1.0 (git+https://github.com/m-labs/libfringe?rev=bd23494)",
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"logger_artiq 0.0.0",
"managed 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"proto 0.0.0",
"smoltcp 0.4.0 (git+https://github.com/m-labs/smoltcp?rev=181083f)",
"std_artiq 0.0.0",
@ -300,6 +306,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2"
"checksum log_buffer 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ec57723b84bbe7bdf76aa93169c9b59e67473317c6de3a83cb2a0f8ccb2aa493"
"checksum managed 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "43e2737ecabe4ae36a68061398bf27d2bfd0763f4c3c837a398478459494c4b7"
"checksum managed 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5a31885241e61ba264d780d2e6686e7e59561c947b4581470364eb3e10102d86"
"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7"
"checksum smoltcp 0.4.0 (git+https://github.com/m-labs/smoltcp?rev=181083f)" = "<none>"
"checksum walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff"

View File

@ -14,6 +14,14 @@ pub enum Request {
#[cfg(feature = "log")]
SetUartLogFilter(log::LevelFilter),
StartProfiler {
interval_us: u32,
hits_size: u32,
edges_size: u32,
},
StopProfiler,
GetProfile,
Hotswap(Vec<u8>),
Reboot,
@ -22,9 +30,12 @@ pub enum Request {
pub enum Reply<'a> {
Success,
Unavailable,
LogContent(&'a str),
Profile,
RebootImminent,
}
@ -52,6 +63,13 @@ impl Request {
3 => Request::SetLogFilter(read_log_level_filter(reader)?),
#[cfg(feature = "log")]
6 => Request::SetUartLogFilter(read_log_level_filter(reader)?),
9 => Request::StartProfiler {
interval_us: reader.read_u32()?,
hits_size: reader.read_u32()?,
edges_size: reader.read_u32()?,
},
10 => Request::StopProfiler,
11 => Request::GetProfile,
4 => Request::Hotswap(reader.read_bytes()?),
5 => Request::Reboot,
8 => Request::DebugAllocator,
@ -65,16 +83,25 @@ impl<'a> Reply<'a> {
match *self {
Reply::Success => {
writer.write_u8(1)?;
},
}
Reply::Unavailable => {
writer.write_u8(4)?;
}
Reply::LogContent(ref log) => {
writer.write_u8(2)?;
writer.write_string(log)?;
},
}
Reply::Profile => {
writer.write_u8(5)?;
// profile data follows
}
Reply::RebootImminent => {
writer.write_u8(3)?;
},
}
}
Ok(())
}

View File

@ -17,6 +17,7 @@ build_artiq = { path = "../libbuild_artiq" }
byteorder = { version = "1.0", default-features = false }
cslice = { version = "0.3" }
log = { version = "0.4", default-features = false }
managed = { version = "0.6", default-features = false, features = ["alloc", "map"] }
board = { path = "../libboard", features = ["uart_console", "smoltcp"] }
alloc_list = { path = "../liballoc_list" }
std_artiq = { path = "../libstd_artiq", features = ["alloc", "io_error_alloc"] }

View File

@ -1,5 +1,5 @@
#![no_std]
#![feature(lang_items, alloc, global_allocator)]
#![feature(lang_items, alloc, global_allocator, try_from, nonzero, nll)]
extern crate alloc;
extern crate cslice;
@ -7,6 +7,7 @@ extern crate cslice;
extern crate log;
extern crate byteorder;
extern crate fringe;
extern crate managed;
extern crate smoltcp;
extern crate alloc_list;
@ -22,8 +23,10 @@ extern crate amp;
#[cfg(has_drtio)]
extern crate drtioaux;
use core::convert::TryFrom;
use smoltcp::wire::{EthernetAddress, IpAddress, IpCidr};
use board::irq;
use board::config;
#[cfg(has_ethmac)]
use board::ethmac;
@ -39,6 +42,7 @@ mod cache;
mod rtio_dma;
mod mgmt;
mod profiler;
mod kernel;
mod kern_hwreq;
mod watchdog;
@ -49,6 +53,7 @@ mod moninj;
mod analyzer;
fn startup() {
irq::set_ie(true);
board::clock::init();
info!("ARTIQ runtime starting...");
info!("software version {}", include_str!(concat!(env!("OUT_DIR"), "/git-describe")));
@ -271,7 +276,19 @@ pub extern fn main() -> i32 {
#[no_mangle]
pub extern fn exception(vect: u32, _regs: *const u32, pc: u32, ea: u32) {
panic!("exception {:?} at PC 0x{:x}, EA 0x{:x}", vect, pc, ea)
let vect = irq::Exception::try_from(vect).expect("unknown exception");
match vect {
irq::Exception::Interrupt =>
while irq::pending_mask() != 0 {
match () {
#[cfg(has_timer1)]
() if irq::is_pending(::board::csr::TIMER1_INTERRUPT) =>
profiler::sample(pc as usize),
_ => panic!("spurious irq {}", irq::pending_mask().trailing_zeros())
}
},
_ => panic!("exception {:?} at PC 0x{:x}, EA 0x{:x}", vect, pc, ea)
}
}
#[no_mangle]

View File

@ -6,6 +6,7 @@ use sched::Io;
use sched::{TcpListener, TcpStream};
use proto::WriteExt;
use mgmt_proto::*;
use profiler;
fn check_magic(stream: &mut TcpStream) -> io::Result<()> {
const MAGIC: &'static [u8] = b"ARTIQ management\n";
@ -30,7 +31,7 @@ fn worker(io: &Io, stream: &mut TcpStream) -> io::Result<()> {
let mut buffer = io.until_ok(|| logger.buffer())?;
Reply::LogContent(buffer.extract()).write_to(stream)
})?;
},
}
Request::ClearLog => {
BufferLogger::with(|logger| -> io::Result<()> {
@ -39,7 +40,7 @@ fn worker(io: &Io, stream: &mut TcpStream) -> io::Result<()> {
})?;
Reply::Success.write_to(stream)?;
},
}
Request::PullLog => {
BufferLogger::with(|logger| -> io::Result<()> {
@ -69,35 +70,79 @@ fn worker(io: &Io, stream: &mut TcpStream) -> io::Result<()> {
buffer.clear();
}
})?;
},
}
Request::SetLogFilter(level) => {
info!("changing log level to {}", level);
log::set_max_level(level);
Reply::Success.write_to(stream)?;
},
}
Request::SetUartLogFilter(level) => {
info!("changing UART log level to {}", level);
BufferLogger::with(|logger|
logger.set_uart_log_level(level));
Reply::Success.write_to(stream)?;
},
}
Request::StartProfiler { interval_us, hits_size, edges_size } => {
match profiler::start(interval_us as u64,
hits_size as usize, edges_size as usize) {
Ok(()) => Reply::Success.write_to(stream)?,
Err(()) => Reply::Unavailable.write_to(stream)?
}
}
Request::StopProfiler => {
profiler::stop();
Reply::Success.write_to(stream)?;
}
Request::GetProfile => {
profiler::pause(|profile| {
let profile = match profile {
None => return Reply::Unavailable.write_to(stream),
Some(profile) => profile
};
Reply::Profile.write_to(stream)?;
{
let hits = profile.hits();
stream.write_u32(hits.len() as u32)?;
for (&addr, &count) in hits.iter() {
stream.write_u32(addr.as_raw() as u32)?;
stream.write_u32(count)?;
}
}
{
let edges = profile.edges();
stream.write_u32(edges.len() as u32)?;
for (&(caller, callee), &count) in edges.iter() {
stream.write_u32(caller.as_raw() as u32)?;
stream.write_u32(callee.as_raw() as u32)?;
stream.write_u32(count)?;
}
}
Ok(())
})?;
}
Request::Hotswap(firmware) => {
profiler::stop();
warn!("hotswapping firmware");
Reply::RebootImminent.write_to(stream)?;
stream.close()?;
stream.flush()?;
unsafe { boot::hotswap(&firmware) }
},
}
Request::Reboot => {
Reply::RebootImminent.write_to(stream)?;
stream.close()?;
warn!("restarting");
unsafe { boot::reset() }
},
}
Request::DebugAllocator =>
unsafe { println!("{}", ::ALLOC) },

View File

@ -0,0 +1,242 @@
#![cfg_attr(not(has_timer1), allow(dead_code))]
use core::mem;
use core::fmt;
use core::nonzero::NonZero;
use alloc::Vec;
use managed::ManagedMap;
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Address(NonZero<usize>);
impl Address {
pub fn new(raw: usize) -> Option<Address> {
NonZero::new(raw).map(Address)
}
pub fn as_raw(&self) -> usize {
self.0.get()
}
}
pub struct Profile {
hits: Vec<Option<(Address, u32)>>,
edges: Vec<Option<((Address, Address), u32)>>,
}
impl fmt::Debug for Profile {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Profile {{ hits: vec![...; {}], edges: vec![...; {}] }}",
self.hits.len(), self.edges.len())
}
}
impl Profile {
pub fn new(hits_size: usize, edges_size: usize) -> Profile {
let mut hits = vec![None; hits_size];
hits.shrink_to_fit();
let mut edges = vec![None; edges_size];
edges.shrink_to_fit();
Profile {
hits: hits.into(),
edges: edges.into(),
}
}
pub fn overhead(&self) -> usize {
let hit_size = mem::size_of::<Option<(Address, u32)>>();
let edge_size = mem::size_of::<Option<(Address, u32)>>();
self.hits.capacity() * hit_size +
self.edges.capacity() * edge_size
}
pub fn hits<'a>(&'a mut self) -> ManagedMap<'a, Address, u32> {
ManagedMap::Borrowed(&mut self.hits[..])
}
pub fn edges<'a>(&'a mut self) -> ManagedMap<'a, (Address, Address), u32> {
ManagedMap::Borrowed(&mut self.edges[..])
}
pub fn record_hit(&mut self, addr: Address) -> Result<(), ()> {
let mut hits = self.hits();
match hits.get_mut(&addr) {
Some(count) => *count = count.saturating_add(1),
None => {
if let Err(_) = hits.insert(addr, 1) {
return Err(())
}
}
}
Ok(())
}
#[allow(dead_code)]
pub fn record_edge(&mut self, caller: Address, callee: Address) -> Result<(), ()> {
let mut edges = self.edges();
match edges.get_mut(&(caller, callee)) {
Some(count) => *count = count.saturating_add(1),
None => {
if let Err(_) = edges.insert((caller, callee), 1) {
return Err(())
}
}
}
Ok(())
}
}
#[cfg(has_timer1)]
mod imp {
use board::{csr, irq};
use super::{Address, Profile};
static mut PROFILE: Option<Profile> = None;
mod lock {
use core::ops::{Deref, DerefMut};
use core::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT};
static LOCKED: AtomicUsize = ATOMIC_USIZE_INIT;
pub struct Lock;
impl Lock {
pub fn take() -> Result<Lock, ()> {
if LOCKED.swap(1, Ordering::SeqCst) != 0 {
Err(())
} else {
Ok(Lock)
}
}
}
impl Deref for Lock {
type Target = Option<super::Profile>;
fn deref(&self) -> &Option<super::Profile> {
unsafe { &super::PROFILE }
}
}
impl DerefMut for Lock {
fn deref_mut(&mut self) -> &mut Option<super::Profile> {
unsafe { &mut super::PROFILE }
}
}
impl Drop for Lock {
fn drop(&mut self) {
LOCKED.store(0, Ordering::SeqCst)
}
}
}
use self::lock::Lock;
pub fn start(interval_us: u64, hits_size: usize, edges_size: usize) -> Result<(), ()> {
stop();
let profile = Profile::new(hits_size, edges_size);
info!("starting at {}us interval using {} heap bytes",
interval_us, profile.overhead());
*Lock::take().expect("cannot lock") = Some(profile);
unsafe {
let reload = csr::CONFIG_CLOCK_FREQUENCY as u64 * interval_us / 1_000_000;
csr::timer1::load_write(reload);
csr::timer1::reload_write(reload);
csr::timer1::ev_pending_write(1);
csr::timer1::ev_enable_write(1);
irq::enable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(1);
}
Ok(())
}
pub fn stop() {
unsafe {
if csr::timer1::en_read() == 0 || csr::timer1::ev_enable_read() == 0 {
return
}
irq::disable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(0);
*Lock::take().expect("cannot lock") = None;
info!("stopped");
}
}
pub fn pause<F: FnOnce(Option<&mut Profile>) -> R, R>(f: F) -> R {
unsafe {
if csr::timer1::en_read() == 0 {
return f(None)
}
irq::disable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(0);
let result = {
let mut profile = Lock::take().expect("cannot lock");
f(profile.as_mut())
};
irq::enable(csr::TIMER1_INTERRUPT);
csr::timer1::en_write(1);
result
}
}
#[inline(always)] // make the top of backtrace predictable
fn record(profile: &mut Profile, pc: usize) -> Result<(), ()> {
let callee = Address::new(pc).expect("null code address");
profile.record_hit(callee)?;
// TODO: record edges
Ok(())
}
#[inline(never)] // see above
pub fn sample(pc: usize) {
unsafe {
csr::timer1::ev_pending_write(1);
}
let result = {
let mut profile = Lock::take().expect("cannot lock");
record(profile.as_mut().expect("profiler not running"), pc)
};
if result.is_err() {
warn!("out of space");
stop();
}
}
}
#[cfg(not(has_timer1))]
mod imp {
#![allow(dead_code)]
pub fn start(_interval_us: u64, _hits_size: usize, _edges_size: usize) -> Result<(), ()> {
error!("timer not available");
Err(())
}
pub fn stop() {}
pub fn pause<F: FnOnce(Option<&mut super::Profile>) -> R, R>(f: F) -> R {
f(None)
}
pub fn sample(_pc: usize) {}
}
pub use self::imp::*;

View File

@ -0,0 +1,155 @@
#!/usr/bin/env python3
import argparse
import sys
import struct
from collections import defaultdict
import subprocess
from artiq.tools import verbosity_args, init_logger
from artiq.master.databases import DeviceDB
from artiq.coredevice.comm_mgmt import CommMgmt
class Symbolizer:
def __init__(self, binary):
self._addr2line = subprocess.Popen([
"or1k-linux-addr2line", "--exe=" + binary,
"--addresses", "--demangle=rust", "--functions", "--inlines"
], stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)
def symbolize(self, addr):
self._addr2line.stdin.write("0x{:08x}\n0\n".format(addr))
self._addr2line.stdin.flush()
self._addr2line.stdout.readline() # 0x[addr]
result = []
while True:
function = self._addr2line.stdout.readline().rstrip()
# check for end marker
if function == "0x00000000": # 0x00000000
self._addr2line.stdout.readline() # ??
self._addr2line.stdout.readline() # ??:0
return result
file, line = self._addr2line.stdout.readline().rstrip().split(":")
result.append((function, file, line))
class CallgrindWriter:
def __init__(self, output, binary, compression=True):
self._output = output
self._binary = binary
self._current = defaultdict(lambda: None)
self._ids = defaultdict(lambda: {})
self._compression = compression
self._symbolizer = Symbolizer(binary)
def _write(self, fmt, *args, **kwargs):
self._output.write(fmt.format(*args, **kwargs))
self._output.write("\n")
def _spec(self, spec, value):
if self._current[spec] == value:
return
self._current[spec] = value
if not self._compression or value == "??":
self._write("{}={}", spec, value)
return
spec_ids = self._ids[spec]
if value in spec_ids:
self._write("{}=({})", spec, spec_ids[value])
else:
spec_ids[value] = len(spec_ids) + 1
self._write("{}=({}) {}", spec, spec_ids[value], value)
def header(self):
self._write("# callgrind format")
self._write("version: 1")
self._write("creator: ARTIQ")
self._write("positions: instr line")
self._write("events: Hits")
self._write("")
self._spec("ob", self._binary)
self._spec("cob", self._binary)
def hit(self, addr, count):
for function, file, line in self._symbolizer.symbolize(addr):
self._spec("fn", function)
self._spec("fl", file)
self._write("0x{:08x} {} {}", addr, line, count)
def edge(self, caller, callee, count):
function, file, line = next(self._symbolizer.symbolize(callee))
self._spec("cfn", function)
self._spec("cfl", file)
self._write("calls={} 0x{:08x} {}", count, callee, line)
function, file, line = next(self._symbolizer.symbolize(caller))
self._spec("fn", function)
self._spec("fl", file)
self._write("0x{:08x} {} {}", caller, line, count)
def get_argparser():
parser = argparse.ArgumentParser(description="ARTIQ core device profiling tool")
verbosity_args(parser)
parser.add_argument("--device-db", default="device_db.py",
help="device database file (default: '%(default)s')")
subparsers = parser.add_subparsers(dest="action")
subparsers.required = True
p_start = subparsers.add_parser("start",
help="start profiling")
p_start.add_argument("--interval", metavar="MICROS", type=int, default=2000,
help="sampling interval, in microseconds")
p_start.add_argument("--hits-size", metavar="ENTRIES", type=int, default=8192,
help="hit buffer size")
p_start.add_argument("--edges-size", metavar="ENTRIES", type=int, default=0,
help="edge buffer size (edge profiling not implemented)")
p_stop = subparsers.add_parser("stop",
help="stop profiling")
p_save = subparsers.add_parser("save",
help="save profile")
p_save.add_argument("output", metavar="OUTPUT", type=argparse.FileType("w"),
help="file to save profile to, in Callgrind format")
p_save.add_argument("firmware", metavar="FIRMWARE", type=str,
help="path to firmware ELF file")
p_save.add_argument("--no-compression", default=True, action='store_false',
help="disable profile compression")
return parser
def main():
args = get_argparser().parse_args()
init_logger(args)
core_addr = DeviceDB(args.device_db).get("core")["arguments"]["host"]
mgmt = CommMgmt(core_addr)
try:
if args.action == "start":
mgmt.start_profiler(args.interval, args.hits_size, args.edges_size)
elif args.action == "stop":
mgmt.stop_profiler()
elif args.action == "save":
hits, edges = mgmt.get_profile()
writer = CallgrindWriter(args.output, args.firmware, not args.no_compression)
writer.header()
for addr, count in hits.items():
writer.hit(addr, count)
for (caller, callee), count in edges.items():
writer.edge(caller, callee, count)
finally:
mgmt.close()
if __name__ == "__main__":
main()

View File

@ -10,7 +10,7 @@ from migen.build.xilinx.vivado import XilinxVivadoToolchain
from migen.build.xilinx.ise import XilinxISEToolchain
from misoc.interconnect.csr import *
from misoc.cores import gpio
from misoc.cores import gpio, timer
from misoc.targets.kc705 import MiniSoC, soc_kc705_args, soc_kc705_argdict
from misoc.integration.builder import builder_args, builder_argdict
@ -231,6 +231,10 @@ class _StandaloneBase(MiniSoC, AMPSoC):
if isinstance(self.platform.toolchain, XilinxISEToolchain):
self.platform.toolchain.bitgen_opt += " -g compress"
self.submodules.timer1 = timer.Timer()
self.csr_devices.append("timer1")
self.interrupt_devices.append("timer1")
self.submodules.leds = gpio.GPIOOut(Cat(
self.platform.request("user_led", 0),
self.platform.request("user_led", 1)))

View File

@ -26,6 +26,7 @@ console_scripts = [
"artiq_corelog = artiq.frontend.artiq_corelog:main",
"artiq_coreboot = artiq.frontend.artiq_coreboot:main",
"artiq_coredebug = artiq.frontend.artiq_coredebug:main",
"artiq_coreprofile = artiq.frontend.artiq_coreprofile:main",
"artiq_ctlmgr = artiq.frontend.artiq_ctlmgr:main",
"artiq_devtool = artiq.frontend.artiq_devtool:main",
"artiq_pcap = artiq.frontend.artiq_pcap:main",