From 2cfa6091f315362f1e1b2c919b78349434dbdf2b Mon Sep 17 00:00:00 2001 From: David Mak Date: Sat, 1 Jun 2024 15:10:43 +0800 Subject: [PATCH] core: WIP - Add tracer runtime --- nac3artiq/src/lib.rs | 10 ++- nac3core/Cargo.toml | 4 + nac3core/build.rs | 42 +++++++--- nac3core/src/codegen/extern_fns.rs | 52 ++++++++++++- nac3core/src/codegen/mod.rs | 15 ++++ nac3core/src/codegen/test.rs | 17 +++- nac3core/src/codegen/tracert/mod.rs | 103 +++++++++++++++++++++++++ nac3core/src/codegen/tracert/tracert.c | 2 + nac3standalone/Cargo.toml | 3 + nac3standalone/demo/demo.c | 47 +++++++++-- nac3standalone/src/main.rs | 34 ++++++-- 11 files changed, 300 insertions(+), 29 deletions(-) create mode 100644 nac3core/src/codegen/tracert/mod.rs create mode 100644 nac3core/src/codegen/tracert/tracert.c diff --git a/nac3artiq/src/lib.rs b/nac3artiq/src/lib.rs index 04344e2..72ad23d 100644 --- a/nac3artiq/src/lib.rs +++ b/nac3artiq/src/lib.rs @@ -47,6 +47,7 @@ use parking_lot::{Mutex, RwLock}; use nac3core::{ codegen::irrt::load_irrt, + codegen::tracert::TraceRuntimeConfig, codegen::{concrete_type::ConcreteTypeStore, CodeGenTask, WithCall, WorkerRegistry}, symbol_resolver::SymbolResolver, toplevel::{ @@ -632,8 +633,13 @@ impl Nac3 { let membuffer = membuffers.clone(); py.allow_threads(|| { - let (registry, handles) = - WorkerRegistry::create_workers(threads, top_level.clone(), &self.llvm_options, &f); + let (registry, handles) = WorkerRegistry::create_workers( + threads, + top_level.clone(), + &self.llvm_options, + &TraceRuntimeConfig::default(), + &f, + ); registry.add_task(task); registry.wait_tasks_complete(handles); diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 724e0c8..c57546f 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["M-Labs"] edition = "2021" +[features] +tracing = [] + [dependencies] itertools = "0.13" crossbeam = "0.8" @@ -25,4 +28,5 @@ indoc = "2.0" insta = "=1.11.0" [build-dependencies] +itertools = "0.13" regex = "1.10" diff --git a/nac3core/build.rs b/nac3core/build.rs index 91060f9..cc40718 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -1,4 +1,4 @@ -use regex::Regex; +use std::ffi::OsStr; use std::{ env, fs::File, @@ -7,16 +7,23 @@ use std::{ process::{Command, Stdio}, }; -fn main() { - const FILE: &str = "src/codegen/irrt/irrt.c"; +use itertools::Itertools; +use regex::Regex; +/// Extracts the extension-less filename from a [`Path`]. +fn path_to_extless_filename(path: &Path) -> &str { + path.file_name().map(Path::new).and_then(Path::file_stem).and_then(OsStr::to_str).unwrap() +} + +/// Compiles a source C file into LLVM bitcode. +fn compile_file_to_ir(path: &Path, filename_without_ext: &str) { /* * HACK: Sadly, clang doesn't let us emit generic LLVM bitcode. * Compiling for WASM32 and filtering the output with regex is the closest we can get. */ let flags: &[&str] = &[ "--target=wasm32", - FILE, + path.to_str().unwrap(), "-fno-discard-value-names", match env::var("PROFILE").as_deref() { Ok("debug") => "-O0", @@ -31,7 +38,7 @@ fn main() { "-", ]; - println!("cargo:rerun-if-changed={FILE}"); + println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); let out_dir = env::var("OUT_DIR").unwrap(); let out_path = Path::new(&out_dir); @@ -44,12 +51,11 @@ fn main() { }) .unwrap(); - // https://github.com/rust-lang/regex/issues/244 - let output = std::str::from_utf8(&output.stdout).unwrap().replace("\r\n", "\n"); + let output = std::str::from_utf8(&output.stdout).unwrap(); let mut filtered_output = String::with_capacity(output.len()); let regex_filter = Regex::new(r"(?ms:^define.*?\}$)|(?m:^declare.*?$)").unwrap(); - for f in regex_filter.captures_iter(&output) { + for f in regex_filter.captures_iter(output) { assert_eq!(f.len(), 1); filtered_output.push_str(&f[0]); filtered_output.push('\n'); @@ -61,18 +67,32 @@ fn main() { println!("cargo:rerun-if-env-changed=DEBUG_DUMP_IRRT"); if env::var("DEBUG_DUMP_IRRT").is_ok() { - let mut file = File::create(out_path.join("irrt.ll")).unwrap(); + let mut file = File::create(out_path.join(format!("{filename_without_ext}.ll"))).unwrap(); file.write_all(output.as_bytes()).unwrap(); - let mut file = File::create(out_path.join("irrt-filtered.ll")).unwrap(); + let mut file = + File::create(out_path.join(format!("{filename_without_ext}-filtered.ll"))).unwrap(); file.write_all(filtered_output.as_bytes()).unwrap(); } let mut llvm_as = Command::new("llvm-as-irrt") .stdin(Stdio::piped()) .arg("-o") - .arg(out_path.join("irrt.bc")) + .arg(out_path.join(format!("{filename_without_ext}.bc"))) .spawn() .unwrap(); llvm_as.stdin.as_mut().unwrap().write_all(filtered_output.as_bytes()).unwrap(); assert!(llvm_as.wait().unwrap().success()); } + +fn main() { + const IRRT_SOURCE_PATHS: &[&str] = + &["src/codegen/irrt/irrt.c", "src/codegen/tracert/tracert.c"]; + + assert!(IRRT_SOURCE_PATHS.iter().map(Path::new).map(path_to_extless_filename).all_unique()); + + for path in IRRT_SOURCE_PATHS { + let path = Path::new(path); + + compile_file_to_ir(path, path_to_extless_filename(path)) + } +} diff --git a/nac3core/src/codegen/extern_fns.rs b/nac3core/src/codegen/extern_fns.rs index c22d69d..57db00e 100644 --- a/nac3core/src/codegen/extern_fns.rs +++ b/nac3core/src/codegen/extern_fns.rs @@ -1,6 +1,8 @@ use inkwell::attributes::{Attribute, AttributeLoc}; -use inkwell::values::{BasicValueEnum, CallSiteValue, FloatValue, IntValue}; -use itertools::Either; +use inkwell::values::{BasicValue, BasicValueEnum, CallSiteValue, FloatValue, IntValue}; +use inkwell::AddressSpace; +use itertools::{Either, Itertools}; +use std::iter::once; use crate::codegen::CodeGenContext; @@ -611,3 +613,49 @@ pub fn call_nextafter<'ctx>( .map(Either::unwrap_left) .unwrap() } + +/// Invokes the `printf` function. +pub fn call_printf<'ctx>( + ctx: &CodeGenContext<'ctx, '_>, + format: &str, + args: &[BasicValueEnum<'ctx>], +) -> IntValue<'ctx> { + const FN_NAME: &str = "printf"; + + let llvm_i8 = ctx.ctx.i8_type(); + let llvm_i32 = ctx.ctx.i32_type(); + let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default()); + + let extern_fn = ctx.module.get_function(FN_NAME).unwrap_or_else(|| { + let fn_type = llvm_i32.fn_type(&[llvm_pi8.into()], true); + let func = ctx.module.add_function(FN_NAME, fn_type, None); + for attr in ["nofree", "nounwind"] { + func.add_attribute( + AttributeLoc::Function, + ctx.ctx.create_enum_attribute(Attribute::get_named_enum_kind_id(attr), 0), + ); + } + + func + }); + + let pformat = ctx + .builder + .build_global_string_ptr(&format!("{format}\0"), "") + .map(|v| v.as_basic_value_enum()) + .map(BasicValueEnum::into_pointer_value) + .unwrap(); + + let fn_args = once(&pformat.as_basic_value_enum()) + .chain(args) + .copied() + .map(BasicValueEnum::into) + .collect_vec(); + + ctx.builder + .build_call(extern_fn, fn_args.as_slice(), "") + .map(CallSiteValue::try_as_basic_value) + .map(|v| v.map_left(BasicValueEnum::into_int_value)) + .map(Either::unwrap_left) + .unwrap() +} diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 517c5c9..9be83eb 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -1,5 +1,6 @@ use crate::{ codegen::classes::{ListType, NDArrayType, ProxyType, RangeType}, + codegen::tracert::{TraceRuntimeConfig, TraceRuntimeState}, symbol_resolver::{StaticValue, SymbolResolver}, toplevel::{helper::PrimDef, numpy::unpack_ndarray_var_tys, TopLevelContext, TopLevelDef}, typecheck::{ @@ -43,6 +44,7 @@ pub mod irrt; pub mod llvm_intrinsics; pub mod numpy; pub mod stmt; +pub mod tracert; #[cfg(test)] mod test; @@ -190,6 +192,8 @@ pub struct CodeGenContext<'ctx, 'a> { /// See [need_sret]. pub need_sret: bool, + pub tracert_state: Option, + /// The current source location. pub current_loc: Location, } @@ -237,6 +241,8 @@ pub struct WorkerRegistry { /// LLVM-related options for code generation. pub llvm_options: CodeGenLLVMOptions, + + tracert_config: TraceRuntimeConfig, } impl WorkerRegistry { @@ -246,6 +252,7 @@ impl WorkerRegistry { generators: Vec>, top_level_ctx: Arc, llvm_options: &CodeGenLLVMOptions, + tracert_config: &TraceRuntimeConfig, f: &Arc, ) -> (Arc, Vec>) { let (sender, receiver) = unbounded(); @@ -267,6 +274,7 @@ impl WorkerRegistry { wait_condvar, top_level_ctx, llvm_options: llvm_options.clone(), + tracert_config: tracert_config.clone(), }); let mut handles = Vec::new(); @@ -882,6 +890,13 @@ pub fn gen_func_impl< unifier, static_value_store, need_sret: has_sret, + tracert_state: if cfg!(feature = "tracing") + || registry.tracert_config.enabled_tags.is_empty() + { + None + } else { + Some(TraceRuntimeState::create(registry.tracert_config.clone())) + }, current_loc: Location::default(), debug_info: (dibuilder, compile_unit, func_scope.as_debug_info_scope()), }; diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs index 22416a1..751c250 100644 --- a/nac3core/src/codegen/test.rs +++ b/nac3core/src/codegen/test.rs @@ -2,6 +2,7 @@ use crate::{ codegen::{ classes::{ListType, NDArrayType, ProxyType, RangeType}, concrete_type::ConcreteTypeStore, + tracert::TraceRuntimeConfig, CodeGenContext, CodeGenLLVMOptions, CodeGenTargetMachineOptions, CodeGenTask, CodeGenerator, DefaultCodeGenerator, WithCall, WorkerRegistry, }, @@ -226,7 +227,13 @@ fn test_primitives() { opt_level: OptimizationLevel::Default, target: CodeGenTargetMachineOptions::from_host_triple(), }; - let (registry, handles) = WorkerRegistry::create_workers(threads, top_level, &llvm_options, &f); + let (registry, handles) = WorkerRegistry::create_workers( + threads, + top_level, + &llvm_options, + &TraceRuntimeConfig::default(), + &f, + ); registry.add_task(task); registry.wait_tasks_complete(handles); } @@ -411,7 +418,13 @@ fn test_simple_call() { opt_level: OptimizationLevel::Default, target: CodeGenTargetMachineOptions::from_host_triple(), }; - let (registry, handles) = WorkerRegistry::create_workers(threads, top_level, &llvm_options, &f); + let (registry, handles) = WorkerRegistry::create_workers( + threads, + top_level, + &llvm_options, + &TraceRuntimeConfig::default(), + &f, + ); registry.add_task(task); registry.wait_tasks_complete(handles); } diff --git a/nac3core/src/codegen/tracert/mod.rs b/nac3core/src/codegen/tracert/mod.rs new file mode 100644 index 0000000..a134045 --- /dev/null +++ b/nac3core/src/codegen/tracert/mod.rs @@ -0,0 +1,103 @@ +use crate::codegen::{extern_fns, CodeGenContext}; +use inkwell::context::Context; +use inkwell::memory_buffer::MemoryBuffer; +use inkwell::module::Module; +use inkwell::values::BasicValueEnum; +use inkwell::AtomicOrdering; + +#[derive(Clone, Default, Eq, PartialEq)] +pub struct TraceRuntimeConfig { + pub enabled_tags: Vec, +} + +#[derive(Eq, Clone, PartialEq)] +pub struct TraceRuntimeState { + config: TraceRuntimeConfig, + indent: usize, +} + +impl TraceRuntimeState { + #[must_use] + pub fn create(config: TraceRuntimeConfig) -> TraceRuntimeState { + TraceRuntimeState { config, indent: 0 } + } +} + +#[must_use] +pub fn load_tracert<'ctx>(ctx: &'ctx Context, config: &TraceRuntimeConfig) -> Option> { + if cfg!(feature = "tracing") && !config.enabled_tags.is_empty() { + let bitcode_buf = MemoryBuffer::create_from_memory_range( + include_bytes!(concat!(env!("OUT_DIR"), "/tracert.bc")), + "tracert_bitcode_buffer", + ); + let module = Module::parse_bitcode_from_buffer(&bitcode_buf, ctx).unwrap(); + + return Some(module); + } + + None +} + +// TODO: Might need to redesign how trace logging should be done + +pub fn trace_log<'ctx>( + ctx: &mut CodeGenContext<'ctx, '_>, + tag: &'static str, + format: &'static str, + args: &[BasicValueEnum<'ctx>], +) { + if ctx.tracert_state.is_none() { + return; + } + + // TODO: Add indentation + let str = format!("[TRACING] {tag} - {format}\n\0"); + extern_fns::call_printf(ctx, &str, args); +} + +pub fn trace_log_with_location<'ctx>( + ctx: &CodeGenContext<'ctx, '_>, + tag: &'static str, + format: &str, + args: &[BasicValueEnum<'ctx>], + file: &'static str, + line: u32, + column: u32, +) { + if ctx.tracert_state.is_none() { + return; + } + + // TODO: Add indentation + let str = format!("[TRACING] {file}:{line}:{column}:{tag} - {format}\n\0"); + extern_fns::call_printf(ctx, &str, args); +} + +pub fn trace_push_level(ctx: &mut CodeGenContext<'_, '_>) { + let Some(tracert_state) = &mut ctx.tracert_state else { + return; + }; + + debug_assert!(tracert_state.indent < usize::MAX); + if tracert_state.indent < usize::MAX { + tracert_state.indent = tracert_state.indent.saturating_add(1); + } +} + +pub fn trace_pop_level(ctx: &mut CodeGenContext<'_, '_>) { + let Some(tracert_state) = &mut ctx.tracert_state else { + return; + }; + + debug_assert!(tracert_state.indent > 0); + if tracert_state.indent > 0 { + tracert_state.indent = tracert_state.indent.saturating_sub(1); + } +} + +#[inline] +pub fn mfence(ctx: &mut CodeGenContext<'_, '_>) { + if ctx.tracert_state.is_some() { + ctx.builder.build_fence(AtomicOrdering::SequentiallyConsistent, 0, "").unwrap(); + } +} diff --git a/nac3core/src/codegen/tracert/tracert.c b/nac3core/src/codegen/tracert/tracert.c new file mode 100644 index 0000000..823e48f --- /dev/null +++ b/nac3core/src/codegen/tracert/tracert.c @@ -0,0 +1,2 @@ +// stdio.h +int printf(const char *format, ...); diff --git a/nac3standalone/Cargo.toml b/nac3standalone/Cargo.toml index a55a26b..c2ed0b1 100644 --- a/nac3standalone/Cargo.toml +++ b/nac3standalone/Cargo.toml @@ -4,6 +4,9 @@ version = "0.1.0" authors = ["M-Labs"] edition = "2021" +[features] +tracing = ["nac3core/tracing"] + [dependencies] parking_lot = "0.12" nac3parser = { path = "../nac3parser" } diff --git a/nac3standalone/demo/demo.c b/nac3standalone/demo/demo.c index e674a5f..ac9f6bd 100644 --- a/nac3standalone/demo/demo.c +++ b/nac3standalone/demo/demo.c @@ -73,13 +73,20 @@ void output_int32_list(struct cslice *slice) { putchar('\n'); } -void output_str(struct cslice *slice) { - const char *data = (const char *) slice->data; +void output_str_impl(const struct cslice* slice, bool newline) { + const char *data = (const char *) slice->data; - for (usize i = 0; i < slice->len; ++i) { - putchar(data[i]); - } - putchar('\n'); + for (usize i = 0; i < slice->len; ++i) { + putchar(data[i]); + } + + if (newline) { + putchar('\n'); + } +} + +void output_str(struct cslice *slice) { + output_str_impl(slice, true); } uint64_t dbg_stack_address(__attribute__((unused)) struct cslice *slice) { @@ -94,8 +101,34 @@ uint32_t __nac3_personality(uint32_t state, uint32_t exception_object, uint32_t __builtin_unreachable(); } +struct exception { + uint32_t name; + struct cslice file; + uint32_t line; + uint32_t col; + struct cslice func; + struct cslice message; + uint64_t param0; + uint64_t param1; + uint64_t param2; +}; + +void output_exception(const struct exception *ex) { + fputs("exception { location: ", stdout); + output_str_impl(&ex->file, false); + printf(":%u:%u, func: ", ex->line, ex->col); + output_str_impl(&ex->func, false); + fputs(", message: ", stdout); + output_str_impl(&ex->message, false); + printf(", params: [%lu, %lu, %lu] }\n", ex->param0, ex->param1, ex->param2); +} + uint32_t __nac3_raise(uint32_t state, uint32_t exception_object, uint32_t context) { - printf("__nac3_raise(state: %u, exception_object: %u, context: %u)\n", state, exception_object, context); + printf("__nac3_raise(state: %#10x, exception_object: %#10x, context: %#10x)\n", state, exception_object, context); + + struct exception *ex = (struct exception *)((0x7ffffffful << 16) | state); + output_exception(ex); + exit(101); __builtin_unreachable(); } diff --git a/nac3standalone/src/main.rs b/nac3standalone/src/main.rs index 128c8d1..31eea5b 100644 --- a/nac3standalone/src/main.rs +++ b/nac3standalone/src/main.rs @@ -20,8 +20,11 @@ use std::{collections::HashMap, fs, path::Path, sync::Arc}; use nac3core::{ codegen::{ - concrete_type::ConcreteTypeStore, irrt::load_irrt, CodeGenLLVMOptions, - CodeGenTargetMachineOptions, CodeGenTask, DefaultCodeGenerator, WithCall, WorkerRegistry, + concrete_type::ConcreteTypeStore, + irrt::load_irrt, + tracert::{load_tracert, TraceRuntimeConfig}, + CodeGenLLVMOptions, CodeGenTargetMachineOptions, CodeGenTask, DefaultCodeGenerator, + WithCall, WorkerRegistry, }, symbol_resolver::SymbolResolver, toplevel::{ @@ -76,6 +79,9 @@ struct CommandLineArgs { /// Additional target features to enable/disable, specified using the `+`/`-` prefixes. #[arg(long)] target_features: Option, + + #[arg(long)] + trace: Vec, } fn handle_typevar_definition( @@ -242,8 +248,16 @@ fn main() { const SIZE_T: u32 = usize::BITS; let cli = CommandLineArgs::parse(); - let CommandLineArgs { file_name, threads, opt_level, emit_llvm, triple, mcpu, target_features } = - cli; + let CommandLineArgs { + file_name, + threads, + opt_level, + emit_llvm, + triple, + mcpu, + target_features, + trace, + } = cli; Target::initialize_all(&InitializationConfig::default()); @@ -272,6 +286,7 @@ fn main() { // The default behavior for -O where n>3 defaults to O3 for both Clang and GCC _ => OptimizationLevel::Aggressive, }; + let tracert_config = TraceRuntimeConfig { enabled_tags: trace }; let program = match fs::read_to_string(file_name.clone()) { Ok(program) => program, @@ -393,7 +408,8 @@ fn main() { let threads = (0..threads) .map(|i| Box::new(DefaultCodeGenerator::new(format!("module{i}"), SIZE_T))) .collect(); - let (registry, handles) = WorkerRegistry::create_workers(threads, top_level, &llvm_options, &f); + let (registry, handles) = + WorkerRegistry::create_workers(threads, top_level, &llvm_options, &tracert_config, &f); registry.add_task(task); registry.wait_tasks_complete(handles); @@ -424,6 +440,14 @@ fn main() { } main.link_in_module(irrt).unwrap(); + if let Some(tracert) = load_tracert(&context, &tracert_config) { + if emit_llvm { + tracert.write_bitcode_to_path(Path::new("tracert.bc")); + } + + main.link_in_module(tracert).unwrap(); + } + let mut function_iter = main.get_first_function(); while let Some(func) = function_iter { if func.count_basic_blocks() > 0 && func.get_name().to_str().unwrap() != "run" {