From e00ff7f4e66e3e7dc8ce0035787732c75d5d7035 Mon Sep 17 00:00:00 2001 From: lyken Date: Fri, 12 Jul 2024 21:52:55 +0800 Subject: [PATCH 1/9] core/irrt: comment build.rs & move irrt to nac3core/irrt --- nac3core/build.rs | 33 +++++++++++++++++------- nac3core/{src/codegen => }/irrt/irrt.cpp | 0 2 files changed, 23 insertions(+), 10 deletions(-) rename nac3core/{src/codegen => }/irrt/irrt.cpp (100%) diff --git a/nac3core/build.rs b/nac3core/build.rs index 38e3382f..f83b7bc7 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -8,7 +8,11 @@ use std::{ }; fn main() { - const FILE: &str = "src/codegen/irrt/irrt.cpp"; + let out_dir = env::var("OUT_DIR").unwrap(); + let out_dir = Path::new(&out_dir); + let irrt_dir = Path::new("irrt"); + + let irrt_cpp_path = irrt_dir.join("irrt.cpp"); /* * HACK: Sadly, clang doesn't let us emit generic LLVM bitcode. @@ -16,7 +20,6 @@ fn main() { */ let flags: &[&str] = &[ "--target=wasm32", - FILE, "-x", "c++", "-fno-discard-value-names", @@ -33,12 +36,13 @@ fn main() { "-Wextra", "-o", "-", + irrt_cpp_path.to_str().unwrap(), ]; - println!("cargo:rerun-if-changed={FILE}"); - let out_dir = env::var("OUT_DIR").unwrap(); - let out_path = Path::new(&out_dir); + // Tell Cargo to rerun if any file under `irrt_dir` (recursive) changes + println!("cargo:rerun-if-changed={}", irrt_dir.to_str().unwrap()); + // Compile IRRT and capture the LLVM IR output let output = Command::new("clang-irrt") .args(flags) .output() @@ -52,6 +56,11 @@ fn main() { let output = std::str::from_utf8(&output.stdout).unwrap().replace("\r\n", "\n"); let mut filtered_output = String::with_capacity(output.len()); + // Filter out irrelevant IR + // + // Regex: + // - `(?ms:^define.*?\}$)` captures LLVM `define` blocks + // - `(?m:^declare.*?$)` captures LLVM `declare` lines let regex_filter = Regex::new(r"(?ms:^define.*?\}$)|(?m:^declare.*?$)").unwrap(); for f in regex_filter.captures_iter(&output) { assert_eq!(f.len(), 1); @@ -63,18 +72,22 @@ fn main() { .unwrap() 
.replace_all(&filtered_output, ""); - println!("cargo:rerun-if-env-changed=DEBUG_DUMP_IRRT"); - if env::var("DEBUG_DUMP_IRRT").is_ok() { - let mut file = File::create(out_path.join("irrt.ll")).unwrap(); + // For debugging + // Doing `DEBUG_DUMP_IRRT=1 cargo build -p nac3core` dumps the LLVM IR generated + const DEBUG_DUMP_IRRT: &str = "DEBUG_DUMP_IRRT"; + println!("cargo:rerun-if-env-changed={DEBUG_DUMP_IRRT}"); + if env::var(DEBUG_DUMP_IRRT).is_ok() { + let mut file = File::create(out_dir.join("irrt.ll")).unwrap(); file.write_all(output.as_bytes()).unwrap(); - let mut file = File::create(out_path.join("irrt-filtered.ll")).unwrap(); + + let mut file = File::create(out_dir.join("irrt-filtered.ll")).unwrap(); file.write_all(filtered_output.as_bytes()).unwrap(); } let mut llvm_as = Command::new("llvm-as-irrt") .stdin(Stdio::piped()) .arg("-o") - .arg(out_path.join("irrt.bc")) + .arg(out_dir.join("irrt.bc")) .spawn() .unwrap(); llvm_as.stdin.as_mut().unwrap().write_all(filtered_output.as_bytes()).unwrap(); diff --git a/nac3core/src/codegen/irrt/irrt.cpp b/nac3core/irrt/irrt.cpp similarity index 100% rename from nac3core/src/codegen/irrt/irrt.cpp rename to nac3core/irrt/irrt.cpp -- 2.44.1 From 4065f984479c1f3484187f877bd7d0321476edc3 Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 27 Aug 2024 10:27:31 +0800 Subject: [PATCH 2/9] add .clang-format --- .clang-format | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..9fc7f42a --- /dev/null +++ b/.clang-format @@ -0,0 +1,32 @@ +BasedOnStyle: LLVM + +Language: Cpp +Standard: Cpp11 + +AccessModifierOffset: -1 +AlignEscapedNewlines: Left +AlwaysBreakAfterReturnType: None +AlwaysBreakTemplateDeclarations: Yes +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Inline +BinPackParameters: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeTernaryOperators: 
true +BreakConstructorInitializers: AfterColon +BreakInheritanceList: AfterColon +ColumnLimit: 120 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ContinuationIndentWidth: 4 +DerivePointerAlignment: false +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 4 +MaxEmptyLinesToKeep: 1 +PointerAlignment: Left +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: true +SpaceAfterTemplateKeyword: false +SpacesBeforeTrailingComments: 2 +TabWidth: 4 +UseTab: Never -- 2.44.1 From aabd6826647dec7e8963e0f79c644721df950bc9 Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 27 Aug 2024 10:27:54 +0800 Subject: [PATCH 3/9] core/irrt: reformat --- nac3core/irrt/irrt.cpp | 230 ++++++++++++++--------------------------- 1 file changed, 76 insertions(+), 154 deletions(-) diff --git a/nac3core/irrt/irrt.cpp b/nac3core/irrt/irrt.cpp index 6032518d..ce8ce625 100644 --- a/nac3core/irrt/irrt.cpp +++ b/nac3core/irrt/irrt.cpp @@ -11,19 +11,19 @@ using NDIndex = uint32_t; using SliceIndex = int32_t; namespace { -template +template const T& max(const T& a, const T& b) { return a > b ? a : b; } -template +template const T& min(const T& a, const T& b) { return a > b ? 
b : a; } // adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c // need to make sure `exp >= 0` before calling this function -template +template T __nac3_int_exp_impl(T base, T exp) { T res = 1; /* repeated squaring method */ @@ -37,13 +37,8 @@ T __nac3_int_exp_impl(T base, T exp) { return res; } -template -SizeT __nac3_ndarray_calc_size_impl( - const SizeT* list_data, - SizeT list_len, - SizeT begin_idx, - SizeT end_idx -) { +template +SizeT __nac3_ndarray_calc_size_impl(const SizeT* list_data, SizeT list_len, SizeT begin_idx, SizeT end_idx) { __builtin_assume(end_idx <= list_len); SizeT num_elems = 1; @@ -55,13 +50,8 @@ SizeT __nac3_ndarray_calc_size_impl( return num_elems; } -template -void __nac3_ndarray_calc_nd_indices_impl( - SizeT index, - const SizeT* dims, - SizeT num_dims, - NDIndex* idxs -) { +template +void __nac3_ndarray_calc_nd_indices_impl(SizeT index, const SizeT* dims, SizeT num_dims, NDIndex* idxs) { SizeT stride = 1; for (SizeT dim = 0; dim < num_dims; dim++) { SizeT i = num_dims - dim - 1; @@ -71,13 +61,8 @@ void __nac3_ndarray_calc_nd_indices_impl( } } -template -SizeT __nac3_ndarray_flatten_index_impl( - const SizeT* dims, - SizeT num_dims, - const NDIndex* indices, - SizeT num_indices -) { +template +SizeT __nac3_ndarray_flatten_index_impl(const SizeT* dims, SizeT num_dims, const NDIndex* indices, SizeT num_indices) { SizeT idx = 0; SizeT stride = 1; for (SizeT i = 0; i < num_dims; ++i) { @@ -92,14 +77,12 @@ SizeT __nac3_ndarray_flatten_index_impl( return idx; } -template -void __nac3_ndarray_calc_broadcast_impl( - const SizeT* lhs_dims, - SizeT lhs_ndims, - const SizeT* rhs_dims, - SizeT rhs_ndims, - SizeT* out_dims -) { +template +void __nac3_ndarray_calc_broadcast_impl(const SizeT* lhs_dims, + SizeT lhs_ndims, + const SizeT* rhs_dims, + SizeT rhs_ndims, + SizeT* out_dims) { SizeT max_ndims = lhs_ndims > rhs_ndims ? 
lhs_ndims : rhs_ndims; for (SizeT i = 0; i < max_ndims; ++i) { @@ -123,13 +106,11 @@ void __nac3_ndarray_calc_broadcast_impl( } } -template -void __nac3_ndarray_calc_broadcast_idx_impl( - const SizeT* src_dims, - SizeT src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { +template +void __nac3_ndarray_calc_broadcast_idx_impl(const SizeT* src_dims, + SizeT src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { for (SizeT i = 0; i < src_ndims; ++i) { SizeT src_i = src_ndims - i - 1; out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; @@ -138,17 +119,14 @@ void __nac3_ndarray_calc_broadcast_idx_impl( } // namespace extern "C" { -#define DEF_nac3_int_exp_(T) \ - T __nac3_int_exp_##T(T base, T exp) {\ - return __nac3_int_exp_impl(base, exp);\ +#define DEF_nac3_int_exp_(T) \ + T __nac3_int_exp_##T(T base, T exp) { \ + return __nac3_int_exp_impl(base, exp); \ } -DEF_nac3_int_exp_(int32_t) -DEF_nac3_int_exp_(int64_t) -DEF_nac3_int_exp_(uint32_t) -DEF_nac3_int_exp_(uint64_t) +DEF_nac3_int_exp_(int32_t) DEF_nac3_int_exp_(int64_t) DEF_nac3_int_exp_(uint32_t) DEF_nac3_int_exp_(uint64_t) -SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { + SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { if (i < 0) { i = len + i; } @@ -160,11 +138,7 @@ SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { return i; } -SliceIndex __nac3_range_slice_len( - const SliceIndex start, - const SliceIndex end, - const SliceIndex step -) { +SliceIndex __nac3_range_slice_len(const SliceIndex start, const SliceIndex end, const SliceIndex step) { SliceIndex diff = end - start; if (diff > 0 && step > 0) { return ((diff - 1) / step) + 1; @@ -181,61 +155,47 @@ SliceIndex __nac3_range_slice_len( // - The end index is *inclusive*, // - The length of src and dest slice size should already // be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) -SliceIndex __nac3_list_slice_assign_var_size( - 
SliceIndex dest_start, - SliceIndex dest_end, - SliceIndex dest_step, - uint8_t* dest_arr, - SliceIndex dest_arr_len, - SliceIndex src_start, - SliceIndex src_end, - SliceIndex src_step, - uint8_t* src_arr, - SliceIndex src_arr_len, - const SliceIndex size -) { +SliceIndex __nac3_list_slice_assign_var_size(SliceIndex dest_start, + SliceIndex dest_end, + SliceIndex dest_step, + uint8_t* dest_arr, + SliceIndex dest_arr_len, + SliceIndex src_start, + SliceIndex src_end, + SliceIndex src_step, + uint8_t* src_arr, + SliceIndex src_arr_len, + const SliceIndex size) { /* if dest_arr_len == 0, do nothing since we do not support extending list */ - if (dest_arr_len == 0) return dest_arr_len; + if (dest_arr_len == 0) + return dest_arr_len; /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ if (src_step == dest_step && dest_step == 1) { const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; const SliceIndex dest_len = (dest_end >= dest_start) ? 
(dest_end - dest_start + 1) : 0; if (src_len > 0) { - __builtin_memmove( - dest_arr + dest_start * size, - src_arr + src_start * size, - src_len * size - ); + __builtin_memmove(dest_arr + dest_start * size, src_arr + src_start * size, src_len * size); } if (dest_len > 0) { /* dropping */ - __builtin_memmove( - dest_arr + (dest_start + src_len) * size, - dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size - ); + __builtin_memmove(dest_arr + (dest_start + src_len) * size, dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size); } /* shrink size */ return dest_arr_len - (dest_len - src_len); } /* if two range overlaps, need alloca */ - uint8_t need_alloca = - (dest_arr == src_arr) - && !( - max(dest_start, dest_end) < min(src_start, src_end) - || max(src_start, src_end) < min(dest_start, dest_end) - ); + uint8_t need_alloca = (dest_arr == src_arr) + && !(max(dest_start, dest_end) < min(src_start, src_end) + || max(src_start, src_end) < min(dest_start, dest_end)); if (need_alloca) { - uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); + uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); __builtin_memcpy(tmp, src_arr, src_arr_len * size); src_arr = tmp; } SliceIndex src_ind = src_start; SliceIndex dest_ind = dest_start; - for (; - (src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end); - src_ind += src_step, dest_ind += dest_step - ) { + for (; (src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end); src_ind += src_step, dest_ind += dest_step) { /* for constant optimization */ if (size == 1) { __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); @@ -251,11 +211,8 @@ SliceIndex __nac3_list_slice_assign_var_size( /* only dest_step == 1 can we shrink the dest list. 
*/ /* size should be ensured prior to calling this function */ if (dest_step == 1 && dest_end >= dest_start) { - __builtin_memmove( - dest_arr + dest_ind * size, - dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size - ); + __builtin_memmove(dest_arr + dest_ind * size, dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size); return dest_arr_len - (dest_end - dest_ind) - 1; } return dest_arr_len; @@ -320,95 +277,60 @@ double __nac3_j0(double x) { return j0(x); } -uint32_t __nac3_ndarray_calc_size( - const uint32_t* list_data, - uint32_t list_len, - uint32_t begin_idx, - uint32_t end_idx -) { +uint32_t __nac3_ndarray_calc_size(const uint32_t* list_data, uint32_t list_len, uint32_t begin_idx, uint32_t end_idx) { return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); } -uint64_t __nac3_ndarray_calc_size64( - const uint64_t* list_data, - uint64_t list_len, - uint64_t begin_idx, - uint64_t end_idx -) { +uint64_t +__nac3_ndarray_calc_size64(const uint64_t* list_data, uint64_t list_len, uint64_t begin_idx, uint64_t end_idx) { return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); } -void __nac3_ndarray_calc_nd_indices( - uint32_t index, - const uint32_t* dims, - uint32_t num_dims, - NDIndex* idxs -) { +void __nac3_ndarray_calc_nd_indices(uint32_t index, const uint32_t* dims, uint32_t num_dims, NDIndex* idxs) { __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); } -void __nac3_ndarray_calc_nd_indices64( - uint64_t index, - const uint64_t* dims, - uint64_t num_dims, - NDIndex* idxs -) { +void __nac3_ndarray_calc_nd_indices64(uint64_t index, const uint64_t* dims, uint64_t num_dims, NDIndex* idxs) { __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); } -uint32_t __nac3_ndarray_flatten_index( - const uint32_t* dims, - uint32_t num_dims, - const NDIndex* indices, - uint32_t num_indices -) { +uint32_t +__nac3_ndarray_flatten_index(const uint32_t* dims, uint32_t 
num_dims, const NDIndex* indices, uint32_t num_indices) { return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); } -uint64_t __nac3_ndarray_flatten_index64( - const uint64_t* dims, - uint64_t num_dims, - const NDIndex* indices, - uint64_t num_indices -) { +uint64_t +__nac3_ndarray_flatten_index64(const uint64_t* dims, uint64_t num_dims, const NDIndex* indices, uint64_t num_indices) { return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); } -void __nac3_ndarray_calc_broadcast( - const uint32_t* lhs_dims, - uint32_t lhs_ndims, - const uint32_t* rhs_dims, - uint32_t rhs_ndims, - uint32_t* out_dims -) { +void __nac3_ndarray_calc_broadcast(const uint32_t* lhs_dims, + uint32_t lhs_ndims, + const uint32_t* rhs_dims, + uint32_t rhs_ndims, + uint32_t* out_dims) { return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); } -void __nac3_ndarray_calc_broadcast64( - const uint64_t* lhs_dims, - uint64_t lhs_ndims, - const uint64_t* rhs_dims, - uint64_t rhs_ndims, - uint64_t* out_dims -) { +void __nac3_ndarray_calc_broadcast64(const uint64_t* lhs_dims, + uint64_t lhs_ndims, + const uint64_t* rhs_dims, + uint64_t rhs_ndims, + uint64_t* out_dims) { return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); } -void __nac3_ndarray_calc_broadcast_idx( - const uint32_t* src_dims, - uint32_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { +void __nac3_ndarray_calc_broadcast_idx(const uint32_t* src_dims, + uint32_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); } -void __nac3_ndarray_calc_broadcast_idx64( - const uint64_t* src_dims, - uint64_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { +void __nac3_ndarray_calc_broadcast_idx64(const uint64_t* src_dims, + uint64_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { 
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); } } // extern "C" \ No newline at end of file -- 2.44.1 From 96073fe6bc65bdcb89c6d87ec5a0ba8655e7e16e Mon Sep 17 00:00:00 2001 From: lyken Date: Mon, 26 Aug 2024 15:19:22 +0800 Subject: [PATCH 4/9] core/irrt: use -std=c++20 to compile To explicitly set the C++ variant and avoid inconsistencies. --- nac3core/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nac3core/build.rs b/nac3core/build.rs index f83b7bc7..78c26147 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -22,6 +22,7 @@ fn main() { "--target=wasm32", "-x", "c++", + "-std=c++20", "-fno-discard-value-names", "-fno-exceptions", "-fno-rtti", -- 2.44.1 From 4c0f976ef1b8a0385b5ee2f15f7c28cbf1bca9d3 Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 9 Jul 2024 21:02:20 +0800 Subject: [PATCH 5/9] core/irrt: build.rs capture IR defined types --- nac3core/build.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nac3core/build.rs b/nac3core/build.rs index 78c26147..f3a84d9e 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -62,7 +62,9 @@ fn main() { // Regex: // - `(?ms:^define.*?\}$)` captures LLVM `define` blocks // - `(?m:^declare.*?$)` captures LLVM `declare` lines - let regex_filter = Regex::new(r"(?ms:^define.*?\}$)|(?m:^declare.*?$)").unwrap(); + // - `(?m:^%.+?=\s*type\s*\{.+?\}$)` captures LLVM `type` declarations + let regex_filter = + Regex::new(r"(?ms:^define.*?\}$)|(?m:^declare.*?$)|(?m:^%.+?=\s*type\s*\{.+?\}$)").unwrap(); for f in regex_filter.captures_iter(&output) { assert_eq!(f.len(), 1); filtered_output.push_str(&f[0]); -- 2.44.1 From fce5ba3dbfec865f1ec4c4601ea201ff4c612043 Mon Sep 17 00:00:00 2001 From: lyken Date: Sat, 13 Jul 2024 14:02:50 +0800 Subject: [PATCH 6/9] core/irrt: build.rs capture IR defined constants --- nac3core/build.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nac3core/build.rs b/nac3core/build.rs index f3a84d9e..d70f9424 
100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -63,8 +63,11 @@ fn main() { // - `(?ms:^define.*?\}$)` captures LLVM `define` blocks // - `(?m:^declare.*?$)` captures LLVM `declare` lines // - `(?m:^%.+?=\s*type\s*\{.+?\}$)` captures LLVM `type` declarations - let regex_filter = - Regex::new(r"(?ms:^define.*?\}$)|(?m:^declare.*?$)|(?m:^%.+?=\s*type\s*\{.+?\}$)").unwrap(); + // - `(?m:^@.+?=.+$)` captures global constants + let regex_filter = Regex::new( + r"(?ms:^define.*?\}$)|(?m:^declare.*?$)|(?m:^%.+?=\s*type\s*\{.+?\}$)|(?m:^@.+?=.+$)", + ) + .unwrap(); for f in regex_filter.captures_iter(&output) { assert_eq!(f.len(), 1); filtered_output.push_str(&f[0]); -- 2.44.1 From fb41645863fe7a9a777bb84be5ee6ed984939177 Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 27 Aug 2024 10:31:49 +0800 Subject: [PATCH 7/9] core/irrt: split into headers --- nac3core/irrt/irrt.cpp | 341 +------------------------------ nac3core/irrt/irrt/int_types.hpp | 13 ++ nac3core/irrt/irrt/list.hpp | 75 +++++++ nac3core/irrt/irrt/math.hpp | 93 +++++++++ nac3core/irrt/irrt/math_util.hpp | 13 ++ nac3core/irrt/irrt/ndarray.hpp | 144 +++++++++++++ nac3core/irrt/irrt/slice.hpp | 28 +++ 7 files changed, 371 insertions(+), 336 deletions(-) create mode 100644 nac3core/irrt/irrt/int_types.hpp create mode 100644 nac3core/irrt/irrt/list.hpp create mode 100644 nac3core/irrt/irrt/math.hpp create mode 100644 nac3core/irrt/irrt/math_util.hpp create mode 100644 nac3core/irrt/irrt/ndarray.hpp create mode 100644 nac3core/irrt/irrt/slice.hpp diff --git a/nac3core/irrt/irrt.cpp b/nac3core/irrt/irrt.cpp index ce8ce625..1bedd84f 100644 --- a/nac3core/irrt/irrt.cpp +++ b/nac3core/irrt/irrt.cpp @@ -1,336 +1,5 @@ -using int8_t = _BitInt(8); -using uint8_t = unsigned _BitInt(8); -using int32_t = _BitInt(32); -using uint32_t = unsigned _BitInt(32); -using int64_t = _BitInt(64); -using uint64_t = unsigned _BitInt(64); - -// NDArray indices are always `uint32_t`. 
-using NDIndex = uint32_t; -// The type of an index or a value describing the length of a range/slice is always `int32_t`. -using SliceIndex = int32_t; - -namespace { -template -const T& max(const T& a, const T& b) { - return a > b ? a : b; -} - -template -const T& min(const T& a, const T& b) { - return a > b ? b : a; -} - -// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c -// need to make sure `exp >= 0` before calling this function -template -T __nac3_int_exp_impl(T base, T exp) { - T res = 1; - /* repeated squaring method */ - do { - if (exp & 1) { - res *= base; /* for n odd */ - } - exp >>= 1; - base *= base; - } while (exp); - return res; -} - -template -SizeT __nac3_ndarray_calc_size_impl(const SizeT* list_data, SizeT list_len, SizeT begin_idx, SizeT end_idx) { - __builtin_assume(end_idx <= list_len); - - SizeT num_elems = 1; - for (SizeT i = begin_idx; i < end_idx; ++i) { - SizeT val = list_data[i]; - __builtin_assume(val > 0); - num_elems *= val; - } - return num_elems; -} - -template -void __nac3_ndarray_calc_nd_indices_impl(SizeT index, const SizeT* dims, SizeT num_dims, NDIndex* idxs) { - SizeT stride = 1; - for (SizeT dim = 0; dim < num_dims; dim++) { - SizeT i = num_dims - dim - 1; - __builtin_assume(dims[i] > 0); - idxs[i] = (index / stride) % dims[i]; - stride *= dims[i]; - } -} - -template -SizeT __nac3_ndarray_flatten_index_impl(const SizeT* dims, SizeT num_dims, const NDIndex* indices, SizeT num_indices) { - SizeT idx = 0; - SizeT stride = 1; - for (SizeT i = 0; i < num_dims; ++i) { - SizeT ri = num_dims - i - 1; - if (ri < num_indices) { - idx += stride * indices[ri]; - } - - __builtin_assume(dims[i] > 0); - stride *= dims[ri]; - } - return idx; -} - -template -void __nac3_ndarray_calc_broadcast_impl(const SizeT* lhs_dims, - SizeT lhs_ndims, - const SizeT* rhs_dims, - SizeT rhs_ndims, - SizeT* out_dims) { - SizeT max_ndims = lhs_ndims > rhs_ndims ? 
lhs_ndims : rhs_ndims; - - for (SizeT i = 0; i < max_ndims; ++i) { - const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; - const SizeT* rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr; - SizeT* out_dim = &out_dims[max_ndims - i - 1]; - - if (lhs_dim_sz == nullptr) { - *out_dim = *rhs_dim_sz; - } else if (rhs_dim_sz == nullptr) { - *out_dim = *lhs_dim_sz; - } else if (*lhs_dim_sz == 1) { - *out_dim = *rhs_dim_sz; - } else if (*rhs_dim_sz == 1) { - *out_dim = *lhs_dim_sz; - } else if (*lhs_dim_sz == *rhs_dim_sz) { - *out_dim = *lhs_dim_sz; - } else { - __builtin_unreachable(); - } - } -} - -template -void __nac3_ndarray_calc_broadcast_idx_impl(const SizeT* src_dims, - SizeT src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx) { - for (SizeT i = 0; i < src_ndims; ++i) { - SizeT src_i = src_ndims - i - 1; - out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; - } -} -} // namespace - -extern "C" { -#define DEF_nac3_int_exp_(T) \ - T __nac3_int_exp_##T(T base, T exp) { \ - return __nac3_int_exp_impl(base, exp); \ - } - -DEF_nac3_int_exp_(int32_t) DEF_nac3_int_exp_(int64_t) DEF_nac3_int_exp_(uint32_t) DEF_nac3_int_exp_(uint64_t) - - SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { - if (i < 0) { - i = len + i; - } - if (i < 0) { - return 0; - } else if (i > len) { - return len; - } - return i; -} - -SliceIndex __nac3_range_slice_len(const SliceIndex start, const SliceIndex end, const SliceIndex step) { - SliceIndex diff = end - start; - if (diff > 0 && step > 0) { - return ((diff - 1) / step) + 1; - } else if (diff < 0 && step < 0) { - return ((diff + 1) / step) + 1; - } else { - return 0; - } -} - -// Handle list assignment and dropping part of the list when -// both dest_step and src_step are +1. 
-// - All the index must *not* be out-of-bound or negative, -// - The end index is *inclusive*, -// - The length of src and dest slice size should already -// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) -SliceIndex __nac3_list_slice_assign_var_size(SliceIndex dest_start, - SliceIndex dest_end, - SliceIndex dest_step, - uint8_t* dest_arr, - SliceIndex dest_arr_len, - SliceIndex src_start, - SliceIndex src_end, - SliceIndex src_step, - uint8_t* src_arr, - SliceIndex src_arr_len, - const SliceIndex size) { - /* if dest_arr_len == 0, do nothing since we do not support extending list */ - if (dest_arr_len == 0) - return dest_arr_len; - /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ - if (src_step == dest_step && dest_step == 1) { - const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; - const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; - if (src_len > 0) { - __builtin_memmove(dest_arr + dest_start * size, src_arr + src_start * size, src_len * size); - } - if (dest_len > 0) { - /* dropping */ - __builtin_memmove(dest_arr + (dest_start + src_len) * size, dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size); - } - /* shrink size */ - return dest_arr_len - (dest_len - src_len); - } - /* if two range overlaps, need alloca */ - uint8_t need_alloca = (dest_arr == src_arr) - && !(max(dest_start, dest_end) < min(src_start, src_end) - || max(src_start, src_end) < min(dest_start, dest_end)); - if (need_alloca) { - uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); - __builtin_memcpy(tmp, src_arr, src_arr_len * size); - src_arr = tmp; - } - SliceIndex src_ind = src_start; - SliceIndex dest_ind = dest_start; - for (; (src_step > 0) ? 
(src_ind <= src_end) : (src_ind >= src_end); src_ind += src_step, dest_ind += dest_step) { - /* for constant optimization */ - if (size == 1) { - __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); - } else if (size == 4) { - __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); - } else if (size == 8) { - __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); - } else { - /* memcpy for var size, cannot overlap after previous alloca */ - __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); - } - } - /* only dest_step == 1 can we shrink the dest list. */ - /* size should be ensured prior to calling this function */ - if (dest_step == 1 && dest_end >= dest_start) { - __builtin_memmove(dest_arr + dest_ind * size, dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size); - return dest_arr_len - (dest_end - dest_ind) - 1; - } - return dest_arr_len; -} - -int32_t __nac3_isinf(double x) { - return __builtin_isinf(x); -} - -int32_t __nac3_isnan(double x) { - return __builtin_isnan(x); -} - -double tgamma(double arg); - -double __nac3_gamma(double z) { - // Handling for denormals - // | x | Python gamma(x) | C tgamma(x) | - // --- | ----------------- | --------------- | ----------- | - // (1) | nan | nan | nan | - // (2) | -inf | -inf | inf | - // (3) | inf | inf | inf | - // (4) | 0.0 | inf | inf | - // (5) | {-1.0, -2.0, ...} | inf | nan | - - // (1)-(3) - if (__builtin_isinf(z) || __builtin_isnan(z)) { - return z; - } - - double v = tgamma(z); - - // (4)-(5) - return __builtin_isinf(v) || __builtin_isnan(v) ? 
__builtin_inf() : v; -} - -double lgamma(double arg); - -double __nac3_gammaln(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: gammaln(-inf) -> -inf - // - libm : lgamma(-inf) -> inf - - if (__builtin_isinf(x)) { - return x; - } - - return lgamma(x); -} - -double j0(double x); - -double __nac3_j0(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: j0(inf) -> nan - // - libm : j0(inf) -> 0.0 - - if (__builtin_isinf(x)) { - return __builtin_nan(""); - } - - return j0(x); -} - -uint32_t __nac3_ndarray_calc_size(const uint32_t* list_data, uint32_t list_len, uint32_t begin_idx, uint32_t end_idx) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); -} - -uint64_t -__nac3_ndarray_calc_size64(const uint64_t* list_data, uint64_t list_len, uint64_t begin_idx, uint64_t end_idx) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); -} - -void __nac3_ndarray_calc_nd_indices(uint32_t index, const uint32_t* dims, uint32_t num_dims, NDIndex* idxs) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); -} - -void __nac3_ndarray_calc_nd_indices64(uint64_t index, const uint64_t* dims, uint64_t num_dims, NDIndex* idxs) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); -} - -uint32_t -__nac3_ndarray_flatten_index(const uint32_t* dims, uint32_t num_dims, const NDIndex* indices, uint32_t num_indices) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); -} - -uint64_t -__nac3_ndarray_flatten_index64(const uint64_t* dims, uint64_t num_dims, const NDIndex* indices, uint64_t num_indices) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); -} - -void __nac3_ndarray_calc_broadcast(const uint32_t* lhs_dims, - uint32_t lhs_ndims, - const uint32_t* rhs_dims, - uint32_t rhs_ndims, - uint32_t* out_dims) { - return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, 
rhs_dims, rhs_ndims, out_dims); -} - -void __nac3_ndarray_calc_broadcast64(const uint64_t* lhs_dims, - uint64_t lhs_ndims, - const uint64_t* rhs_dims, - uint64_t rhs_ndims, - uint64_t* out_dims) { - return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); -} - -void __nac3_ndarray_calc_broadcast_idx(const uint32_t* src_dims, - uint32_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); -} - -void __nac3_ndarray_calc_broadcast_idx64(const uint64_t* src_dims, - uint64_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); -} -} // extern "C" \ No newline at end of file +#include +#include +#include +#include +#include diff --git a/nac3core/irrt/irrt/int_types.hpp b/nac3core/irrt/irrt/int_types.hpp new file mode 100644 index 00000000..87900961 --- /dev/null +++ b/nac3core/irrt/irrt/int_types.hpp @@ -0,0 +1,13 @@ +#pragma once + +using int8_t = _BitInt(8); +using uint8_t = unsigned _BitInt(8); +using int32_t = _BitInt(32); +using uint32_t = unsigned _BitInt(32); +using int64_t = _BitInt(64); +using uint64_t = unsigned _BitInt(64); + +// NDArray indices are always `uint32_t`. +using NDIndex = uint32_t; +// The type of an index or a value describing the length of a range/slice is always `int32_t`. +using SliceIndex = int32_t; \ No newline at end of file diff --git a/nac3core/irrt/irrt/list.hpp b/nac3core/irrt/irrt/list.hpp new file mode 100644 index 00000000..09841639 --- /dev/null +++ b/nac3core/irrt/irrt/list.hpp @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +extern "C" { +// Handle list assignment and dropping part of the list when +// both dest_step and src_step are +1. 
+// - All the index must *not* be out-of-bound or negative, +// - The end index is *inclusive*, +// - The length of src and dest slice size should already +// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) +SliceIndex __nac3_list_slice_assign_var_size(SliceIndex dest_start, + SliceIndex dest_end, + SliceIndex dest_step, + uint8_t* dest_arr, + SliceIndex dest_arr_len, + SliceIndex src_start, + SliceIndex src_end, + SliceIndex src_step, + uint8_t* src_arr, + SliceIndex src_arr_len, + const SliceIndex size) { + /* if dest_arr_len == 0, do nothing since we do not support extending list */ + if (dest_arr_len == 0) + return dest_arr_len; + /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ + if (src_step == dest_step && dest_step == 1) { + const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; + const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; + if (src_len > 0) { + __builtin_memmove(dest_arr + dest_start * size, src_arr + src_start * size, src_len * size); + } + if (dest_len > 0) { + /* dropping */ + __builtin_memmove(dest_arr + (dest_start + src_len) * size, dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size); + } + /* shrink size */ + return dest_arr_len - (dest_len - src_len); + } + /* if two range overlaps, need alloca */ + uint8_t need_alloca = (dest_arr == src_arr) + && !(max(dest_start, dest_end) < min(src_start, src_end) + || max(src_start, src_end) < min(dest_start, dest_end)); + if (need_alloca) { + uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); + __builtin_memcpy(tmp, src_arr, src_arr_len * size); + src_arr = tmp; + } + SliceIndex src_ind = src_start; + SliceIndex dest_ind = dest_start; + for (; (src_step > 0) ? 
(src_ind <= src_end) : (src_ind >= src_end); src_ind += src_step, dest_ind += dest_step) { + /* for constant optimization */ + if (size == 1) { + __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); + } else if (size == 4) { + __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); + } else if (size == 8) { + __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); + } else { + /* memcpy for var size, cannot overlap after previous alloca */ + __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); + } + } + /* only dest_step == 1 can we shrink the dest list. */ + /* size should be ensured prior to calling this function */ + if (dest_step == 1 && dest_end >= dest_start) { + __builtin_memmove(dest_arr + dest_ind * size, dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size); + return dest_arr_len - (dest_end - dest_ind) - 1; + } + return dest_arr_len; +} +} // extern "C" \ No newline at end of file diff --git a/nac3core/irrt/irrt/math.hpp b/nac3core/irrt/irrt/math.hpp new file mode 100644 index 00000000..ff10f3f3 --- /dev/null +++ b/nac3core/irrt/irrt/math.hpp @@ -0,0 +1,93 @@ +#pragma once + +namespace { +// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c +// need to make sure `exp >= 0` before calling this function +template +T __nac3_int_exp_impl(T base, T exp) { + T res = 1; + /* repeated squaring method */ + do { + if (exp & 1) { + res *= base; /* for n odd */ + } + exp >>= 1; + base *= base; + } while (exp); + return res; +} +} // namespace + +#define DEF_nac3_int_exp_(T) \ + T __nac3_int_exp_##T(T base, T exp) { \ + return __nac3_int_exp_impl(base, exp); \ + } + +extern "C" { + +// Putting semicolons here to make clang-format not reformat this into +// a stair shape. 
+DEF_nac3_int_exp_(int32_t); +DEF_nac3_int_exp_(int64_t); +DEF_nac3_int_exp_(uint32_t); +DEF_nac3_int_exp_(uint64_t); + +int32_t __nac3_isinf(double x) { + return __builtin_isinf(x); +} + +int32_t __nac3_isnan(double x) { + return __builtin_isnan(x); +} + +double tgamma(double arg); + +double __nac3_gamma(double z) { + // Handling for denormals + // | x | Python gamma(x) | C tgamma(x) | + // --- | ----------------- | --------------- | ----------- | + // (1) | nan | nan | nan | + // (2) | -inf | -inf | inf | + // (3) | inf | inf | inf | + // (4) | 0.0 | inf | inf | + // (5) | {-1.0, -2.0, ...} | inf | nan | + + // (1)-(3) + if (__builtin_isinf(z) || __builtin_isnan(z)) { + return z; + } + + double v = tgamma(z); + + // (4)-(5) + return __builtin_isinf(v) || __builtin_isnan(v) ? __builtin_inf() : v; +} + +double lgamma(double arg); + +double __nac3_gammaln(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: gammaln(-inf) -> -inf + // - libm : lgamma(-inf) -> inf + + if (__builtin_isinf(x)) { + return x; + } + + return lgamma(x); +} + +double j0(double x); + +double __nac3_j0(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: j0(inf) -> nan + // - libm : j0(inf) -> 0.0 + + if (__builtin_isinf(x)) { + return __builtin_nan(""); + } + + return j0(x); +} +} \ No newline at end of file diff --git a/nac3core/irrt/irrt/math_util.hpp b/nac3core/irrt/irrt/math_util.hpp new file mode 100644 index 00000000..7299df7d --- /dev/null +++ b/nac3core/irrt/irrt/math_util.hpp @@ -0,0 +1,13 @@ +#pragma once + +namespace { +template +const T& max(const T& a, const T& b) { + return a > b ? a : b; +} + +template +const T& min(const T& a, const T& b) { + return a > b ? 
b : a; +} +} // namespace \ No newline at end of file diff --git a/nac3core/irrt/irrt/ndarray.hpp b/nac3core/irrt/irrt/ndarray.hpp new file mode 100644 index 00000000..a946741b --- /dev/null +++ b/nac3core/irrt/irrt/ndarray.hpp @@ -0,0 +1,144 @@ +#pragma once + +#include + +namespace { +template +SizeT __nac3_ndarray_calc_size_impl(const SizeT* list_data, SizeT list_len, SizeT begin_idx, SizeT end_idx) { + __builtin_assume(end_idx <= list_len); + + SizeT num_elems = 1; + for (SizeT i = begin_idx; i < end_idx; ++i) { + SizeT val = list_data[i]; + __builtin_assume(val > 0); + num_elems *= val; + } + return num_elems; +} + +template +void __nac3_ndarray_calc_nd_indices_impl(SizeT index, const SizeT* dims, SizeT num_dims, NDIndex* idxs) { + SizeT stride = 1; + for (SizeT dim = 0; dim < num_dims; dim++) { + SizeT i = num_dims - dim - 1; + __builtin_assume(dims[i] > 0); + idxs[i] = (index / stride) % dims[i]; + stride *= dims[i]; + } +} + +template +SizeT __nac3_ndarray_flatten_index_impl(const SizeT* dims, SizeT num_dims, const NDIndex* indices, SizeT num_indices) { + SizeT idx = 0; + SizeT stride = 1; + for (SizeT i = 0; i < num_dims; ++i) { + SizeT ri = num_dims - i - 1; + if (ri < num_indices) { + idx += stride * indices[ri]; + } + + __builtin_assume(dims[i] > 0); + stride *= dims[ri]; + } + return idx; +} + +template +void __nac3_ndarray_calc_broadcast_impl(const SizeT* lhs_dims, + SizeT lhs_ndims, + const SizeT* rhs_dims, + SizeT rhs_ndims, + SizeT* out_dims) { + SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; + + for (SizeT i = 0; i < max_ndims; ++i) { + const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; + const SizeT* rhs_dim_sz = i < rhs_ndims ? 
&rhs_dims[rhs_ndims - i - 1] : nullptr; + SizeT* out_dim = &out_dims[max_ndims - i - 1]; + + if (lhs_dim_sz == nullptr) { + *out_dim = *rhs_dim_sz; + } else if (rhs_dim_sz == nullptr) { + *out_dim = *lhs_dim_sz; + } else if (*lhs_dim_sz == 1) { + *out_dim = *rhs_dim_sz; + } else if (*rhs_dim_sz == 1) { + *out_dim = *lhs_dim_sz; + } else if (*lhs_dim_sz == *rhs_dim_sz) { + *out_dim = *lhs_dim_sz; + } else { + __builtin_unreachable(); + } + } +} + +template +void __nac3_ndarray_calc_broadcast_idx_impl(const SizeT* src_dims, + SizeT src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { + for (SizeT i = 0; i < src_ndims; ++i) { + SizeT src_i = src_ndims - i - 1; + out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; + } +} +} // namespace + +extern "C" { +uint32_t __nac3_ndarray_calc_size(const uint32_t* list_data, uint32_t list_len, uint32_t begin_idx, uint32_t end_idx) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); +} + +uint64_t +__nac3_ndarray_calc_size64(const uint64_t* list_data, uint64_t list_len, uint64_t begin_idx, uint64_t end_idx) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); +} + +void __nac3_ndarray_calc_nd_indices(uint32_t index, const uint32_t* dims, uint32_t num_dims, NDIndex* idxs) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); +} + +void __nac3_ndarray_calc_nd_indices64(uint64_t index, const uint64_t* dims, uint64_t num_dims, NDIndex* idxs) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); +} + +uint32_t +__nac3_ndarray_flatten_index(const uint32_t* dims, uint32_t num_dims, const NDIndex* indices, uint32_t num_indices) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); +} + +uint64_t +__nac3_ndarray_flatten_index64(const uint64_t* dims, uint64_t num_dims, const NDIndex* indices, uint64_t num_indices) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); +} + +void 
__nac3_ndarray_calc_broadcast(const uint32_t* lhs_dims, + uint32_t lhs_ndims, + const uint32_t* rhs_dims, + uint32_t rhs_ndims, + uint32_t* out_dims) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); +} + +void __nac3_ndarray_calc_broadcast64(const uint64_t* lhs_dims, + uint64_t lhs_ndims, + const uint64_t* rhs_dims, + uint64_t rhs_ndims, + uint64_t* out_dims) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); +} + +void __nac3_ndarray_calc_broadcast_idx(const uint32_t* src_dims, + uint32_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); +} + +void __nac3_ndarray_calc_broadcast_idx64(const uint64_t* src_dims, + uint64_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); +} +} \ No newline at end of file diff --git a/nac3core/irrt/irrt/slice.hpp b/nac3core/irrt/irrt/slice.hpp new file mode 100644 index 00000000..e2ce2d8c --- /dev/null +++ b/nac3core/irrt/irrt/slice.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include + +extern "C" { +SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { + if (i < 0) { + i = len + i; + } + if (i < 0) { + return 0; + } else if (i > len) { + return len; + } + return i; +} + +SliceIndex __nac3_range_slice_len(const SliceIndex start, const SliceIndex end, const SliceIndex step) { + SliceIndex diff = end - start; + if (diff > 0 && step > 0) { + return ((diff - 1) / step) + 1; + } else if (diff < 0 && step < 0) { + return ((diff + 1) / step) + 1; + } else { + return 0; + } +} +} \ No newline at end of file -- 2.44.1 From 38be74d5feaca9b1c25c487cfc86ddfbfb13a378 Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 27 Aug 2024 10:36:51 +0800 Subject: [PATCH 8/9] core/irrt: add exceptions and debug utils --- nac3artiq/src/lib.rs | 9 ++-- nac3core/build.rs | 20 
+++++--- nac3core/irrt/irrt.cpp | 1 + nac3core/irrt/irrt/cslice.hpp | 9 ++++ nac3core/irrt/irrt/debug.hpp | 25 ++++++++++ nac3core/irrt/irrt/exception.hpp | 82 ++++++++++++++++++++++++++++++++ nac3core/src/codegen/irrt/mod.rs | 25 ++++++++-- nac3standalone/src/main.rs | 18 +++++-- 8 files changed, 172 insertions(+), 17 deletions(-) create mode 100644 nac3core/irrt/irrt/cslice.hpp create mode 100644 nac3core/irrt/irrt/debug.hpp create mode 100644 nac3core/irrt/irrt/exception.hpp diff --git a/nac3artiq/src/lib.rs b/nac3artiq/src/lib.rs index be2853c7..4ed40aee 100644 --- a/nac3artiq/src/lib.rs +++ b/nac3artiq/src/lib.rs @@ -557,6 +557,10 @@ impl Nac3 { .register_top_level(synthesized.pop().unwrap(), Some(resolver.clone()), "", false) .unwrap(); + // Process IRRT + let context = inkwell::context::Context::create(); + let irrt = load_irrt(&context, resolver.as_ref()); + let fun_signature = FunSignature { args: vec![], ret: self.primitive.none, vars: VarMap::new() }; let mut store = ConcreteTypeStore::new(); @@ -727,7 +731,7 @@ impl Nac3 { membuffer.lock().push(buffer); }); - let context = inkwell::context::Context::create(); + // Link all modules into `main`. let buffers = membuffers.lock(); let main = context .create_module_from_ir(MemoryBuffer::create_from_memory_range(&buffers[0], "main")) @@ -756,8 +760,7 @@ impl Nac3 { ) .unwrap(); - main.link_in_module(load_irrt(&context)) - .map_err(|err| CompileError::new_err(err.to_string()))?; + main.link_in_module(irrt).map_err(|err| CompileError::new_err(err.to_string()))?; let mut function_iter = main.get_first_function(); while let Some(func) = function_iter { diff --git a/nac3core/build.rs b/nac3core/build.rs index d70f9424..5447a696 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -18,7 +18,7 @@ fn main() { * HACK: Sadly, clang doesn't let us emit generic LLVM bitcode. * Compiling for WASM32 and filtering the output with regex is the closest we can get. 
*/ - let flags: &[&str] = &[ + let mut flags: Vec<&str> = vec![ "--target=wasm32", "-x", "c++", @@ -26,20 +26,28 @@ fn main() { "-fno-discard-value-names", "-fno-exceptions", "-fno-rtti", - match env::var("PROFILE").as_deref() { - Ok("debug") => "-O0", - Ok("release") => "-O3", - flavor => panic!("Unknown or missing build flavor {flavor:?}"), - }, "-emit-llvm", "-S", "-Wall", "-Wextra", "-o", "-", + "-I", + irrt_dir.to_str().unwrap(), irrt_cpp_path.to_str().unwrap(), ]; + match env::var("PROFILE").as_deref() { + Ok("debug") => { + flags.push("-O0"); + flags.push("-DIRRT_DEBUG_ASSERT"); + } + Ok("release") => { + flags.push("-O3"); + } + flavor => panic!("Unknown or missing build flavor {flavor:?}"), + } + // Tell Cargo to rerun if any file under `irrt_dir` (recursive) changes println!("cargo:rerun-if-changed={}", irrt_dir.to_str().unwrap()); diff --git a/nac3core/irrt/irrt.cpp b/nac3core/irrt/irrt.cpp index 1bedd84f..f717bf3c 100644 --- a/nac3core/irrt/irrt.cpp +++ b/nac3core/irrt/irrt.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/nac3core/irrt/irrt/cslice.hpp b/nac3core/irrt/irrt/cslice.hpp new file mode 100644 index 00000000..58712396 --- /dev/null +++ b/nac3core/irrt/irrt/cslice.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +template +struct CSlice { + uint8_t* base; + SizeT len; +}; \ No newline at end of file diff --git a/nac3core/irrt/irrt/debug.hpp b/nac3core/irrt/irrt/debug.hpp new file mode 100644 index 00000000..77833ef1 --- /dev/null +++ b/nac3core/irrt/irrt/debug.hpp @@ -0,0 +1,25 @@ +#pragma once + +// Set in nac3core/build.rs +#ifdef IRRT_DEBUG_ASSERT +#define IRRT_DEBUG_ASSERT_BOOL true +#else +#define IRRT_DEBUG_ASSERT_BOOL false +#endif + +#define raise_debug_assert(SizeT, msg, param1, param2, param3) \ + raise_exception(SizeT, EXN_ASSERTION_ERROR, "IRRT debug assert failed: " msg, param1, param2, param3) + +#define debug_assert_eq(SizeT, lhs, rhs) \ + if constexpr (IRRT_DEBUG_ASSERT_BOOL) { \ + if ((lhs) != (rhs)) { \ + 
raise_debug_assert(SizeT, "LHS = {0}. RHS = {1}", lhs, rhs, NO_PARAM); \ + } \ + } + +#define debug_assert(SizeT, expr) \ + if constexpr (IRRT_DEBUG_ASSERT_BOOL) { \ + if (!(expr)) { \ + raise_debug_assert(SizeT, "Got false.", NO_PARAM, NO_PARAM, NO_PARAM); \ + } \ + } \ No newline at end of file diff --git a/nac3core/irrt/irrt/exception.hpp b/nac3core/irrt/irrt/exception.hpp new file mode 100644 index 00000000..4c1f0cb7 --- /dev/null +++ b/nac3core/irrt/irrt/exception.hpp @@ -0,0 +1,82 @@ +#pragma once + +#include +#include + +/** + * @brief The int type of ARTIQ exception IDs. + */ +typedef int32_t ExceptionId; + +/* + * Set of exceptions C++ IRRT can use. + * Must be synchronized with `setup_irrt_exceptions` in `nac3core/src/codegen/irrt/mod.rs`. + */ +extern "C" { +ExceptionId EXN_INDEX_ERROR; +ExceptionId EXN_VALUE_ERROR; +ExceptionId EXN_ASSERTION_ERROR; +ExceptionId EXN_TYPE_ERROR; +} + +/** + * @brief Extern function to `__nac3_raise` + * + * The parameter `err` could be `Exception` or `Exception`. The caller + * must make sure to pass `Exception`s with the correct `SizeT` depending on the `size_t` of the runtime. 
+ */ +extern "C" void __nac3_raise(void* err); + +namespace { +/** + * @brief NAC3's Exception struct + */ +template +struct Exception { + ExceptionId id; + CSlice filename; + int32_t line; + int32_t column; + CSlice function; + CSlice msg; + int64_t params[3]; +}; + +constexpr int64_t NO_PARAM = 0; + +template +void _raise_exception_helper(ExceptionId id, + const char* filename, + int32_t line, + const char* function, + const char* msg, + int64_t param0, + int64_t param1, + int64_t param2) { + Exception e = { + .id = id, + .filename = {.base = reinterpret_cast(filename), .len = __builtin_strlen(filename)}, + .line = line, + .column = 0, + .function = {.base = reinterpret_cast(function), .len = __builtin_strlen(function)}, + .msg = {.base = reinterpret_cast(msg), .len = __builtin_strlen(msg)}, + }; + e.params[0] = param0; + e.params[1] = param1; + e.params[2] = param2; + __nac3_raise(reinterpret_cast(&e)); + __builtin_unreachable(); +} + +/** + * @brief Raise an exception with location details (location in the IRRT source files). + * @param SizeT The runtime `size_t` type. + * @param id The ID of the exception to raise. + * @param msg A global constant C-string of the error message. + * + * `param0` to `param2` are optional format arguments of `msg`. They should be set to + * `NO_PARAM` to indicate they are unused. 
+ */ +#define raise_exception(SizeT, id, msg, param0, param1, param2) \ + _raise_exception_helper(id, __FILE__, __LINE__, __FUNCTION__, msg, param0, param1, param2) +} // namespace \ No newline at end of file diff --git a/nac3core/src/codegen/irrt/mod.rs b/nac3core/src/codegen/irrt/mod.rs index 91e62e94..9628392b 100644 --- a/nac3core/src/codegen/irrt/mod.rs +++ b/nac3core/src/codegen/irrt/mod.rs @@ -1,4 +1,4 @@ -use crate::typecheck::typedef::Type; +use crate::{symbol_resolver::SymbolResolver, typecheck::typedef::Type}; use super::{ classes::{ @@ -15,14 +15,14 @@ use inkwell::{ memory_buffer::MemoryBuffer, module::Module, types::{BasicTypeEnum, IntType}, - values::{BasicValueEnum, CallSiteValue, FloatValue, IntValue}, + values::{BasicValue, BasicValueEnum, CallSiteValue, FloatValue, IntValue}, AddressSpace, IntPredicate, }; use itertools::Either; use nac3parser::ast::Expr; #[must_use] -pub fn load_irrt(ctx: &Context) -> Module { +pub fn load_irrt<'ctx>(ctx: &'ctx Context, symbol_resolver: &dyn SymbolResolver) -> Module<'ctx> { let bitcode_buf = MemoryBuffer::create_from_memory_range( include_bytes!(concat!(env!("OUT_DIR"), "/irrt.bc")), "irrt_bitcode_buffer", @@ -38,6 +38,25 @@ pub fn load_irrt(ctx: &Context) -> Module { let function = irrt_mod.get_function(symbol).unwrap(); function.add_attribute(AttributeLoc::Function, ctx.create_enum_attribute(inline_attr, 0)); } + + // Initialize all global `EXN_*` exception IDs in IRRT with the [`SymbolResolver`]. 
+ let exn_id_type = ctx.i32_type(); + let errors = &[ + ("EXN_INDEX_ERROR", "0:IndexError"), + ("EXN_VALUE_ERROR", "0:ValueError"), + ("EXN_ASSERTION_ERROR", "0:AssertionError"), + ("EXN_TYPE_ERROR", "0:TypeError"), + ]; + for (irrt_name, symbol_name) in errors { + let exn_id = symbol_resolver.get_string_id(symbol_name); + let exn_id = exn_id_type.const_int(exn_id as u64, false).as_basic_value_enum(); + + let global = irrt_mod.get_global(irrt_name).unwrap_or_else(|| { + panic!("Exception symbol name '{irrt_name}' should exist in the IRRT LLVM module") + }); + global.set_initializer(&exn_id); + } + irrt_mod } diff --git a/nac3standalone/src/main.rs b/nac3standalone/src/main.rs index cc4811c1..17a5d15e 100644 --- a/nac3standalone/src/main.rs +++ b/nac3standalone/src/main.rs @@ -314,6 +314,15 @@ fn main() { let resolver = Arc::new(Resolver(internal_resolver.clone())) as Arc; + let context = inkwell::context::Context::create(); + + // Process IRRT + let irrt = load_irrt(&context, resolver.as_ref()); + if emit_llvm { + irrt.write_bitcode_to_path(Path::new("irrt.bc")); + } + + // Process the Python script let parser_result = parser::parse_program(&program, file_name.into()).unwrap(); for stmt in parser_result { @@ -418,8 +427,8 @@ fn main() { registry.add_task(task); registry.wait_tasks_complete(handles); + // Link all modules together into `main` let buffers = membuffers.lock(); - let context = inkwell::context::Context::create(); let main = context .create_module_from_ir(MemoryBuffer::create_from_memory_range(&buffers[0], "main")) .unwrap(); @@ -439,12 +448,9 @@ fn main() { main.link_in_module(other).unwrap(); } - let irrt = load_irrt(&context); - if emit_llvm { - irrt.write_bitcode_to_path(Path::new("irrt.bc")); - } main.link_in_module(irrt).unwrap(); + // Private all functions except "run" let mut function_iter = main.get_first_function(); while let Some(func) = function_iter { if func.count_basic_blocks() > 0 && func.get_name().to_str().unwrap() != "run" { @@ -453,6 
+459,7 @@ fn main() { function_iter = func.get_next_function(); } + // Optimize `main` let target_machine = llvm_options .target .create_target_machine(llvm_options.opt_level) @@ -466,6 +473,7 @@ fn main() { panic!("Failed to run optimization for module `main`: {}", err.to_string()); } + // Write output target_machine .write_to_file(&main, FileType::Object, Path::new("module.o")) .expect("couldn't write module to file"); -- 2.44.1 From 048950b7f06f5a923097b2b99288970610b7eece Mon Sep 17 00:00:00 2001 From: lyken Date: Tue, 27 Aug 2024 10:37:39 +0800 Subject: [PATCH 9/9] standalone: reformat demo.c --- nac3standalone/demo/demo.c | 43 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/nac3standalone/demo/demo.c b/nac3standalone/demo/demo.c index d07550f0..e2fcca57 100644 --- a/nac3standalone/demo/demo.c +++ b/nac3standalone/demo/demo.c @@ -7,11 +7,11 @@ #include double dbl_nan(void) { - return NAN; + return NAN; } double dbl_inf(void) { - return INFINITY; + return INFINITY; } void output_bool(bool x) { @@ -19,19 +19,19 @@ void output_bool(bool x) { } void output_int32(int32_t x) { - printf("%"PRId32"\n", x); + printf("%" PRId32 "\n", x); } void output_int64(int64_t x) { - printf("%"PRId64"\n", x); + printf("%" PRId64 "\n", x); } void output_uint32(uint32_t x) { - printf("%"PRIu32"\n", x); + printf("%" PRIu32 "\n", x); } void output_uint64(uint64_t x) { - printf("%"PRIu64"\n", x); + printf("%" PRIu64 "\n", x); } void output_float64(double x) { @@ -52,7 +52,7 @@ void output_range(int32_t range[3]) { } void output_asciiart(int32_t x) { - static const char *chars = " .,-:;i+hHM$*#@ "; + static const char* chars = " .,-:;i+hHM$*#@ "; if (x < 0) { putchar('\n'); } else { @@ -61,12 +61,12 @@ void output_asciiart(int32_t x) { } struct cslice { - void *data; + void* data; size_t len; }; -void output_int32_list(struct cslice *slice) { - const int32_t *data = (int32_t *) slice->data; +void output_int32_list(struct cslice* 
slice) { + const int32_t* data = reinterpret_cast(slice->data); putchar('['); for (size_t i = 0; i < slice->len; ++i) { @@ -80,23 +80,23 @@ void output_int32_list(struct cslice *slice) { putchar('\n'); } -void output_str(struct cslice *slice) { - const char *data = (const char *) slice->data; +void output_str(struct cslice* slice) { + const char* data = reinterpret_cast(slice->data); for (size_t i = 0; i < slice->len; ++i) { putchar(data[i]); } } -void output_strln(struct cslice *slice) { +void output_strln(struct cslice* slice) { output_str(slice); putchar('\n'); } -uint64_t dbg_stack_address(__attribute__((unused)) struct cslice *slice) { +uint64_t dbg_stack_address(__attribute__((unused)) struct cslice* slice) { int i; - void *ptr = (void *) &i; - return (uintptr_t) ptr; + void* ptr = static_cast(&i); + return (uintptr_t)ptr; } uint32_t __nac3_personality(uint32_t state, uint32_t exception_object, uint32_t context) { @@ -119,11 +119,12 @@ struct Exception { uint32_t __nac3_raise(struct Exception* e) { printf("__nac3_raise called. 
Exception details:\n"); - printf(" ID: %"PRIu32"\n", e->id); - printf(" Location: %*s:%"PRIu32":%"PRIu32"\n" , (int) e->file.len, (const char*) e->file.data, e->line, e->column); - printf(" Function: %*s\n" , (int) e->function.len, (const char*) e->function.data); - printf(" Message: \"%*s\"\n" , (int) e->message.len, (const char*) e->message.data); - printf(" Params: {0}=%"PRId64", {1}=%"PRId64", {2}=%"PRId64"\n", e->param[0], e->param[1], e->param[2]); + printf(" ID: %" PRIu32 "\n", e->id); + printf(" Location: %*s:%" PRIu32 ":%" PRIu32 "\n", static_cast(e->file.len), + reinterpret_cast(e->file.data), e->line, e->column); + printf(" Function: %*s\n", static_cast(e->function.len), reinterpret_cast(e->function.data)); + printf(" Message: \"%*s\"\n", static_cast(e->message.len), reinterpret_cast(e->message.data)); + printf(" Params: {0}=%" PRId64 ", {1}=%" PRId64 ", {2}=%" PRId64 "\n", e->param[0], e->param[1], e->param[2]); exit(101); __builtin_unreachable(); } -- 2.44.1