diff --git a/flake.nix b/flake.nix index 4febca24..a6ce5fce 100644 --- a/flake.nix +++ b/flake.nix @@ -13,6 +13,7 @@ '' mkdir -p $out/bin ln -s ${pkgs.llvmPackages_14.clang-unwrapped}/bin/clang $out/bin/clang-irrt + ln -s ${pkgs.llvmPackages_14.clang}/bin/clang $out/bin/clang-irrt-test ln -s ${pkgs.llvmPackages_14.llvm.out}/bin/llvm-as $out/bin/llvm-as-irrt ''; nac3artiq = pkgs.python3Packages.toPythonModule ( @@ -23,6 +24,7 @@ cargoLock = { lockFile = ./Cargo.lock; }; + cargoTestFlags = [ "--features" "test" ]; passthru.cargoLock = cargoLock; nativeBuildInputs = [ pkgs.python3 pkgs.llvmPackages_14.clang llvm-tools-irrt pkgs.llvmPackages_14.llvm.out llvm-nac3 ]; buildInputs = [ pkgs.python3 llvm-nac3 ]; diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 724e0c8c..6e66f440 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -1,3 +1,6 @@ +[features] +test = [] + [package] name = "nac3core" version = "0.1.0" diff --git a/nac3core/build.rs b/nac3core/build.rs index 38e3382f..59dd26fa 100644 --- a/nac3core/build.rs +++ b/nac3core/build.rs @@ -7,8 +7,8 @@ use std::{ process::{Command, Stdio}, }; -fn main() { - const FILE: &str = "src/codegen/irrt/irrt.cpp"; +fn compile_irrt(irrt_dir: &Path, out_dir: &Path) { + let irrt_cpp_path = irrt_dir.join("irrt.cpp"); /* * HACK: Sadly, clang doesn't let us emit generic LLVM bitcode. 
@@ -16,7 +16,7 @@ fn main() { */ let flags: &[&str] = &[ "--target=wasm32", - FILE, + irrt_cpp_path.to_str().unwrap(), "-x", "c++", "-fno-discard-value-names", @@ -31,13 +31,15 @@ fn main() { "-S", "-Wall", "-Wextra", + "-I", + irrt_dir.to_str().unwrap(), "-o", "-", ]; - println!("cargo:rerun-if-changed={FILE}"); - let out_dir = env::var("OUT_DIR").unwrap(); - let out_path = Path::new(&out_dir); + println!("cargo:rerun-if-changed={path}", path = irrt_cpp_path.to_str().unwrap()); + // `irrt.cpp` is now only an `#include "irrt.hpp"`, so track the header as well; otherwise edits to it never rebuild `irrt.bc`. + println!("cargo:rerun-if-changed={path}", path = irrt_dir.join("irrt.hpp").to_str().unwrap()); let output = Command::new("clang-irrt") .args(flags) @@ -65,18 +67,64 @@ fn main() { println!("cargo:rerun-if-env-changed=DEBUG_DUMP_IRRT"); if env::var("DEBUG_DUMP_IRRT").is_ok() { - let mut file = File::create(out_path.join("irrt.ll")).unwrap(); + let mut file = File::create(out_dir.join("irrt.ll")).unwrap(); file.write_all(output.as_bytes()).unwrap(); - let mut file = File::create(out_path.join("irrt-filtered.ll")).unwrap(); + let mut file = File::create(out_dir.join("irrt-filtered.ll")).unwrap(); file.write_all(filtered_output.as_bytes()).unwrap(); } let mut llvm_as = Command::new("llvm-as-irrt") .stdin(Stdio::piped()) .arg("-o") - .arg(out_path.join("irrt.bc")) + .arg(out_dir.join("irrt.bc")) .spawn() .unwrap(); llvm_as.stdin.as_mut().unwrap().write_all(filtered_output.as_bytes()).unwrap(); assert!(llvm_as.wait().unwrap().success()); } + +fn compile_irrt_test(irrt_dir: &Path, out_dir: &Path) { + let irrt_test_cpp_path = irrt_dir.join("irrt_test.cpp"); + let exe_path = out_dir.join("irrt_test.out"); + + let flags: &[&str] = &[ + irrt_test_cpp_path.to_str().unwrap(), + "-x", + "c++", + "-I", + irrt_dir.to_str().unwrap(), + "-g", + "-fno-discard-value-names", + "-O0", + "-Wall", + "-Wextra", + "-lm", // for `tgamma()`, `lgamma()` + "-o", + exe_path.to_str().unwrap(), + ]; + + Command::new("clang-irrt-test") + .args(flags) + .output() + .map(|o| { + assert!(o.status.success(), "{}", std::str::from_utf8(&o.stderr).unwrap()); + o + }) + .unwrap(); + println!("cargo:rerun-if-changed={path}", 
path = irrt_test_cpp_path.to_str().unwrap()); +} + +fn main() { + let out_dir = env::var("OUT_DIR").unwrap(); + let out_dir = Path::new(&out_dir); + + let irrt_dir = Path::new("src/codegen/irrt"); + + compile_irrt(irrt_dir, out_dir); + + // https://github.com/rust-lang/cargo/issues/2549 + // `cargo test -F test` to also build `irrt_test.cpp` + if cfg!(feature = "test") { + compile_irrt_test(irrt_dir, out_dir); + } +} diff --git a/nac3core/src/codegen/irrt/irrt.cpp b/nac3core/src/codegen/irrt/irrt.cpp index 2382663e..4cde95b7 100644 --- a/nac3core/src/codegen/irrt/irrt.cpp +++ b/nac3core/src/codegen/irrt/irrt.cpp @@ -1,428 +1,3 @@ -using int8_t = _BitInt(8); -using uint8_t = unsigned _BitInt(8); -using int32_t = _BitInt(32); -using uint32_t = unsigned _BitInt(32); -using int64_t = _BitInt(64); -using uint64_t = unsigned _BitInt(64); +#include "irrt.hpp" -// NDArray indices are always `uint32_t`. -using NDIndex = uint32_t; -// The type of an index or a value describing the length of a range/slice is always `int32_t`. -using SliceIndex = int32_t; - -namespace { -template -const T& max(const T& a, const T& b) { - return a > b ? a : b; -} - -template -const T& min(const T& a, const T& b) { - return a > b ? 
b : a; -} - -// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c -// need to make sure `exp >= 0` before calling this function -template -T __nac3_int_exp_impl(T base, T exp) { - T res = 1; - /* repeated squaring method */ - do { - if (exp & 1) { - res *= base; /* for n odd */ - } - exp >>= 1; - base *= base; - } while (exp); - return res; -} - -template -SizeT __nac3_ndarray_calc_size_impl( - const SizeT* list_data, - SizeT list_len, - SizeT begin_idx, - SizeT end_idx -) { - __builtin_assume(end_idx <= list_len); - - SizeT num_elems = 1; - for (SizeT i = begin_idx; i < end_idx; ++i) { - SizeT val = list_data[i]; - __builtin_assume(val > 0); - num_elems *= val; - } - return num_elems; -} - -template -void __nac3_ndarray_calc_nd_indices_impl( - SizeT index, - const SizeT* dims, - SizeT num_dims, - NDIndex* idxs -) { - SizeT stride = 1; - for (SizeT dim = 0; dim < num_dims; dim++) { - SizeT i = num_dims - dim - 1; - __builtin_assume(dims[i] > 0); - idxs[i] = (index / stride) % dims[i]; - stride *= dims[i]; - } -} - -template -SizeT __nac3_ndarray_flatten_index_impl( - const SizeT* dims, - SizeT num_dims, - const NDIndex* indices, - SizeT num_indices -) { - SizeT idx = 0; - SizeT stride = 1; - for (SizeT i = 0; i < num_dims; ++i) { - SizeT ri = num_dims - i - 1; - if (ri < num_indices) { - idx += stride * indices[ri]; - } - - __builtin_assume(dims[i] > 0); - stride *= dims[ri]; - } - return idx; -} - -template -void __nac3_ndarray_calc_broadcast_impl( - const SizeT* lhs_dims, - SizeT lhs_ndims, - const SizeT* rhs_dims, - SizeT rhs_ndims, - SizeT* out_dims -) { - SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; - - for (SizeT i = 0; i < max_ndims; ++i) { - const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; - const SizeT* rhs_dim_sz = i < rhs_ndims ? 
&rhs_dims[rhs_ndims - i - 1] : nullptr; - SizeT* out_dim = &out_dims[max_ndims - i - 1]; - - if (lhs_dim_sz == nullptr) { - *out_dim = *rhs_dim_sz; - } else if (rhs_dim_sz == nullptr) { - *out_dim = *lhs_dim_sz; - } else if (*lhs_dim_sz == 1) { - *out_dim = *rhs_dim_sz; - } else if (*rhs_dim_sz == 1) { - *out_dim = *lhs_dim_sz; - } else if (*lhs_dim_sz == *rhs_dim_sz) { - *out_dim = *lhs_dim_sz; - } else { - __builtin_unreachable(); - } - } -} - -template -void __nac3_ndarray_calc_broadcast_idx_impl( - const SizeT* src_dims, - SizeT src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { - for (SizeT i = 0; i < src_ndims; ++i) { - SizeT src_i = src_ndims - i - 1; - out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; - } -} -} // namespace - -template -static void __nac3_ndarray_strides_from_shape_impl( - SizeT ndims, - SizeT *shape, - SizeT *dst_strides -) { - SizeT stride_product = 1; - for (SizeT i = 0; i < ndims; i++) { - int dim_i = ndims - i - 1; - dst_strides[dim_i] = stride_product; - stride_product *= shape[dim_i]; - } -} - -extern "C" { -#define DEF_nac3_int_exp_(T) \ - T __nac3_int_exp_##T(T base, T exp) {\ - return __nac3_int_exp_impl(base, exp);\ - } - -DEF_nac3_int_exp_(int32_t) -DEF_nac3_int_exp_(int64_t) -DEF_nac3_int_exp_(uint32_t) -DEF_nac3_int_exp_(uint64_t) - -SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { - if (i < 0) { - i = len + i; - } - if (i < 0) { - return 0; - } else if (i > len) { - return len; - } - return i; -} - -SliceIndex __nac3_range_slice_len( - const SliceIndex start, - const SliceIndex end, - const SliceIndex step -) { - SliceIndex diff = end - start; - if (diff > 0 && step > 0) { - return ((diff - 1) / step) + 1; - } else if (diff < 0 && step < 0) { - return ((diff + 1) / step) + 1; - } else { - return 0; - } -} - -// Handle list assignment and dropping part of the list when -// both dest_step and src_step are +1. 
-// - All the index must *not* be out-of-bound or negative, -// - The end index is *inclusive*, -// - The length of src and dest slice size should already -// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) -SliceIndex __nac3_list_slice_assign_var_size( - SliceIndex dest_start, - SliceIndex dest_end, - SliceIndex dest_step, - uint8_t* dest_arr, - SliceIndex dest_arr_len, - SliceIndex src_start, - SliceIndex src_end, - SliceIndex src_step, - uint8_t* src_arr, - SliceIndex src_arr_len, - const SliceIndex size -) { - /* if dest_arr_len == 0, do nothing since we do not support extending list */ - if (dest_arr_len == 0) return dest_arr_len; - /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ - if (src_step == dest_step && dest_step == 1) { - const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; - const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; - if (src_len > 0) { - __builtin_memmove( - dest_arr + dest_start * size, - src_arr + src_start * size, - src_len * size - ); - } - if (dest_len > 0) { - /* dropping */ - __builtin_memmove( - dest_arr + (dest_start + src_len) * size, - dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size - ); - } - /* shrink size */ - return dest_arr_len - (dest_len - src_len); - } - /* if two range overlaps, need alloca */ - uint8_t need_alloca = - (dest_arr == src_arr) - && !( - max(dest_start, dest_end) < min(src_start, src_end) - || max(src_start, src_end) < min(dest_start, dest_end) - ); - if (need_alloca) { - uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); - __builtin_memcpy(tmp, src_arr, src_arr_len * size); - src_arr = tmp; - } - SliceIndex src_ind = src_start; - SliceIndex dest_ind = dest_start; - for (; - (src_step > 0) ? 
(src_ind <= src_end) : (src_ind >= src_end); - src_ind += src_step, dest_ind += dest_step - ) { - /* for constant optimization */ - if (size == 1) { - __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); - } else if (size == 4) { - __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); - } else if (size == 8) { - __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); - } else { - /* memcpy for var size, cannot overlap after previous alloca */ - __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); - } - } - /* only dest_step == 1 can we shrink the dest list. */ - /* size should be ensured prior to calling this function */ - if (dest_step == 1 && dest_end >= dest_start) { - __builtin_memmove( - dest_arr + dest_ind * size, - dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size - ); - return dest_arr_len - (dest_end - dest_ind) - 1; - } - return dest_arr_len; -} - -int32_t __nac3_isinf(double x) { - return __builtin_isinf(x); -} - -int32_t __nac3_isnan(double x) { - return __builtin_isnan(x); -} - -double tgamma(double arg); - -double __nac3_gamma(double z) { - // Handling for denormals - // | x | Python gamma(x) | C tgamma(x) | - // --- | ----------------- | --------------- | ----------- | - // (1) | nan | nan | nan | - // (2) | -inf | -inf | inf | - // (3) | inf | inf | inf | - // (4) | 0.0 | inf | inf | - // (5) | {-1.0, -2.0, ...} | inf | nan | - - // (1)-(3) - if (__builtin_isinf(z) || __builtin_isnan(z)) { - return z; - } - - double v = tgamma(z); - - // (4)-(5) - return __builtin_isinf(v) || __builtin_isnan(v) ? 
__builtin_inf() : v; -} - -double lgamma(double arg); - -double __nac3_gammaln(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: gammaln(-inf) -> -inf - // - libm : lgamma(-inf) -> inf - - if (__builtin_isinf(x)) { - return x; - } - - return lgamma(x); -} - -double j0(double x); - -double __nac3_j0(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: j0(inf) -> nan - // - libm : j0(inf) -> 0.0 - - if (__builtin_isinf(x)) { - return __builtin_nan(""); - } - - return j0(x); -} - -uint32_t __nac3_ndarray_calc_size( - const uint32_t* list_data, - uint32_t list_len, - uint32_t begin_idx, - uint32_t end_idx -) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); -} - -uint64_t __nac3_ndarray_calc_size64( - const uint64_t* list_data, - uint64_t list_len, - uint64_t begin_idx, - uint64_t end_idx -) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); -} - -void __nac3_ndarray_calc_nd_indices( - uint32_t index, - const uint32_t* dims, - uint32_t num_dims, - NDIndex* idxs -) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); -} - -void __nac3_ndarray_calc_nd_indices64( - uint64_t index, - const uint64_t* dims, - uint64_t num_dims, - NDIndex* idxs -) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); -} - -uint32_t __nac3_ndarray_flatten_index( - const uint32_t* dims, - uint32_t num_dims, - const NDIndex* indices, - uint32_t num_indices -) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); -} - -uint64_t __nac3_ndarray_flatten_index64( - const uint64_t* dims, - uint64_t num_dims, - const NDIndex* indices, - uint64_t num_indices -) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); -} - -void __nac3_ndarray_calc_broadcast( - const uint32_t* lhs_dims, - uint32_t lhs_ndims, - const uint32_t* rhs_dims, - uint32_t rhs_ndims, - uint32_t* out_dims -) { - 
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); -} - -void __nac3_ndarray_calc_broadcast64( - const uint64_t* lhs_dims, - uint64_t lhs_ndims, - const uint64_t* rhs_dims, - uint64_t rhs_ndims, - uint64_t* out_dims -) { - return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); -} - -void __nac3_ndarray_calc_broadcast_idx( - const uint32_t* src_dims, - uint32_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); -} - -void __nac3_ndarray_calc_broadcast_idx64( - const uint64_t* src_dims, - uint64_t src_ndims, - const NDIndex* in_idx, - NDIndex* out_idx -) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); -} -} // extern "C" \ No newline at end of file +// All the implementations are from `irrt.hpp` diff --git a/nac3core/src/codegen/irrt/irrt.hpp b/nac3core/src/codegen/irrt/irrt.hpp new file mode 100644 index 00000000..5327f685 --- /dev/null +++ b/nac3core/src/codegen/irrt/irrt.hpp @@ -0,0 +1,437 @@ +#ifndef IRRT_DONT_TYPEDEF_INTS +typedef _BitInt(8) int8_t; +typedef unsigned _BitInt(8) uint8_t; +typedef _BitInt(32) int32_t; +typedef unsigned _BitInt(32) uint32_t; +typedef _BitInt(64) int64_t; +typedef unsigned _BitInt(64) uint64_t; +#endif + +// NDArray indices are always `uint32_t`. +typedef uint32_t NDIndex; +// The type of an index or a value describing the length of a range/slice is +// always `int32_t`. +typedef int32_t SliceIndex; + +template +static T max(T a, T b) { + return a > b ? a : b; +} + +template +static T min(T a, T b) { + return a > b ? 
b : a; +} + +// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c +// need to make sure `exp >= 0` before calling this function +template +static T __nac3_int_exp_impl(T base, T exp) { + T res = 1; + /* repeated squaring method */ + do { + if (exp & 1) { + res *= base; /* for n odd */ + } + exp >>= 1; + base *= base; + } while (exp); + return res; +} + +template +static SizeT __nac3_ndarray_calc_size_impl( + const SizeT *list_data, + SizeT list_len, + SizeT begin_idx, + SizeT end_idx +) { + __builtin_assume(end_idx <= list_len); + + SizeT num_elems = 1; + for (SizeT i = begin_idx; i < end_idx; ++i) { + SizeT val = list_data[i]; + __builtin_assume(val > 0); + num_elems *= val; + } + return num_elems; +} + +template +static void __nac3_ndarray_calc_nd_indices_impl( + SizeT index, + const SizeT *dims, + SizeT num_dims, + NDIndex *idxs +) { + SizeT stride = 1; + for (SizeT dim = 0; dim < num_dims; dim++) { + SizeT i = num_dims - dim - 1; + __builtin_assume(dims[i] > 0); + idxs[i] = (index / stride) % dims[i]; + stride *= dims[i]; + } +} + +template +static SizeT __nac3_ndarray_flatten_index_impl( + const SizeT *dims, + SizeT num_dims, + const NDIndex *indices, + SizeT num_indices +) { + SizeT idx = 0; + SizeT stride = 1; + for (SizeT i = 0; i < num_dims; ++i) { + SizeT ri = num_dims - i - 1; + if (ri < num_indices) { + idx += stride * indices[ri]; + } + + __builtin_assume(dims[i] > 0); + stride *= dims[ri]; + } + return idx; +} + +template +static void __nac3_ndarray_calc_broadcast_impl( + const SizeT *lhs_dims, + SizeT lhs_ndims, + const SizeT *rhs_dims, + SizeT rhs_ndims, + SizeT *out_dims +) { + SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; + + for (SizeT i = 0; i < max_ndims; ++i) { + const SizeT *lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; + const SizeT *rhs_dim_sz = i < rhs_ndims ? 
&rhs_dims[rhs_ndims - i - 1] : nullptr; + SizeT *out_dim = &out_dims[max_ndims - i - 1]; + + if (lhs_dim_sz == nullptr) { + *out_dim = *rhs_dim_sz; + } else if (rhs_dim_sz == nullptr) { + *out_dim = *lhs_dim_sz; + } else if (*lhs_dim_sz == 1) { + *out_dim = *rhs_dim_sz; + } else if (*rhs_dim_sz == 1) { + *out_dim = *lhs_dim_sz; + } else if (*lhs_dim_sz == *rhs_dim_sz) { + *out_dim = *lhs_dim_sz; + } else { + __builtin_unreachable(); + } + } +} + +template +static void __nac3_ndarray_calc_broadcast_idx_impl( + const SizeT *src_dims, + SizeT src_ndims, + const NDIndex *in_idx, + NDIndex *out_idx +) { + for (SizeT i = 0; i < src_ndims; ++i) { + SizeT src_i = src_ndims - i - 1; + out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; + } +} + +template +static void __nac3_ndarray_strides_from_shape_impl( + SizeT ndims, + SizeT *shape, + SizeT *dst_strides +) { + SizeT stride_product = 1; + for (SizeT i = 0; i < ndims; i++) { + int dim_i = ndims - i - 1; + dst_strides[dim_i] = stride_product; + stride_product *= shape[dim_i]; + } +} + +extern "C" { + #define DEF_nac3_int_exp_(T) \ + T __nac3_int_exp_##T(T base, T exp) {\ + return __nac3_int_exp_impl(base, exp);\ + } + + DEF_nac3_int_exp_(int32_t) + DEF_nac3_int_exp_(int64_t) + DEF_nac3_int_exp_(uint32_t) + DEF_nac3_int_exp_(uint64_t) + + SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { + if (i < 0) { + i = len + i; + } + if (i < 0) { + return 0; + } else if (i > len) { + return len; + } + return i; + } + + SliceIndex __nac3_range_slice_len( + const SliceIndex start, + const SliceIndex end, + const SliceIndex step + ) { + SliceIndex diff = end - start; + if (diff > 0 && step > 0) { + return ((diff - 1) / step) + 1; + } else if (diff < 0 && step < 0) { + return ((diff + 1) / step) + 1; + } else { + return 0; + } + } + + // Handle list assignment and dropping part of the list when + // both dest_step and src_step are +1. 
+ // - All the index must *not* be out-of-bound or negative, + // - The end index is *inclusive*, + // - The length of src and dest slice size should already + // be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) + SliceIndex __nac3_list_slice_assign_var_size( + SliceIndex dest_start, + SliceIndex dest_end, + SliceIndex dest_step, + uint8_t *dest_arr, + SliceIndex dest_arr_len, + SliceIndex src_start, + SliceIndex src_end, + SliceIndex src_step, + uint8_t *src_arr, + SliceIndex src_arr_len, + const SliceIndex size + ) { + /* if dest_arr_len == 0, do nothing since we do not support extending list */ + if (dest_arr_len == 0) return dest_arr_len; + /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ + if (src_step == dest_step && dest_step == 1) { + const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; + const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; + if (src_len > 0) { + __builtin_memmove( + dest_arr + dest_start * size, + src_arr + src_start * size, + src_len * size + ); + } + if (dest_len > 0) { + /* dropping */ + __builtin_memmove( + dest_arr + (dest_start + src_len) * size, + dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size + ); + } + /* shrink size */ + return dest_arr_len - (dest_len - src_len); + } + /* if two range overlaps, need alloca */ + uint8_t need_alloca = + (dest_arr == src_arr) + && !( + max(dest_start, dest_end) < min(src_start, src_end) + || max(src_start, src_end) < min(dest_start, dest_end) + ); + if (need_alloca) { + uint8_t *tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); + __builtin_memcpy(tmp, src_arr, src_arr_len * size); + src_arr = tmp; + } + SliceIndex src_ind = src_start; + SliceIndex dest_ind = dest_start; + for (; + (src_step > 0) ? 
(src_ind <= src_end) : (src_ind >= src_end); + src_ind += src_step, dest_ind += dest_step + ) { + /* for constant optimization */ + if (size == 1) { + __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); + } else if (size == 4) { + __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); + } else if (size == 8) { + __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); + } else { + /* memcpy for var size, cannot overlap after previous alloca */ + __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); + } + } + /* only dest_step == 1 can we shrink the dest list. */ + /* size should be ensured prior to calling this function */ + if (dest_step == 1 && dest_end >= dest_start) { + __builtin_memmove( + dest_arr + dest_ind * size, + dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size + ); + return dest_arr_len - (dest_end - dest_ind) - 1; + } + return dest_arr_len; + } + + int32_t __nac3_isinf(double x) { + return __builtin_isinf(x); + } + + int32_t __nac3_isnan(double x) { + return __builtin_isnan(x); + } + + double tgamma(double arg); + + double __nac3_gamma(double z) { + // Handling for denormals + // | x | Python gamma(x) | C tgamma(x) | + // --- | ----------------- | --------------- | ----------- | + // (1) | nan | nan | nan | + // (2) | -inf | -inf | inf | + // (3) | inf | inf | inf | + // (4) | 0.0 | inf | inf | + // (5) | {-1.0, -2.0, ...} | inf | nan | + + // (1)-(3) + if (__builtin_isinf(z) || __builtin_isnan(z)) { + return z; + } + + double v = tgamma(z); + + // (4)-(5) + return __builtin_isinf(v) || __builtin_isnan(v) ? 
__builtin_inf() : v; + } + + double lgamma(double arg); + + double __nac3_gammaln(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: gammaln(-inf) -> -inf + // - libm : lgamma(-inf) -> inf + + if (__builtin_isinf(x)) { + return x; + } + + return lgamma(x); + } + + double j0(double x); + + double __nac3_j0(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: j0(inf) -> nan + // - libm : j0(inf) -> 0.0 + + if (__builtin_isinf(x)) { + return __builtin_nan(""); + } + + return j0(x); + } + + uint32_t __nac3_ndarray_calc_size( + const uint32_t *list_data, + uint32_t list_len, + uint32_t begin_idx, + uint32_t end_idx + ) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); + } + + uint64_t __nac3_ndarray_calc_size64( + const uint64_t *list_data, + uint64_t list_len, + uint64_t begin_idx, + uint64_t end_idx + ) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); + } + + void __nac3_ndarray_calc_nd_indices( + uint32_t index, + const uint32_t* dims, + uint32_t num_dims, + NDIndex* idxs + ) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); + } + + void __nac3_ndarray_calc_nd_indices64( + uint64_t index, + const uint64_t* dims, + uint64_t num_dims, + NDIndex* idxs + ) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); + } + + uint32_t __nac3_ndarray_flatten_index( + const uint32_t* dims, + uint32_t num_dims, + const NDIndex* indices, + uint32_t num_indices + ) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); + } + + uint64_t __nac3_ndarray_flatten_index64( + const uint64_t* dims, + uint64_t num_dims, + const NDIndex* indices, + uint64_t num_indices + ) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); + } + + void __nac3_ndarray_calc_broadcast( + const uint32_t *lhs_dims, + uint32_t lhs_ndims, + const uint32_t *rhs_dims, + uint32_t rhs_ndims, + 
uint32_t *out_dims + ) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); + } + + void __nac3_ndarray_calc_broadcast64( + const uint64_t *lhs_dims, + uint64_t lhs_ndims, + const uint64_t *rhs_dims, + uint64_t rhs_ndims, + uint64_t *out_dims + ) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); + } + + void __nac3_ndarray_calc_broadcast_idx( + const uint32_t *src_dims, + uint32_t src_ndims, + const NDIndex *in_idx, + NDIndex *out_idx + ) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); + } + + void __nac3_ndarray_calc_broadcast_idx64( + const uint64_t *src_dims, + uint64_t src_ndims, + const NDIndex *in_idx, + NDIndex *out_idx + ) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); + } + + void __nac3_ndarray_strides_from_shape(uint32_t ndims, uint32_t* shape, uint32_t* dst_strides) { + __nac3_ndarray_strides_from_shape_impl(ndims, shape, dst_strides); + } + + void __nac3_ndarray_strides_from_shape64(uint64_t ndims, uint64_t* shape, uint64_t* dst_strides) { + __nac3_ndarray_strides_from_shape_impl(ndims, shape, dst_strides); + } +} \ No newline at end of file diff --git a/nac3core/src/codegen/irrt/irrt_test.cpp b/nac3core/src/codegen/irrt/irrt_test.cpp new file mode 100644 index 00000000..edd3f5e6 --- /dev/null +++ b/nac3core/src/codegen/irrt/irrt_test.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +#define IRRT_DONT_TYPEDEF_INTS +#include "irrt.hpp" + +static void __test_fail(const char *file, int line) { + // NOTE: Try to make the location info follow a format that + // VSCode/other IDEs would recognize as a clickable URL. + printf("[!] 
test_fail() invoked at %s:%d", file, line); + exit(1); +} + +#define test_fail() __test_fail(__FILE__, __LINE__); + +template +bool arrays_match(int len, T *as, T *bs) { + for (int i = 0; i < len; i++) { + if (as[i] != bs[i]) return false; + } + return true; +} + +template +void debug_print_array(const char* format, int len, T *as) { + printf("["); + for (int i = 0; i < len; i++) { + if (i != 0) printf(", "); + printf(format, as[i]); + } + printf("]\n"); +} + +template +bool assert_arrays_match(const char *label, const char *format, int len, T *expected, T *got) { + auto match = arrays_match(len, expected, got); + + if (!match) { + printf("expected %s: ", label); + debug_print_array(format, len, expected); + printf("got %s: ", label); + debug_print_array(format, len, got); + } + + return match; +} + +static void test_strides_from_shape() { + const uint64_t ndims = 4; + uint64_t shape[ndims] = { 999, 3, 5, 7 }; + uint64_t strides[ndims] = { 0 }; + __nac3_ndarray_strides_from_shape64(ndims, shape, strides); + + uint64_t expected_strides[ndims] = { 3*5*7, 5*7, 7, 1 }; + if (!assert_arrays_match("strides", "%u", ndims, expected_strides, strides)) test_fail(); +} + +int main() { + test_strides_from_shape(); + return 0; +} \ No newline at end of file diff --git a/nac3core/src/codegen/irrt/mod.rs b/nac3core/src/codegen/irrt/mod.rs index 1109bef5..dd9edbfc 100644 --- a/nac3core/src/codegen/irrt/mod.rs +++ b/nac3core/src/codegen/irrt/mod.rs @@ -1,5 +1,7 @@ use crate::typecheck::typedef::Type; +mod test; + use super::{ classes::{ ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, diff --git a/nac3core/src/codegen/irrt/test.rs b/nac3core/src/codegen/irrt/test.rs new file mode 100644 index 00000000..9a91f41f --- /dev/null +++ b/nac3core/src/codegen/irrt/test.rs @@ -0,0 +1,26 @@ +#[cfg(test)] +mod tests { + use std::{path::Path, process::Command}; + + #[test] + fn run_irrt_test() { + assert!( + cfg!(feature = "test"), + "Please do `cargo test -F test` 
to compile `irrt_test.out` and run test" + ); + + let irrt_test_out_path = Path::new(concat!(env!("OUT_DIR"), "/irrt_test.out")); + let output = Command::new(irrt_test_out_path.to_str().unwrap()).output().unwrap(); + + if !output.status.success() { + eprintln!("irrt_test failed with status {}:", output.status); + eprintln!("====== stdout ======"); + eprintln!("{}", String::from_utf8(output.stdout).unwrap()); + eprintln!("====== stderr ======"); + eprintln!("{}", String::from_utf8(output.stderr).unwrap()); + eprintln!("===================="); + + panic!("irrt_test failed"); + } + } +} diff --git a/nix/windows/default.nix b/nix/windows/default.nix index c7709fd6..2580bb73 100644 --- a/nix/windows/default.nix +++ b/nix/windows/default.nix @@ -81,6 +81,7 @@ in rec { '' mkdir -p $out/bin ln -s ${llvm-nac3}/bin/clang.exe $out/bin/clang-irrt.exe + ln -s ${llvm-nac3}/bin/clang.exe $out/bin/clang-irrt-test.exe ln -s ${llvm-nac3}/bin/llvm-as.exe $out/bin/llvm-as-irrt.exe ''; nac3artiq = pkgs.rustPlatform.buildRustPackage {