diff --git a/nac3core/src/codegen/irrt/irrt.cpp b/nac3core/src/codegen/irrt/irrt.cpp index 45db12af..6032518d 100644 --- a/nac3core/src/codegen/irrt/irrt.cpp +++ b/nac3core/src/codegen/irrt/irrt.cpp @@ -1,29 +1,30 @@ -typedef _BitInt(8) int8_t; -typedef unsigned _BitInt(8) uint8_t; -typedef _BitInt(32) int32_t; -typedef unsigned _BitInt(32) uint32_t; -typedef _BitInt(64) int64_t; -typedef unsigned _BitInt(64) uint64_t; +using int8_t = _BitInt(8); +using uint8_t = unsigned _BitInt(8); +using int32_t = _BitInt(32); +using uint32_t = unsigned _BitInt(32); +using int64_t = _BitInt(64); +using uint64_t = unsigned _BitInt(64); // NDArray indices are always `uint32_t`. -typedef uint32_t NDIndex; +using NDIndex = uint32_t; // The type of an index or a value describing the length of a range/slice is always `int32_t`. -typedef int32_t SliceIndex; +using SliceIndex = int32_t; +namespace { template -static T max(T a, T b) { +const T& max(const T& a, const T& b) { return a > b ? a : b; } template -static T min(T a, T b) { +const T& min(const T& a, const T& b) { return a > b ? b : a; } // adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c // need to make sure `exp >= 0` before calling this function template -static T __nac3_int_exp_impl(T base, T exp) { +T __nac3_int_exp_impl(T base, T exp) { T res = 1; /* repeated squaring method */ do { @@ -37,8 +38,8 @@ static T __nac3_int_exp_impl(T base, T exp) { } template -static SizeT __nac3_ndarray_calc_size_impl( - const SizeT *list_data, +SizeT __nac3_ndarray_calc_size_impl( + const SizeT* list_data, SizeT list_len, SizeT begin_idx, SizeT end_idx @@ -55,11 +56,11 @@ static SizeT __nac3_ndarray_calc_size_impl( } template -static void __nac3_ndarray_calc_nd_indices_impl( +void __nac3_ndarray_calc_nd_indices_impl( SizeT index, - const SizeT *dims, + const SizeT* dims, SizeT num_dims, - NDIndex *idxs + NDIndex* idxs ) { SizeT stride = 1; for (SizeT dim = 0; dim < num_dims; dim++) { @@ -71,10 +72,10 @@ static void __nac3_ndarray_calc_nd_indices_impl( } template -static SizeT __nac3_ndarray_flatten_index_impl( - const SizeT *dims, +SizeT __nac3_ndarray_flatten_index_impl( + const SizeT* dims, SizeT num_dims, - const NDIndex *indices, + const NDIndex* indices, SizeT num_indices ) { SizeT idx = 0; @@ -92,19 +93,19 @@ static SizeT __nac3_ndarray_flatten_index_impl( } template -static void __nac3_ndarray_calc_broadcast_impl( - const SizeT *lhs_dims, +void __nac3_ndarray_calc_broadcast_impl( + const SizeT* lhs_dims, SizeT lhs_ndims, - const SizeT *rhs_dims, + const SizeT* rhs_dims, SizeT rhs_ndims, - SizeT *out_dims + SizeT* out_dims ) { SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; for (SizeT i = 0; i < max_ndims; ++i) { - const SizeT *lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; - const SizeT *rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr; - SizeT *out_dim = &out_dims[max_ndims - i - 1]; + const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; + const SizeT* rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr; + SizeT* out_dim = &out_dims[max_ndims - i - 1]; if (lhs_dim_sz == nullptr) { *out_dim = *rhs_dim_sz; @@ -123,290 +124,291 @@ static void __nac3_ndarray_calc_broadcast_impl( } template -static void __nac3_ndarray_calc_broadcast_idx_impl( - const SizeT *src_dims, +void __nac3_ndarray_calc_broadcast_idx_impl( + const SizeT* src_dims, SizeT src_ndims, - const NDIndex *in_idx, - NDIndex *out_idx + const NDIndex* in_idx, + NDIndex* out_idx ) { for (SizeT i = 0; i < src_ndims; ++i) { SizeT src_i = src_ndims - i - 1; out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; } } +} // namespace extern "C" { - #define DEF_nac3_int_exp_(T) \ - T __nac3_int_exp_##T(T base, T exp) {\ - return __nac3_int_exp_impl(base, exp);\ - } - - DEF_nac3_int_exp_(int32_t) - DEF_nac3_int_exp_(int64_t) - DEF_nac3_int_exp_(uint32_t) - DEF_nac3_int_exp_(uint64_t) - - SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { - if (i < 0) { - i = len + i; - } - if (i < 0) { - return 0; - } else if (i > len) { - return len; - } - return i; +#define DEF_nac3_int_exp_(T) \ + T __nac3_int_exp_##T(T base, T exp) {\ + return __nac3_int_exp_impl(base, exp);\ } - SliceIndex __nac3_range_slice_len( - const SliceIndex start, - const SliceIndex end, - const SliceIndex step - ) { - SliceIndex diff = end - start; - if (diff > 0 && step > 0) { - return ((diff - 1) / step) + 1; - } else if (diff < 0 && step < 0) { - return ((diff + 1) / step) + 1; - } else { - return 0; - } - } +DEF_nac3_int_exp_(int32_t) +DEF_nac3_int_exp_(int64_t) +DEF_nac3_int_exp_(uint32_t) +DEF_nac3_int_exp_(uint64_t) - // Handle list assignment and dropping part of the list when - // both dest_step and src_step are +1. - // - All the index must *not* be out-of-bound or negative, - // - The end index is *inclusive*, - // - The length of src and dest slice size should already - // be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) - SliceIndex __nac3_list_slice_assign_var_size( - SliceIndex dest_start, - SliceIndex dest_end, - SliceIndex dest_step, - uint8_t *dest_arr, - SliceIndex dest_arr_len, - SliceIndex src_start, - SliceIndex src_end, - SliceIndex src_step, - uint8_t *src_arr, - SliceIndex src_arr_len, - const SliceIndex size - ) { - /* if dest_arr_len == 0, do nothing since we do not support extending list */ - if (dest_arr_len == 0) return dest_arr_len; - /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ - if (src_step == dest_step && dest_step == 1) { - const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; - const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; - if (src_len > 0) { - __builtin_memmove( - dest_arr + dest_start * size, - src_arr + src_start * size, - src_len * size - ); - } - if (dest_len > 0) { - /* dropping */ - __builtin_memmove( - dest_arr + (dest_start + src_len) * size, - dest_arr + (dest_end + 1) * size, - (dest_arr_len - dest_end - 1) * size - ); - } - /* shrink size */ - return dest_arr_len - (dest_len - src_len); - } - /* if two range overlaps, need alloca */ - uint8_t need_alloca = - (dest_arr == src_arr) - && !( - max(dest_start, dest_end) < min(src_start, src_end) - || max(src_start, src_end) < min(dest_start, dest_end) - ); - if (need_alloca) { - uint8_t *tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); - __builtin_memcpy(tmp, src_arr, src_arr_len * size); - src_arr = tmp; - } - SliceIndex src_ind = src_start; - SliceIndex dest_ind = dest_start; - for (; - (src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end); - src_ind += src_step, dest_ind += dest_step - ) { - /* for constant optimization */ - if (size == 1) { - __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); - } else if (size == 4) { - __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); - } else if (size == 8) { - __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); - } else { - /* memcpy for var size, cannot overlap after previous alloca */ - __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); - } - } - /* only dest_step == 1 can we shrink the dest list. */ - /* size should be ensured prior to calling this function */ - if (dest_step == 1 && dest_end >= dest_start) { +SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { + if (i < 0) { + i = len + i; + } + if (i < 0) { + return 0; + } else if (i > len) { + return len; + } + return i; +} + +SliceIndex __nac3_range_slice_len( + const SliceIndex start, + const SliceIndex end, + const SliceIndex step +) { + SliceIndex diff = end - start; + if (diff > 0 && step > 0) { + return ((diff - 1) / step) + 1; + } else if (diff < 0 && step < 0) { + return ((diff + 1) / step) + 1; + } else { + return 0; + } +} + +// Handle list assignment and dropping part of the list when +// both dest_step and src_step are +1. +// - All the index must *not* be out-of-bound or negative, +// - The end index is *inclusive*, +// - The length of src and dest slice size should already +// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) +SliceIndex __nac3_list_slice_assign_var_size( + SliceIndex dest_start, + SliceIndex dest_end, + SliceIndex dest_step, + uint8_t* dest_arr, + SliceIndex dest_arr_len, + SliceIndex src_start, + SliceIndex src_end, + SliceIndex src_step, + uint8_t* src_arr, + SliceIndex src_arr_len, + const SliceIndex size +) { + /* if dest_arr_len == 0, do nothing since we do not support extending list */ + if (dest_arr_len == 0) return dest_arr_len; + /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ + if (src_step == dest_step && dest_step == 1) { + const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; + const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; + if (src_len > 0) { __builtin_memmove( - dest_arr + dest_ind * size, + dest_arr + dest_start * size, + src_arr + src_start * size, + src_len * size + ); + } + if (dest_len > 0) { + /* dropping */ + __builtin_memmove( + dest_arr + (dest_start + src_len) * size, dest_arr + (dest_end + 1) * size, (dest_arr_len - dest_end - 1) * size ); - return dest_arr_len - (dest_end - dest_ind) - 1; } - return dest_arr_len; + /* shrink size */ + return dest_arr_len - (dest_len - src_len); } - - int32_t __nac3_isinf(double x) { - return __builtin_isinf(x); + /* if two range overlaps, need alloca */ + uint8_t need_alloca = + (dest_arr == src_arr) + && !( + max(dest_start, dest_end) < min(src_start, src_end) + || max(src_start, src_end) < min(dest_start, dest_end) + ); + if (need_alloca) { + uint8_t* tmp = reinterpret_cast(__builtin_alloca(src_arr_len * size)); + __builtin_memcpy(tmp, src_arr, src_arr_len * size); + src_arr = tmp; } - - int32_t __nac3_isnan(double x) { - return __builtin_isnan(x); - } - - double tgamma(double arg); - - double __nac3_gamma(double z) { - // Handling for denormals - // | x | Python gamma(x) | C tgamma(x) | - // --- | ----------------- | --------------- | ----------- | - // (1) | nan | nan | nan | - // (2) | -inf | -inf | inf | - // (3) | inf | inf | inf | - // (4) | 0.0 | inf | inf | - // (5) | {-1.0, -2.0, ...} | inf | nan | - - // (1)-(3) - if (__builtin_isinf(z) || __builtin_isnan(z)) { - return z; + SliceIndex src_ind = src_start; + SliceIndex dest_ind = dest_start; + for (; + (src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end); + src_ind += src_step, dest_ind += dest_step + ) { + /* for constant optimization */ + if (size == 1) { + __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); + } else if (size == 4) { + __builtin_memcpy(dest_arr + dest_ind * 4, src_arr + src_ind * 4, 4); + } else if (size == 8) { + __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); + } else { + /* memcpy for var size, cannot overlap after previous alloca */ + __builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); } + } + /* only dest_step == 1 can we shrink the dest list. */ + /* size should be ensured prior to calling this function */ + if (dest_step == 1 && dest_end >= dest_start) { + __builtin_memmove( + dest_arr + dest_ind * size, + dest_arr + (dest_end + 1) * size, + (dest_arr_len - dest_end - 1) * size + ); + return dest_arr_len - (dest_end - dest_ind) - 1; + } + return dest_arr_len; +} - double v = tgamma(z); +int32_t __nac3_isinf(double x) { + return __builtin_isinf(x); +} - // (4)-(5) - return __builtin_isinf(v) || __builtin_isnan(v) ? __builtin_inf() : v; +int32_t __nac3_isnan(double x) { + return __builtin_isnan(x); +} + +double tgamma(double arg); + +double __nac3_gamma(double z) { + // Handling for denormals + // | x | Python gamma(x) | C tgamma(x) | + // --- | ----------------- | --------------- | ----------- | + // (1) | nan | nan | nan | + // (2) | -inf | -inf | inf | + // (3) | inf | inf | inf | + // (4) | 0.0 | inf | inf | + // (5) | {-1.0, -2.0, ...} | inf | nan | + + // (1)-(3) + if (__builtin_isinf(z) || __builtin_isnan(z)) { + return z; } - double lgamma(double arg); + double v = tgamma(z); - double __nac3_gammaln(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: gammaln(-inf) -> -inf - // - libm : lgamma(-inf) -> inf + // (4)-(5) + return __builtin_isinf(v) || __builtin_isnan(v) ? __builtin_inf() : v; +} - if (__builtin_isinf(x)) { - return x; - } +double lgamma(double arg); - return lgamma(x); +double __nac3_gammaln(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: gammaln(-inf) -> -inf + // - libm : lgamma(-inf) -> inf + + if (__builtin_isinf(x)) { + return x; } - double j0(double x); + return lgamma(x); +} - double __nac3_j0(double x) { - // libm's handling of value overflows differs from scipy: - // - scipy: j0(inf) -> nan - // - libm : j0(inf) -> 0.0 +double j0(double x); - if (__builtin_isinf(x)) { - return __builtin_nan(""); - } +double __nac3_j0(double x) { + // libm's handling of value overflows differs from scipy: + // - scipy: j0(inf) -> nan + // - libm : j0(inf) -> 0.0 - return j0(x); + if (__builtin_isinf(x)) { + return __builtin_nan(""); } - uint32_t __nac3_ndarray_calc_size( - const uint32_t *list_data, - uint32_t list_len, - uint32_t begin_idx, - uint32_t end_idx - ) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); - } + return j0(x); +} - uint64_t __nac3_ndarray_calc_size64( - const uint64_t *list_data, - uint64_t list_len, - uint64_t begin_idx, - uint64_t end_idx - ) { - return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); - } +uint32_t __nac3_ndarray_calc_size( + const uint32_t* list_data, + uint32_t list_len, + uint32_t begin_idx, + uint32_t end_idx +) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); +} - void __nac3_ndarray_calc_nd_indices( - uint32_t index, - const uint32_t* dims, - uint32_t num_dims, - NDIndex* idxs - ) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); - } +uint64_t __nac3_ndarray_calc_size64( + const uint64_t* list_data, + uint64_t list_len, + uint64_t begin_idx, + uint64_t end_idx +) { + return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); +} - void __nac3_ndarray_calc_nd_indices64( - uint64_t index, - const uint64_t* dims, - uint64_t num_dims, - NDIndex* idxs - ) { - __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); - } +void __nac3_ndarray_calc_nd_indices( + uint32_t index, + const uint32_t* dims, + uint32_t num_dims, + NDIndex* idxs +) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); +} - uint32_t __nac3_ndarray_flatten_index( - const uint32_t* dims, - uint32_t num_dims, - const NDIndex* indices, - uint32_t num_indices - ) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); - } +void __nac3_ndarray_calc_nd_indices64( + uint64_t index, + const uint64_t* dims, + uint64_t num_dims, + NDIndex* idxs +) { + __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); +} - uint64_t __nac3_ndarray_flatten_index64( - const uint64_t* dims, - uint64_t num_dims, - const NDIndex* indices, - uint64_t num_indices - ) { - return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); - } +uint32_t __nac3_ndarray_flatten_index( + const uint32_t* dims, + uint32_t num_dims, + const NDIndex* indices, + uint32_t num_indices +) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); +} - void __nac3_ndarray_calc_broadcast( - const uint32_t *lhs_dims, - uint32_t lhs_ndims, - const uint32_t *rhs_dims, - uint32_t rhs_ndims, - uint32_t *out_dims - ) { - return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); - } +uint64_t __nac3_ndarray_flatten_index64( + const uint64_t* dims, + uint64_t num_dims, + const NDIndex* indices, + uint64_t num_indices +) { + return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); +} - void __nac3_ndarray_calc_broadcast64( - const uint64_t *lhs_dims, - uint64_t lhs_ndims, - const uint64_t *rhs_dims, - uint64_t rhs_ndims, - uint64_t *out_dims - ) { - return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); - } +void __nac3_ndarray_calc_broadcast( + const uint32_t* lhs_dims, + uint32_t lhs_ndims, + const uint32_t* rhs_dims, + uint32_t rhs_ndims, + uint32_t* out_dims +) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); +} - void __nac3_ndarray_calc_broadcast_idx( - const uint32_t *src_dims, - uint32_t src_ndims, - const NDIndex *in_idx, - NDIndex *out_idx - ) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); - } +void __nac3_ndarray_calc_broadcast64( + const uint64_t* lhs_dims, + uint64_t lhs_ndims, + const uint64_t* rhs_dims, + uint64_t rhs_ndims, + uint64_t* out_dims +) { + return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims); +} - void __nac3_ndarray_calc_broadcast_idx64( - const uint64_t *src_dims, - uint64_t src_ndims, - const NDIndex *in_idx, - NDIndex *out_idx - ) { - __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); - } -} \ No newline at end of file +void __nac3_ndarray_calc_broadcast_idx( + const uint32_t* src_dims, + uint32_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx +) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); +} + +void __nac3_ndarray_calc_broadcast_idx64( + const uint64_t* src_dims, + uint64_t src_ndims, + const NDIndex* in_idx, + NDIndex* out_idx +) { + __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx); +} +} // extern "C" \ No newline at end of file