forked from M-Labs/nac3
1
0
Fork 0

core/irrt: reformat

This commit is contained in:
lyken 2024-07-22 10:56:28 +08:00
parent 31d0fdd818
commit e4ba5e6411
1 changed files with 111 additions and 168 deletions

View File

@ -7,7 +7,8 @@ using uint64_t = unsigned _BitInt(64);
// NDArray indices are always `uint32_t`. // NDArray indices are always `uint32_t`.
using NDIndex = uint32_t; using NDIndex = uint32_t;
// The type of an index or a value describing the length of a range/slice is always `int32_t`. // The type of an index or a value describing the length of a range/slice is
// always `int32_t`.
using SliceIndex = int32_t; using SliceIndex = int32_t;
namespace { namespace {
@ -21,8 +22,9 @@ const T& min(const T& a, const T& b) {
return a > b ? b : a; return a > b ? b : a;
} }
// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c // adapted from GNU Scientific Library:
// need to make sure `exp >= 0` before calling this function // https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c need to make
// sure `exp >= 0` before calling this function
template <typename T> template <typename T>
T __nac3_int_exp_impl(T base, T exp) { T __nac3_int_exp_impl(T base, T exp) {
T res = 1; T res = 1;
@ -38,12 +40,8 @@ T __nac3_int_exp_impl(T base, T exp) {
} }
template <typename SizeT> template <typename SizeT>
SizeT __nac3_ndarray_calc_size_impl( SizeT __nac3_ndarray_calc_size_impl(const SizeT* list_data, SizeT list_len,
const SizeT* list_data, SizeT begin_idx, SizeT end_idx) {
SizeT list_len,
SizeT begin_idx,
SizeT end_idx
) {
__builtin_assume(end_idx <= list_len); __builtin_assume(end_idx <= list_len);
SizeT num_elems = 1; SizeT num_elems = 1;
@ -56,12 +54,8 @@ SizeT __nac3_ndarray_calc_size_impl(
} }
template <typename SizeT> template <typename SizeT>
void __nac3_ndarray_calc_nd_indices_impl( void __nac3_ndarray_calc_nd_indices_impl(SizeT index, const SizeT* dims,
SizeT index, SizeT num_dims, NDIndex* idxs) {
const SizeT* dims,
SizeT num_dims,
NDIndex* idxs
) {
SizeT stride = 1; SizeT stride = 1;
for (SizeT dim = 0; dim < num_dims; dim++) { for (SizeT dim = 0; dim < num_dims; dim++) {
SizeT i = num_dims - dim - 1; SizeT i = num_dims - dim - 1;
@ -72,12 +66,9 @@ void __nac3_ndarray_calc_nd_indices_impl(
} }
template <typename SizeT> template <typename SizeT>
SizeT __nac3_ndarray_flatten_index_impl( SizeT __nac3_ndarray_flatten_index_impl(const SizeT* dims, SizeT num_dims,
const SizeT* dims, const NDIndex* indices,
SizeT num_dims, SizeT num_indices) {
const NDIndex* indices,
SizeT num_indices
) {
SizeT idx = 0; SizeT idx = 0;
SizeT stride = 1; SizeT stride = 1;
for (SizeT i = 0; i < num_dims; ++i) { for (SizeT i = 0; i < num_dims; ++i) {
@ -93,18 +84,16 @@ SizeT __nac3_ndarray_flatten_index_impl(
} }
template <typename SizeT> template <typename SizeT>
void __nac3_ndarray_calc_broadcast_impl( void __nac3_ndarray_calc_broadcast_impl(const SizeT* lhs_dims, SizeT lhs_ndims,
const SizeT* lhs_dims, const SizeT* rhs_dims, SizeT rhs_ndims,
SizeT lhs_ndims, SizeT* out_dims) {
const SizeT* rhs_dims,
SizeT rhs_ndims,
SizeT* out_dims
) {
SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims;
for (SizeT i = 0; i < max_ndims; ++i) { for (SizeT i = 0; i < max_ndims; ++i) {
const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; const SizeT* lhs_dim_sz =
const SizeT* rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr; i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr;
const SizeT* rhs_dim_sz =
i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr;
SizeT* out_dim = &out_dims[max_ndims - i - 1]; SizeT* out_dim = &out_dims[max_ndims - i - 1];
if (lhs_dim_sz == nullptr) { if (lhs_dim_sz == nullptr) {
@ -124,12 +113,10 @@ void __nac3_ndarray_calc_broadcast_impl(
} }
template <typename SizeT> template <typename SizeT>
void __nac3_ndarray_calc_broadcast_idx_impl( void __nac3_ndarray_calc_broadcast_idx_impl(const SizeT* src_dims,
const SizeT* src_dims, SizeT src_ndims,
SizeT src_ndims, const NDIndex* in_idx,
const NDIndex* in_idx, NDIndex* out_idx) {
NDIndex* out_idx
) {
for (SizeT i = 0; i < src_ndims; ++i) { for (SizeT i = 0; i < src_ndims; ++i) {
SizeT src_i = src_ndims - i - 1; SizeT src_i = src_ndims - i - 1;
out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i];
@ -138,17 +125,16 @@ void __nac3_ndarray_calc_broadcast_idx_impl(
} // namespace } // namespace
extern "C" { extern "C" {
#define DEF_nac3_int_exp_(T) \ #define DEF_nac3_int_exp_(T) \
T __nac3_int_exp_##T(T base, T exp) {\ T __nac3_int_exp_##T(T base, T exp) { \
return __nac3_int_exp_impl(base, exp);\ return __nac3_int_exp_impl(base, exp); \
} }
DEF_nac3_int_exp_(int32_t) DEF_nac3_int_exp_(int32_t) DEF_nac3_int_exp_(int64_t)
DEF_nac3_int_exp_(int64_t) DEF_nac3_int_exp_(uint32_t) DEF_nac3_int_exp_(uint64_t)
DEF_nac3_int_exp_(uint32_t)
DEF_nac3_int_exp_(uint64_t)
SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { SliceIndex
__nac3_slice_index_bound(SliceIndex i, const SliceIndex len) {
if (i < 0) { if (i < 0) {
i = len + i; i = len + i;
} }
@ -160,11 +146,8 @@ SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) {
return i; return i;
} }
SliceIndex __nac3_range_slice_len( SliceIndex __nac3_range_slice_len(const SliceIndex start, const SliceIndex end,
const SliceIndex start, const SliceIndex step) {
const SliceIndex end,
const SliceIndex step
) {
SliceIndex diff = end - start; SliceIndex diff = end - start;
if (diff > 0 && step > 0) { if (diff > 0 && step > 0) {
return ((diff - 1) / step) + 1; return ((diff - 1) / step) + 1;
@ -180,62 +163,51 @@ SliceIndex __nac3_range_slice_len(
// - All the index must *not* be out-of-bound or negative, // - All the index must *not* be out-of-bound or negative,
// - The end index is *inclusive*, // - The end index is *inclusive*,
// - The length of src and dest slice size should already // - The length of src and dest slice size should already
// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) // be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) ==
// len(dest)
SliceIndex __nac3_list_slice_assign_var_size( SliceIndex __nac3_list_slice_assign_var_size(
SliceIndex dest_start, SliceIndex dest_start, SliceIndex dest_end, SliceIndex dest_step,
SliceIndex dest_end, uint8_t* dest_arr, SliceIndex dest_arr_len, SliceIndex src_start,
SliceIndex dest_step, SliceIndex src_end, SliceIndex src_step, uint8_t* src_arr,
uint8_t* dest_arr, SliceIndex src_arr_len, const SliceIndex size) {
SliceIndex dest_arr_len, /* if dest_arr_len == 0, do nothing since we do not support extending list
SliceIndex src_start, */
SliceIndex src_end,
SliceIndex src_step,
uint8_t* src_arr,
SliceIndex src_arr_len,
const SliceIndex size
) {
/* if dest_arr_len == 0, do nothing since we do not support extending list */
if (dest_arr_len == 0) return dest_arr_len; if (dest_arr_len == 0) return dest_arr_len;
/* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ /* if both step is 1, memmove directly, handle the dropping of the list, and
* shrink size */
if (src_step == dest_step && dest_step == 1) { if (src_step == dest_step && dest_step == 1) {
const SliceIndex src_len = (src_end >= src_start) ? (src_end - src_start + 1) : 0; const SliceIndex src_len =
const SliceIndex dest_len = (dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0; (src_end >= src_start) ? (src_end - src_start + 1) : 0;
const SliceIndex dest_len =
(dest_end >= dest_start) ? (dest_end - dest_start + 1) : 0;
if (src_len > 0) { if (src_len > 0) {
__builtin_memmove( __builtin_memmove(dest_arr + dest_start * size,
dest_arr + dest_start * size, src_arr + src_start * size, src_len * size);
src_arr + src_start * size,
src_len * size
);
} }
if (dest_len > 0) { if (dest_len > 0) {
/* dropping */ /* dropping */
__builtin_memmove( __builtin_memmove(dest_arr + (dest_start + src_len) * size,
dest_arr + (dest_start + src_len) * size, dest_arr + (dest_end + 1) * size,
dest_arr + (dest_end + 1) * size, (dest_arr_len - dest_end - 1) * size);
(dest_arr_len - dest_end - 1) * size
);
} }
/* shrink size */ /* shrink size */
return dest_arr_len - (dest_len - src_len); return dest_arr_len - (dest_len - src_len);
} }
/* if two range overlaps, need alloca */ /* if two range overlaps, need alloca */
uint8_t need_alloca = uint8_t need_alloca =
(dest_arr == src_arr) (dest_arr == src_arr) &&
&& !( !(max(dest_start, dest_end) < min(src_start, src_end) ||
max(dest_start, dest_end) < min(src_start, src_end) max(src_start, src_end) < min(dest_start, dest_end));
|| max(src_start, src_end) < min(dest_start, dest_end)
);
if (need_alloca) { if (need_alloca) {
uint8_t* tmp = reinterpret_cast<uint8_t *>(__builtin_alloca(src_arr_len * size)); uint8_t* tmp =
reinterpret_cast<uint8_t*>(__builtin_alloca(src_arr_len * size));
__builtin_memcpy(tmp, src_arr, src_arr_len * size); __builtin_memcpy(tmp, src_arr, src_arr_len * size);
src_arr = tmp; src_arr = tmp;
} }
SliceIndex src_ind = src_start; SliceIndex src_ind = src_start;
SliceIndex dest_ind = dest_start; SliceIndex dest_ind = dest_start;
for (; for (; (src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end);
(src_step > 0) ? (src_ind <= src_end) : (src_ind >= src_end); src_ind += src_step, dest_ind += dest_step) {
src_ind += src_step, dest_ind += dest_step
) {
/* for constant optimization */ /* for constant optimization */
if (size == 1) { if (size == 1) {
__builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1); __builtin_memcpy(dest_arr + dest_ind, src_arr + src_ind, 1);
@ -245,29 +217,24 @@ SliceIndex __nac3_list_slice_assign_var_size(
__builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8); __builtin_memcpy(dest_arr + dest_ind * 8, src_arr + src_ind * 8, 8);
} else { } else {
/* memcpy for var size, cannot overlap after previous alloca */ /* memcpy for var size, cannot overlap after previous alloca */
__builtin_memcpy(dest_arr + dest_ind * size, src_arr + src_ind * size, size); __builtin_memcpy(dest_arr + dest_ind * size,
src_arr + src_ind * size, size);
} }
} }
/* only dest_step == 1 can we shrink the dest list. */ /* only dest_step == 1 can we shrink the dest list. */
/* size should be ensured prior to calling this function */ /* size should be ensured prior to calling this function */
if (dest_step == 1 && dest_end >= dest_start) { if (dest_step == 1 && dest_end >= dest_start) {
__builtin_memmove( __builtin_memmove(dest_arr + dest_ind * size,
dest_arr + dest_ind * size, dest_arr + (dest_end + 1) * size,
dest_arr + (dest_end + 1) * size, (dest_arr_len - dest_end - 1) * size);
(dest_arr_len - dest_end - 1) * size
);
return dest_arr_len - (dest_end - dest_ind) - 1; return dest_arr_len - (dest_end - dest_ind) - 1;
} }
return dest_arr_len; return dest_arr_len;
} }
int32_t __nac3_isinf(double x) { int32_t __nac3_isinf(double x) { return __builtin_isinf(x); }
return __builtin_isinf(x);
}
int32_t __nac3_isnan(double x) { int32_t __nac3_isnan(double x) { return __builtin_isnan(x); }
return __builtin_isnan(x);
}
double tgamma(double arg); double tgamma(double arg);
@ -320,95 +287,71 @@ double __nac3_j0(double x) {
return j0(x); return j0(x);
} }
uint32_t __nac3_ndarray_calc_size( uint32_t __nac3_ndarray_calc_size(const uint32_t* list_data, uint32_t list_len,
const uint32_t* list_data, uint32_t begin_idx, uint32_t end_idx) {
uint32_t list_len, return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx,
uint32_t begin_idx, end_idx);
uint32_t end_idx
) {
return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx);
} }
uint64_t __nac3_ndarray_calc_size64( uint64_t __nac3_ndarray_calc_size64(const uint64_t* list_data,
const uint64_t* list_data, uint64_t list_len, uint64_t begin_idx,
uint64_t list_len, uint64_t end_idx) {
uint64_t begin_idx, return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx,
uint64_t end_idx end_idx);
) {
return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx);
} }
void __nac3_ndarray_calc_nd_indices( void __nac3_ndarray_calc_nd_indices(uint32_t index, const uint32_t* dims,
uint32_t index, uint32_t num_dims, NDIndex* idxs) {
const uint32_t* dims,
uint32_t num_dims,
NDIndex* idxs
) {
__nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs);
} }
void __nac3_ndarray_calc_nd_indices64( void __nac3_ndarray_calc_nd_indices64(uint64_t index, const uint64_t* dims,
uint64_t index, uint64_t num_dims, NDIndex* idxs) {
const uint64_t* dims,
uint64_t num_dims,
NDIndex* idxs
) {
__nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs);
} }
uint32_t __nac3_ndarray_flatten_index( uint32_t __nac3_ndarray_flatten_index(const uint32_t* dims, uint32_t num_dims,
const uint32_t* dims, const NDIndex* indices,
uint32_t num_dims, uint32_t num_indices) {
const NDIndex* indices, return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices,
uint32_t num_indices num_indices);
) {
return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices);
} }
uint64_t __nac3_ndarray_flatten_index64( uint64_t __nac3_ndarray_flatten_index64(const uint64_t* dims, uint64_t num_dims,
const uint64_t* dims, const NDIndex* indices,
uint64_t num_dims, uint64_t num_indices) {
const NDIndex* indices, return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices,
uint64_t num_indices num_indices);
) {
return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices);
} }
void __nac3_ndarray_calc_broadcast( void __nac3_ndarray_calc_broadcast(const uint32_t* lhs_dims, uint32_t lhs_ndims,
const uint32_t* lhs_dims, const uint32_t* rhs_dims, uint32_t rhs_ndims,
uint32_t lhs_ndims, uint32_t* out_dims) {
const uint32_t* rhs_dims, return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims,
uint32_t rhs_ndims, rhs_ndims, out_dims);
uint32_t* out_dims
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
} }
void __nac3_ndarray_calc_broadcast64( void __nac3_ndarray_calc_broadcast64(const uint64_t* lhs_dims,
const uint64_t* lhs_dims, uint64_t lhs_ndims,
uint64_t lhs_ndims, const uint64_t* rhs_dims,
const uint64_t* rhs_dims, uint64_t rhs_ndims, uint64_t* out_dims) {
uint64_t rhs_ndims, return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims,
uint64_t* out_dims rhs_ndims, out_dims);
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
} }
void __nac3_ndarray_calc_broadcast_idx( void __nac3_ndarray_calc_broadcast_idx(const uint32_t* src_dims,
const uint32_t* src_dims, uint32_t src_ndims,
uint32_t src_ndims, const NDIndex* in_idx,
const NDIndex* in_idx, NDIndex* out_idx) {
NDIndex* out_idx __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx,
) { out_idx);
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
} }
void __nac3_ndarray_calc_broadcast_idx64( void __nac3_ndarray_calc_broadcast_idx64(const uint64_t* src_dims,
const uint64_t* src_dims, uint64_t src_ndims,
uint64_t src_ndims, const NDIndex* in_idx,
const NDIndex* in_idx, NDIndex* out_idx) {
NDIndex* out_idx __nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx,
) { out_idx);
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
} }
} // extern "C" } // extern "C"