1
0
forked from M-Labs/nac3

core: Reformat and modernize irrt.cpp

- Use an anonymous namespace instead of `static` for internal linkage
- Use `using` alias declarations instead of `typedef`
- Attach the pointer `*` to the type (`T* p`) instead of the identifier (`T *p`)
This commit is contained in:
David Mak 2024-07-09 13:52:18 +08:00
parent bc40a32524
commit 1c56005a01

View File

@ -1,29 +1,30 @@
typedef _BitInt(8) int8_t; using int8_t = _BitInt(8);
typedef unsigned _BitInt(8) uint8_t; using uint8_t = unsigned _BitInt(8);
typedef _BitInt(32) int32_t; using int32_t = _BitInt(32);
typedef unsigned _BitInt(32) uint32_t; using uint32_t = unsigned _BitInt(32);
typedef _BitInt(64) int64_t; using int64_t = _BitInt(64);
typedef unsigned _BitInt(64) uint64_t; using uint64_t = unsigned _BitInt(64);
// NDArray indices are always `uint32_t`. // NDArray indices are always `uint32_t`.
typedef uint32_t NDIndex; using NDIndex = uint32_t;
// The type of an index or a value describing the length of a range/slice is always `int32_t`. // The type of an index or a value describing the length of a range/slice is always `int32_t`.
typedef int32_t SliceIndex; using SliceIndex = int32_t;
namespace {
template <typename T> template <typename T>
static T max(T a, T b) { const T& max(const T& a, const T& b) {
return a > b ? a : b; return a > b ? a : b;
} }
template <typename T> template <typename T>
static T min(T a, T b) { const T& min(const T& a, const T& b) {
return a > b ? b : a; return a > b ? b : a;
} }
// adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c // adapted from GNU Scientific Library: https://git.savannah.gnu.org/cgit/gsl.git/tree/sys/pow_int.c
// need to make sure `exp >= 0` before calling this function // need to make sure `exp >= 0` before calling this function
template <typename T> template <typename T>
static T __nac3_int_exp_impl(T base, T exp) { T __nac3_int_exp_impl(T base, T exp) {
T res = 1; T res = 1;
/* repeated squaring method */ /* repeated squaring method */
do { do {
@ -37,8 +38,8 @@ static T __nac3_int_exp_impl(T base, T exp) {
} }
template <typename SizeT> template <typename SizeT>
static SizeT __nac3_ndarray_calc_size_impl( SizeT __nac3_ndarray_calc_size_impl(
const SizeT *list_data, const SizeT* list_data,
SizeT list_len, SizeT list_len,
SizeT begin_idx, SizeT begin_idx,
SizeT end_idx SizeT end_idx
@ -55,11 +56,11 @@ static SizeT __nac3_ndarray_calc_size_impl(
} }
template <typename SizeT> template <typename SizeT>
static void __nac3_ndarray_calc_nd_indices_impl( void __nac3_ndarray_calc_nd_indices_impl(
SizeT index, SizeT index,
const SizeT *dims, const SizeT* dims,
SizeT num_dims, SizeT num_dims,
NDIndex *idxs NDIndex* idxs
) { ) {
SizeT stride = 1; SizeT stride = 1;
for (SizeT dim = 0; dim < num_dims; dim++) { for (SizeT dim = 0; dim < num_dims; dim++) {
@ -71,10 +72,10 @@ static void __nac3_ndarray_calc_nd_indices_impl(
} }
template <typename SizeT> template <typename SizeT>
static SizeT __nac3_ndarray_flatten_index_impl( SizeT __nac3_ndarray_flatten_index_impl(
const SizeT *dims, const SizeT* dims,
SizeT num_dims, SizeT num_dims,
const NDIndex *indices, const NDIndex* indices,
SizeT num_indices SizeT num_indices
) { ) {
SizeT idx = 0; SizeT idx = 0;
@ -92,19 +93,19 @@ static SizeT __nac3_ndarray_flatten_index_impl(
} }
template <typename SizeT> template <typename SizeT>
static void __nac3_ndarray_calc_broadcast_impl( void __nac3_ndarray_calc_broadcast_impl(
const SizeT *lhs_dims, const SizeT* lhs_dims,
SizeT lhs_ndims, SizeT lhs_ndims,
const SizeT *rhs_dims, const SizeT* rhs_dims,
SizeT rhs_ndims, SizeT rhs_ndims,
SizeT *out_dims SizeT* out_dims
) { ) {
SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims; SizeT max_ndims = lhs_ndims > rhs_ndims ? lhs_ndims : rhs_ndims;
for (SizeT i = 0; i < max_ndims; ++i) { for (SizeT i = 0; i < max_ndims; ++i) {
const SizeT *lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr; const SizeT* lhs_dim_sz = i < lhs_ndims ? &lhs_dims[lhs_ndims - i - 1] : nullptr;
const SizeT *rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr; const SizeT* rhs_dim_sz = i < rhs_ndims ? &rhs_dims[rhs_ndims - i - 1] : nullptr;
SizeT *out_dim = &out_dims[max_ndims - i - 1]; SizeT* out_dim = &out_dims[max_ndims - i - 1];
if (lhs_dim_sz == nullptr) { if (lhs_dim_sz == nullptr) {
*out_dim = *rhs_dim_sz; *out_dim = *rhs_dim_sz;
@ -123,30 +124,31 @@ static void __nac3_ndarray_calc_broadcast_impl(
} }
template <typename SizeT> template <typename SizeT>
static void __nac3_ndarray_calc_broadcast_idx_impl( void __nac3_ndarray_calc_broadcast_idx_impl(
const SizeT *src_dims, const SizeT* src_dims,
SizeT src_ndims, SizeT src_ndims,
const NDIndex *in_idx, const NDIndex* in_idx,
NDIndex *out_idx NDIndex* out_idx
) { ) {
for (SizeT i = 0; i < src_ndims; ++i) { for (SizeT i = 0; i < src_ndims; ++i) {
SizeT src_i = src_ndims - i - 1; SizeT src_i = src_ndims - i - 1;
out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i]; out_idx[src_i] = src_dims[src_i] == 1 ? 0 : in_idx[src_i];
} }
} }
} // namespace
extern "C" { extern "C" {
#define DEF_nac3_int_exp_(T) \ #define DEF_nac3_int_exp_(T) \
T __nac3_int_exp_##T(T base, T exp) {\ T __nac3_int_exp_##T(T base, T exp) {\
return __nac3_int_exp_impl(base, exp);\ return __nac3_int_exp_impl(base, exp);\
} }
DEF_nac3_int_exp_(int32_t) DEF_nac3_int_exp_(int32_t)
DEF_nac3_int_exp_(int64_t) DEF_nac3_int_exp_(int64_t)
DEF_nac3_int_exp_(uint32_t) DEF_nac3_int_exp_(uint32_t)
DEF_nac3_int_exp_(uint64_t) DEF_nac3_int_exp_(uint64_t)
SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) { SliceIndex __nac3_slice_index_bound(SliceIndex i, const SliceIndex len) {
if (i < 0) { if (i < 0) {
i = len + i; i = len + i;
} }
@ -156,13 +158,13 @@ extern "C" {
return len; return len;
} }
return i; return i;
} }
SliceIndex __nac3_range_slice_len( SliceIndex __nac3_range_slice_len(
const SliceIndex start, const SliceIndex start,
const SliceIndex end, const SliceIndex end,
const SliceIndex step const SliceIndex step
) { ) {
SliceIndex diff = end - start; SliceIndex diff = end - start;
if (diff > 0 && step > 0) { if (diff > 0 && step > 0) {
return ((diff - 1) / step) + 1; return ((diff - 1) / step) + 1;
@ -171,27 +173,27 @@ extern "C" {
} else { } else {
return 0; return 0;
} }
} }
// Handle list assignment and dropping part of the list when // Handle list assignment and dropping part of the list when
// both dest_step and src_step are +1. // both dest_step and src_step are +1.
// - All indices must *not* be out of bounds or negative, // - All indices must *not* be out of bounds or negative,
// - The end index is *inclusive*, // - The end index is *inclusive*,
// - The lengths of the src and dest slices should already // - The lengths of the src and dest slices should already
// be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest) // be checked: if dest.step == 1 then len(src) <= len(dest) else len(src) == len(dest)
SliceIndex __nac3_list_slice_assign_var_size( SliceIndex __nac3_list_slice_assign_var_size(
SliceIndex dest_start, SliceIndex dest_start,
SliceIndex dest_end, SliceIndex dest_end,
SliceIndex dest_step, SliceIndex dest_step,
uint8_t *dest_arr, uint8_t* dest_arr,
SliceIndex dest_arr_len, SliceIndex dest_arr_len,
SliceIndex src_start, SliceIndex src_start,
SliceIndex src_end, SliceIndex src_end,
SliceIndex src_step, SliceIndex src_step,
uint8_t *src_arr, uint8_t* src_arr,
SliceIndex src_arr_len, SliceIndex src_arr_len,
const SliceIndex size const SliceIndex size
) { ) {
/* if dest_arr_len == 0, do nothing since we do not support extending list */ /* if dest_arr_len == 0, do nothing since we do not support extending list */
if (dest_arr_len == 0) return dest_arr_len; if (dest_arr_len == 0) return dest_arr_len;
/* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */ /* if both step is 1, memmove directly, handle the dropping of the list, and shrink size */
@ -224,7 +226,7 @@ extern "C" {
|| max(src_start, src_end) < min(dest_start, dest_end) || max(src_start, src_end) < min(dest_start, dest_end)
); );
if (need_alloca) { if (need_alloca) {
uint8_t *tmp = reinterpret_cast<uint8_t *>(__builtin_alloca(src_arr_len * size)); uint8_t* tmp = reinterpret_cast<uint8_t *>(__builtin_alloca(src_arr_len * size));
__builtin_memcpy(tmp, src_arr, src_arr_len * size); __builtin_memcpy(tmp, src_arr, src_arr_len * size);
src_arr = tmp; src_arr = tmp;
} }
@ -257,19 +259,19 @@ extern "C" {
return dest_arr_len - (dest_end - dest_ind) - 1; return dest_arr_len - (dest_end - dest_ind) - 1;
} }
return dest_arr_len; return dest_arr_len;
} }
int32_t __nac3_isinf(double x) { int32_t __nac3_isinf(double x) {
return __builtin_isinf(x); return __builtin_isinf(x);
} }
int32_t __nac3_isnan(double x) { int32_t __nac3_isnan(double x) {
return __builtin_isnan(x); return __builtin_isnan(x);
} }
double tgamma(double arg); double tgamma(double arg);
double __nac3_gamma(double z) { double __nac3_gamma(double z) {
// Handling for denormals // Handling for denormals
// | x | Python gamma(x) | C tgamma(x) | // | x | Python gamma(x) | C tgamma(x) |
// --- | ----------------- | --------------- | ----------- | // --- | ----------------- | --------------- | ----------- |
@ -288,11 +290,11 @@ extern "C" {
// (4)-(5) // (4)-(5)
return __builtin_isinf(v) || __builtin_isnan(v) ? __builtin_inf() : v; return __builtin_isinf(v) || __builtin_isnan(v) ? __builtin_inf() : v;
} }
double lgamma(double arg); double lgamma(double arg);
double __nac3_gammaln(double x) { double __nac3_gammaln(double x) {
// libm's handling of value overflows differs from scipy: // libm's handling of value overflows differs from scipy:
// - scipy: gammaln(-inf) -> -inf // - scipy: gammaln(-inf) -> -inf
// - libm : lgamma(-inf) -> inf // - libm : lgamma(-inf) -> inf
@ -302,11 +304,11 @@ extern "C" {
} }
return lgamma(x); return lgamma(x);
} }
double j0(double x); double j0(double x);
double __nac3_j0(double x) { double __nac3_j0(double x) {
// libm's handling of value overflows differs from scipy: // libm's handling of value overflows differs from scipy:
// - scipy: j0(inf) -> nan // - scipy: j0(inf) -> nan
// - libm : j0(inf) -> 0.0 // - libm : j0(inf) -> 0.0
@ -316,97 +318,97 @@ extern "C" {
} }
return j0(x); return j0(x);
} }
uint32_t __nac3_ndarray_calc_size( uint32_t __nac3_ndarray_calc_size(
const uint32_t *list_data, const uint32_t* list_data,
uint32_t list_len, uint32_t list_len,
uint32_t begin_idx, uint32_t begin_idx,
uint32_t end_idx uint32_t end_idx
) { ) {
return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx);
} }
uint64_t __nac3_ndarray_calc_size64( uint64_t __nac3_ndarray_calc_size64(
const uint64_t *list_data, const uint64_t* list_data,
uint64_t list_len, uint64_t list_len,
uint64_t begin_idx, uint64_t begin_idx,
uint64_t end_idx uint64_t end_idx
) { ) {
return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx); return __nac3_ndarray_calc_size_impl(list_data, list_len, begin_idx, end_idx);
} }
void __nac3_ndarray_calc_nd_indices( void __nac3_ndarray_calc_nd_indices(
uint32_t index, uint32_t index,
const uint32_t* dims, const uint32_t* dims,
uint32_t num_dims, uint32_t num_dims,
NDIndex* idxs NDIndex* idxs
) { ) {
__nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs);
} }
void __nac3_ndarray_calc_nd_indices64( void __nac3_ndarray_calc_nd_indices64(
uint64_t index, uint64_t index,
const uint64_t* dims, const uint64_t* dims,
uint64_t num_dims, uint64_t num_dims,
NDIndex* idxs NDIndex* idxs
) { ) {
__nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs); __nac3_ndarray_calc_nd_indices_impl(index, dims, num_dims, idxs);
} }
uint32_t __nac3_ndarray_flatten_index( uint32_t __nac3_ndarray_flatten_index(
const uint32_t* dims, const uint32_t* dims,
uint32_t num_dims, uint32_t num_dims,
const NDIndex* indices, const NDIndex* indices,
uint32_t num_indices uint32_t num_indices
) { ) {
return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices);
} }
uint64_t __nac3_ndarray_flatten_index64( uint64_t __nac3_ndarray_flatten_index64(
const uint64_t* dims, const uint64_t* dims,
uint64_t num_dims, uint64_t num_dims,
const NDIndex* indices, const NDIndex* indices,
uint64_t num_indices uint64_t num_indices
) { ) {
return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices); return __nac3_ndarray_flatten_index_impl(dims, num_dims, indices, num_indices);
}
void __nac3_ndarray_calc_broadcast(
const uint32_t *lhs_dims,
uint32_t lhs_ndims,
const uint32_t *rhs_dims,
uint32_t rhs_ndims,
uint32_t *out_dims
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
}
void __nac3_ndarray_calc_broadcast64(
const uint64_t *lhs_dims,
uint64_t lhs_ndims,
const uint64_t *rhs_dims,
uint64_t rhs_ndims,
uint64_t *out_dims
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
}
void __nac3_ndarray_calc_broadcast_idx(
const uint32_t *src_dims,
uint32_t src_ndims,
const NDIndex *in_idx,
NDIndex *out_idx
) {
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
}
void __nac3_ndarray_calc_broadcast_idx64(
const uint64_t *src_dims,
uint64_t src_ndims,
const NDIndex *in_idx,
NDIndex *out_idx
) {
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
}
} }
void __nac3_ndarray_calc_broadcast(
const uint32_t* lhs_dims,
uint32_t lhs_ndims,
const uint32_t* rhs_dims,
uint32_t rhs_ndims,
uint32_t* out_dims
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
}
void __nac3_ndarray_calc_broadcast64(
const uint64_t* lhs_dims,
uint64_t lhs_ndims,
const uint64_t* rhs_dims,
uint64_t rhs_ndims,
uint64_t* out_dims
) {
return __nac3_ndarray_calc_broadcast_impl(lhs_dims, lhs_ndims, rhs_dims, rhs_ndims, out_dims);
}
void __nac3_ndarray_calc_broadcast_idx(
const uint32_t* src_dims,
uint32_t src_ndims,
const NDIndex* in_idx,
NDIndex* out_idx
) {
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
}
void __nac3_ndarray_calc_broadcast_idx64(
const uint64_t* src_dims,
uint64_t src_ndims,
const NDIndex* in_idx,
NDIndex* out_idx
) {
__nac3_ndarray_calc_broadcast_idx_impl(src_dims, src_ndims, in_idx, out_idx);
}
} // extern "C"