From dec1658e1399359464d5aa15b176b086cd34a013 Mon Sep 17 00:00:00 2001
From: lyken <lyken@m-labs.hk>
Date: Wed, 10 Jul 2024 17:05:01 +0800
Subject: [PATCH] core: irrt general numpy broadcasting

---
 nac3core/irrt/irrt_numpy_ndarray.hpp | 140 +++++++++++++++-
 nac3core/irrt/irrt_test.cpp          | 237 ++++++++++++++++++++++++++-
 nac3core/irrt/irrt_utils.hpp         |   1 +
 3 files changed, 364 insertions(+), 14 deletions(-)
diff --git a/nac3core/irrt/irrt_numpy_ndarray.hpp b/nac3core/irrt/irrt_numpy_ndarray.hpp
index 38f62754..8e1f1d50 100644
--- a/nac3core/irrt/irrt_numpy_ndarray.hpp
+++ b/nac3core/irrt/irrt_numpy_ndarray.hpp
@@ -13,6 +13,17 @@ using NDIndex = uint32_t;
 
 namespace {
     namespace ndarray_util {
+        template <typename SizeT>
+        static void set_indices_by_nth(SizeT ndims, const SizeT* shape, SizeT* indices, SizeT nth) {
+            for (int32_t i = 0; i < ndims; i++) {
+                int32_t dim_i = ndims - i - 1;
+                int32_t dim = shape[dim_i];
+
+                indices[dim_i] = nth % dim;
+                nth /= dim;
+            }
+        }
+
         // Compute the strides of an ndarray given an ndarray `shape`
         // and assuming that the ndarray is *fully C-contagious*.
         //
@@ -34,6 +45,57 @@ namespace {
             for (SizeT dim_i = 0; dim_i < ndims; dim_i++) size *= shape[dim_i];
             return size;
         }
+
+        template <typename SizeT>
+        static bool can_broadcast_shape_to(
+            const SizeT target_ndims,
+            const SizeT *target_shape,
+            const SizeT src_ndims,
+            const SizeT *src_shape
+        ) {
+            /*
+                // See https://numpy.org/doc/stable/user/basics.broadcasting.html
+
+                This function handles this example:
+                ```
+                Image  (3d array): 256 x 256 x 3
+                Scale  (1d array):             3
+                Result (3d array): 256 x 256 x 3
+                ```
+
+                Other interesting examples to consider:
+                - `can_broadcast_shape_to([3], [1, 1, 1, 1, 3]) == true`
+                - `can_broadcast_shape_to([3], [3, 1]) == false`
+                - `can_broadcast_shape_to([256, 256, 3], [256, 1, 3]) == true`
+
+                In cases when the shapes contain zero(es):
+                - `can_broadcast_shape_to([0], [1]) == true`
+                - `can_broadcast_shape_to([0], [2]) == false`
+                - `can_broadcast_shape_to([0, 4, 0, 0], [1]) == true`
+                - `can_broadcast_shape_to([0, 4, 0, 0], [1, 1, 1, 1]) == true`
+                - `can_broadcast_shape_to([0, 4, 0, 0], [1, 4, 1, 1]) == true`
+                - `can_broadcast_shape_to([4, 3], [0, 3]) == false`
+                - `can_broadcast_shape_to([4, 3], [0, 0]) == false`
+            */
+
+            // This is essentially doing the following in Python:
+            // `for target_dim, src_dim in itertools.zip_longest(target_shape[::-1], src_shape[::-1], fillvalue=1)`
+            for (SizeT i = 0; i < max(target_ndims, src_ndims); i++) {
+                SizeT target_dim_i = target_ndims - i - 1;
+                SizeT src_dim_i = src_ndims - i - 1;
+
+                bool target_dim_exists = target_dim_i >= 0;
+                bool src_dim_exists = src_dim_i >= 0;
+
+                SizeT target_dim = target_dim_exists ? target_shape[target_dim_i] : 1;
+                SizeT src_dim = src_dim_exists ? src_shape[src_dim_i] : 1;
+
+                bool ok = src_dim == 1 || target_dim == src_dim;
+                if (!ok) return false;
+            }
+
+            return true;
+        }
     }
 
     typedef uint8_t NDSliceType;
@@ -55,7 +117,7 @@ namespace {
 
     namespace ndarray_util {
         template<typename SizeT>
-        SizeT deduce_ndims_after_slicing(SizeT ndims, const SizeT num_slices, const NDSlice *slices) {
+        SizeT deduce_ndims_after_slicing(SizeT ndims, SizeT num_slices, const NDSlice *slices) {
             irrt_assert(num_slices <= ndims);
 
             SizeT final_ndims = ndims;
@@ -150,17 +212,26 @@ namespace {
             return this->size() * itemsize;
         }
 
-        void set_value_at_pelement(uint8_t* pelement, uint8_t* pvalue) {
+        void set_value_at_pelement(uint8_t* pelement, const uint8_t* pvalue) {
             __builtin_memcpy(pelement, pvalue, itemsize);
         }
 
-        uint8_t* get_pelement(SizeT *indices) {
+        uint8_t* get_pelement(const SizeT *indices) {
             uint8_t* element = data;
             for (SizeT dim_i = 0; dim_i < ndims; dim_i++)
                 element += indices[dim_i] * strides[dim_i];
             return element;
         }
 
+        uint8_t* get_nth_pelement(SizeT nth) {
+            irrt_assert(0 <= nth);
+            irrt_assert(nth < this->size());
+
+            SizeT* indices = (SizeT*) __builtin_alloca(sizeof(SizeT) * this->ndims);
+            ndarray_util::set_indices_by_nth(this->ndims, this->shape, indices, nth);
+            return get_pelement(indices);
+        }
+
         // Get pointer to the first element of this ndarray, assuming
         // `this->size() > 0`, i.e., not "degenerate" due to zeroes in `this->shape`)
         //
@@ -171,7 +242,7 @@ namespace {
         }
 
         // Is the given `indices` valid/in-bounds?
-        bool in_bounds(SizeT *indices) {
+        bool in_bounds(const SizeT *indices) {
             for (SizeT dim_i = 0; dim_i < ndims; dim_i++) {
                 bool dim_ok = indices[dim_i] < shape[dim_i];
                 if (!dim_ok) return false;
@@ -180,7 +251,7 @@ namespace {
         }
 
         // Fill the ndarray with a value
-        void fill_generic(uint8_t* pvalue) {
+        void fill_generic(const uint8_t* pvalue) {
             NDArrayIndicesIter<SizeT> iter;
             iter.ndims = this->ndims;
             iter.shape = this->shape;
@@ -199,7 +270,7 @@ namespace {
         }
 
         // https://numpy.org/doc/stable/reference/generated/numpy.eye.html
-        void set_to_eye(SizeT k, uint8_t* zero_pvalue, uint8_t* one_pvalue) {
+        void set_to_eye(SizeT k, const uint8_t* zero_pvalue, const uint8_t* one_pvalue) {
             __builtin_assume(ndims == 2);
 
             // TODO: Better implementation
@@ -275,6 +346,63 @@ namespace {
 
             irrt_assert(dst_axis == dst_ndarray->ndims); // Sanity check on the implementation
         }
+
+        // Similar to `np.broadcast_to(<ndarray>, <target_shape>)`
+        // Assumptions:
+        //   - `this` has to be fully initialized.
+        //   - `dst_ndarray->ndims` has to be set.
+        //   - `dst_ndarray->shape` has to be set, this determines the shape `this` broadcasts to.
+        //
+        // Other notes:
+        //   - `dst_ndarray->data` does not have to be set, it will be set to `this->data`.
+        //   - `dst_ndarray->itemsize` does not have to be set, it will be set to `this->data`.
+        //   - `dst_ndarray->strides` does not have to be set, it will be overwritten.
+        //
+        // Cautions:
+        //   ```
+        //   xs = np.zeros((4,))
+        //   ys = np.zero((4, 1))
+        //   ys[:] = xs # ok
+        // 
+        //   xs = np.zeros((1, 4))
+        //   ys = np.zero((4,))
+        //   ys[:] = xs # allowed
+        //   # However `np.broadcast_to(xs, (4,))` would fails, as per numpy's broadcasting rule.
+        //   # and apparently numpy will "deprecate" this? SEE https://github.com/numpy/numpy/issues/21744
+        //   # This implementation will NOT support this assignment.
+        //   ```
+        void broadcast_to(NDArray<SizeT>* dst_ndarray) {
+            dst_ndarray->data = this->data;
+            dst_ndarray->itemsize = this->itemsize;
+
+            irrt_assert(
+                ndarray_util::can_broadcast_shape_to(
+                    dst_ndarray->ndims,
+                    dst_ndarray->shape,
+                    this->ndims,
+                    this->shape
+                )
+            );
+
+            SizeT stride_product = 1;
+            for (SizeT i = 0; i < max(this->ndims, dst_ndarray->ndims); i++) {
+                SizeT this_dim_i = this->ndims - i - 1;
+                SizeT dst_dim_i = dst_ndarray->ndims - i - 1;
+
+                bool this_dim_exists = this_dim_i >= 0;
+                bool dst_dim_exists = dst_dim_i >= 0;
+
+                // TODO: Explain how this works
+                bool c1 = this_dim_exists && this->shape[this_dim_i] == 1;
+                bool c2 = dst_dim_exists && dst_ndarray->shape[dst_dim_i] != 1;
+                if (!this_dim_exists || (c1 && c2)) {
+                    dst_ndarray->strides[dst_dim_i] = 0; // Freeze it in-place
+                } else {
+                    dst_ndarray->strides[dst_dim_i] = stride_product * this->itemsize;
+                    stride_product *= this->shape[this_dim_i]; // NOTE: this_dim_exist must be true here.
+                }
+            }
+        }
     };
 }
 
diff --git a/nac3core/irrt/irrt_test.cpp b/nac3core/irrt/irrt_test.cpp
index 1dda0b0d..f6e67ff3 100644
--- a/nac3core/irrt/irrt_test.cpp
+++ b/nac3core/irrt/irrt_test.cpp
@@ -33,10 +33,11 @@ void debug_print_array(const char* format, int len, T* as) {
 template <typename T>
 void assert_arrays_match(const char* label, const char* format, int len, T* expected, T* got) {
     if (!arrays_match(len, expected, got)) {
-        printf("expected %s: ", label);
+        printf(">>>>>>> %s\n", label);
+        printf("    Expecting = ");
         debug_print_array(format, len, expected);
         printf("\n");
-        printf("got %s: ", label);
+        printf("          Got = ");
         debug_print_array(format, len, got);
         printf("\n");
         test_fail();
@@ -46,22 +47,89 @@ void assert_arrays_match(const char* label, const char* format, int len, T* expe
 template <typename T>
 void assert_values_match(const char* label, const char* format, T expected, T got) {
     if (expected != got) {
-        printf("expected %s: ", label);
+        printf(">>>>>>> %s\n", label);
+        printf("    Expecting = ");
         printf(format, expected);
         printf("\n");
-        printf("got %s: ", label);
+        printf("          Got = ");
         printf(format, got);
         printf("\n");
         test_fail();
     }
 }
 
+void print_repeated(const char *str, int count) {
+    for (int i = 0; i < count; i++) {
+        printf("%s", str);
+    }
+}
+
+template<typename SizeT, typename ElementT>
+void __print_ndarray_aux(const char *format, bool first, bool last, SizeT* cursor, SizeT depth, NDArray<SizeT>* ndarray) {
+    // A really lazy recursive implementation
+    
+    // Add left padding unless its the first entry (since there would be "[[[" before it)
+    if (!first) {
+        print_repeated(" ", depth);
+    }
+
+    const SizeT dim = ndarray->shape[depth];
+    if (depth + 1 == ndarray->ndims) {
+        // Recursed down to last dimension, print the values in a nice list
+        printf("[");
+
+        SizeT* indices = (SizeT*) __builtin_alloca(sizeof(SizeT) * ndarray->ndims);
+        for (SizeT i = 0; i < dim; i++) {
+            ndarray_util::set_indices_by_nth(ndarray->ndims, ndarray->shape, indices, *cursor);
+            ElementT* pelement = (ElementT*) ndarray->get_pelement(indices);
+            ElementT element = *pelement;
+
+            if (i != 0) printf(", "); // List delimiter
+            printf(format, element);
+            printf("(@");
+            debug_print_array("%d", ndarray->ndims, indices);
+            printf(")");
+
+            (*cursor)++;
+        }
+        printf("]");
+    } else {
+        printf("[");
+        for (SizeT i = 0; i < ndarray->shape[depth]; i++) {
+            __print_ndarray_aux<SizeT, ElementT>(
+                format,
+                i == 0, // first?
+                i + 1 == dim, // last?
+                cursor,
+                depth + 1,
+                ndarray
+            );
+        }
+        printf("]");
+    }
+
+    // Add newline unless its the last entry (since there will be "]]]" after it)
+    if (!last) {
+        print_repeated("\n", depth);
+    }
+}
+
+template<typename SizeT, typename ElementT>
+void print_ndarray(const char *format, NDArray<SizeT>* ndarray) {
+    if (ndarray->ndims == 0) {
+        printf("<empty ndarray>");
+    } else {
+        SizeT cursor = 0;
+        __print_ndarray_aux<SizeT, ElementT>(format, true, true, &cursor, 0, ndarray);
+    }
+    printf("\n");
+}
+
 void test_calc_size_from_shape_normal() {
     // Test shapes with normal values
     BEGIN_TEST();
 
     int32_t shape[4] = { 2, 3, 5, 7 };
-    debug_print_array("%d", 4, shape);
     assert_values_match("size", "%d", 210, ndarray_util::calc_size_from_shape<int32_t>(4, shape));
 }
 
@@ -267,9 +335,6 @@ void test_ndslice_1() {
         assert dst_ndarray[0, 1] == 7.0
         assert dst_ndarray[1, 0] == 9.0
         assert dst_ndarray[1, 1] == 11.0
-
-        dst_ndarray[1, 0] == 99 # Write to `dst_ndarray`
-        assert ndarray[1, 3] == 99 # `ndarray` also updates!!
         ```
     */
     BEGIN_TEST();
@@ -410,6 +475,160 @@ void test_ndslice_2() {
     assert_values_match("dst_ndarray[1]", "%f", 9.0, *((double *) dst_ndarray.get_pelement((int32_t[dst_ndims]) { 1 })));
 }
 
+void test_can_broadcast_shape() {
+    BEGIN_TEST();
+
+    assert_values_match(
+        "can_broadcast_shape_to([3], [1, 1, 1, 1, 3]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 3 }, 5, (int32_t[]) { 1, 1, 1, 1, 3 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([3], [3, 1]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 3 }, 2, (int32_t[]) { 3, 1 }));
+    assert_values_match(
+        "can_broadcast_shape_to([3], [3]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 3 }, 1, (int32_t[]) { 3 }));
+    assert_values_match(
+        "can_broadcast_shape_to([1], [3]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 1 }, 1, (int32_t[]) { 3 }));
+    assert_values_match(
+        "can_broadcast_shape_to([1], [1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 1 }, 1, (int32_t[]) { 1 }));
+    assert_values_match(
+        "can_broadcast_shape_to([256, 256, 3], [256, 1, 3]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(3, (int32_t[]) { 256, 256, 3 }, 3, (int32_t[]) { 256, 1, 3 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([256, 256, 3], [3]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(3, (int32_t[]) { 256, 256, 3 }, 1, (int32_t[]) { 3 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([256, 256, 3], [2]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(3, (int32_t[]) { 256, 256, 3 }, 1, (int32_t[]) { 2 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([256, 256, 3], [1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(3, (int32_t[]) { 256, 256, 3 }, 1, (int32_t[]) { 1 })
+    );
+
+    // In cases when the shapes contain zero(es)
+    assert_values_match(
+        "can_broadcast_shape_to([0], [1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 0 }, 1, (int32_t[]) { 1 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([0], [2]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(1, (int32_t[]) { 0 }, 1, (int32_t[]) { 2 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([0, 4, 0, 0], [1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(4, (int32_t[]) { 0, 4, 0, 0 }, 1, (int32_t[]) { 1 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([0, 4, 0, 0], [1, 1, 1, 1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(4, (int32_t[]) { 0, 4, 0, 0 }, 4, (int32_t[]) { 1, 1, 1, 1 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([0, 4, 0, 0], [1, 4, 1, 1]) == true",
+        "%d",
+        true,
+        ndarray_util::can_broadcast_shape_to(4, (int32_t[]) { 0, 4, 0, 0 }, 4, (int32_t[]) { 1, 4, 1, 1 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([4, 3], [0, 3]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(2, (int32_t[]) { 4, 3 }, 2, (int32_t[]) { 0, 3 })
+    );
+    assert_values_match(
+        "can_broadcast_shape_to([4, 3], [0, 0]) == false",
+        "%d",
+        false,
+        ndarray_util::can_broadcast_shape_to(2, (int32_t[]) { 4, 3 }, 2, (int32_t[]) { 0, 0 })
+    );
+}
+
+void test_ndarray_broadcast_1() {
+    /*
+    # array = np.array([[19.9, 29.9, 39.9, 49.9]], dtype=np.float64)
+    # >>> [[19.9 29.9 39.9 49.9]]
+    #
+    # array = np.broadcast_to(array, (2, 3, 4))
+    # >>> [[[19.9 29.9 39.9 49.9]
+    # >>>   [19.9 29.9 39.9 49.9]
+    # >>>   [19.9 29.9 39.9 49.9]]
+    # >>>  [[19.9 29.9 39.9 49.9]
+    # >>>   [19.9 29.9 39.9 49.9]
+    # >>>   [19.9 29.9 39.9 49.9]]]
+    #
+    # assery array.strides == (0, 0, 8)
+
+    */
+    BEGIN_TEST();
+
+    double in_data[4] = { 19.9, 29.9, 39.9, 49.9 };
+    const int32_t in_ndims = 2;
+    int32_t in_shape[in_ndims] = {1, 4};
+    int32_t in_strides[in_ndims] = {};
+    NDArray<int32_t> ndarray = {
+        .data = (uint8_t*) in_data,
+        .itemsize = sizeof(double),
+        .ndims = in_ndims,
+        .shape = in_shape,
+        .strides = in_strides
+    };
+    ndarray.set_strides_by_shape();
+
+    const int32_t dst_ndims = 3;
+    int32_t dst_shape[dst_ndims] = {2, 3, 4};
+    int32_t dst_strides[dst_ndims] = {};
+    NDArray<int32_t> dst_ndarray = {
+        .ndims = dst_ndims,
+        .shape = dst_shape,
+        .strides = dst_strides
+    };
+
+    ndarray.broadcast_to(&dst_ndarray);
+
+    assert_arrays_match("dst_ndarray->strides", "%d", dst_ndims, (int32_t[]) { 0, 0, 8 }, dst_ndarray.strides);
+
+    assert_values_match("dst_ndarray[0, 0, 0]", "%f", 19.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 0, 0})));
+    assert_values_match("dst_ndarray[0, 0, 1]", "%f", 29.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 0, 1})));
+    assert_values_match("dst_ndarray[0, 0, 2]", "%f", 39.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 0, 2})));
+    assert_values_match("dst_ndarray[0, 0, 3]", "%f", 49.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 0, 3})));
+    assert_values_match("dst_ndarray[0, 1, 0]", "%f", 19.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 1, 0})));
+    assert_values_match("dst_ndarray[0, 1, 1]", "%f", 29.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 1, 1})));
+    assert_values_match("dst_ndarray[0, 1, 2]", "%f", 39.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 1, 2})));
+    assert_values_match("dst_ndarray[0, 1, 3]", "%f", 49.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {0, 1, 3})));
+    assert_values_match("dst_ndarray[1, 2, 3]", "%f", 49.9, *((double*) dst_ndarray.get_pelement((int32_t[]) {1, 2, 3})));
+}
+
 int main() {
     test_calc_size_from_shape_normal();
     test_calc_size_from_shape_has_zero();
@@ -423,5 +642,7 @@ int main() {
     test_slice_4();
     test_ndslice_1();
     test_ndslice_2();
+    test_can_broadcast_shape();
+    test_ndarray_broadcast_1();
     return 0;
 }
\ No newline at end of file
diff --git a/nac3core/irrt/irrt_utils.hpp b/nac3core/irrt/irrt_utils.hpp
index 033b995e..8d69b6a1 100644
--- a/nac3core/irrt/irrt_utils.hpp
+++ b/nac3core/irrt/irrt_utils.hpp
@@ -30,6 +30,7 @@ namespace {
         *death = 0; // TODO: address 0 on hardware might be writable?
     }
 
+    // TODO: Make this a macro and allow it to be toggled on/off (e.g., debug vs release)
     void irrt_assert(bool condition) {
         if (!condition) irrt_panic();
     }