compiler: Implement binary NumPy math functions (arctan2, …)

The bulk of the diff is just factoring out the implementation for binary arithmetic implementations, to be reused for binary function calls.
2020-11-10 22:24:04 +01:00 · 2020-11-10 22:24:04 +01:00 · d95e619567
commit d95e619567
parent fcf4763ae7
7 changed files with 211 additions and 135 deletions
--- a/artiq/compiler/math_fns.py
+++ b/artiq/compiler/math_fns.py
@ -53,23 +53,36 @@ unary_fp_runtime_calls = [
    ("cbrt", "cbrt"),
 ]
 #: (float, float) -> float numpy.* math functions lowered to runtime calls.
 binary_fp_runtime_calls = [
    ("arctan2", "atan2"),
    ("copysign", "copysign"),
    ("fmax", "fmax"),
    ("fmin", "fmin"),
    # ("ldexp", "ldexp"),  # One argument is an int; would need a bit more plumbing.
    ("hypot", "hypot"),
    ("nextafter", "nextafter"),
 ]
 #: Array handling builtins (special treatment due to allocations).
 numpy_builtins = ["transpose"]
-def unary_fp_type(name):
+def fp_runtime_type(name, arity):
-    return types.TExternalFunction(OrderedDict([("arg", builtins.TFloat())]),
+    args = [("arg{}".format(i), builtins.TFloat()) for i in range(arity)]
    return types.TExternalFunction(OrderedDict(args),
                                   builtins.TFloat(),
                                   name,
                                   # errno isn't observable from ARTIQ Python.
                                   flags={"nounwind", "nowrite"},
                                   broadcast_across_arrays=True)
 numpy_map = {
-    getattr(numpy, symbol): unary_fp_type(mangle)
+    getattr(numpy, symbol): fp_runtime_type(mangle, arity=1)
    for symbol, mangle in (unary_fp_intrinsics + unary_fp_runtime_calls)
 }
 for symbol, mangle in binary_fp_runtime_calls:
    numpy_map[getattr(numpy, symbol)] = fp_runtime_type(mangle, arity=2)
 for name in numpy_builtins:
    numpy_map[getattr(numpy, name)] = types.TBuiltinFunction("numpy." + name)
--- a/artiq/compiler/transforms/artiq_ir_generator.py
+++ b/artiq/compiler/transforms/artiq_ir_generator.py
@ -1502,6 +1502,48 @@ class ARTIQIRGenerator(algorithm.Visitor):
            self.final_branch = old_final_branch
            self.unwind_target = old_unwind
    def _make_array_elementwise_binop(self, name, result_type, lhs_type,
                                      rhs_type, make_op):
        def body_gen(result, lhs, rhs):
            # At this point, shapes are assumed to match; could just pass buffer
            # pointer for two of the three arrays as well.
            result_buffer = self.append(ir.GetAttr(result, "buffer"))
            shape = self.append(ir.GetAttr(result, "shape"))
            num_total_elts = self._get_total_array_len(shape)
            if builtins.is_array(lhs.type):
                lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
                def get_left(index):
                    return self.append(ir.GetElem(lhs_buffer, index))
            else:
                def get_left(index):
                    return lhs
            if builtins.is_array(rhs.type):
                rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
                def get_right(index):
                    return self.append(ir.GetElem(rhs_buffer, index))
            else:
                def get_right(index):
                    return rhs
            def loop_gen(index):
                l = get_left(index)
                r = get_right(index)
                result = make_op(l, r)
                self.append(ir.SetElem(result_buffer, index, result))
                return self.append(
                    ir.Arith(ast.Add(loc=None), index,
                             ir.Constant(1, self._size_type)))
            self._make_loop(
                ir.Constant(0, self._size_type), lambda index: self.append(
                    ir.Compare(ast.Lt(loc=None), index, num_total_elts)),
                loop_gen)
        return self._make_array_binop(name, result_type, lhs_type, rhs_type,
                                      body_gen)
    def _mangle_arrayop_types(self, types):
        def name_error(typ):
            assert False, "Internal compiler error: No RPC tag for {}".format(typ)
@ -1517,53 +1559,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
        return "_".join(mangle_name(t) for t in types)
-    def _get_array_binop(self, op, result_type, lhs_type, rhs_type):
+    def _get_array_elementwise_binop(self, name, make_op, result_type, lhs_type, rhs_type):
        # Currently, we always have any type coercions resolved explicitly in the AST.
        # In the future, this might no longer be true and the three types might all
        # differ.
        name = "_array_{}_{}".format(
-            type(op).__name__,
+            name,
            self._mangle_arrayop_types([result_type, lhs_type, rhs_type]))
        if name not in self.array_op_funcs:
-
+            self.array_op_funcs[name] = self._make_array_elementwise_binop(
-            def body_gen(result, lhs, rhs):
+                name, result_type, lhs_type, rhs_type, make_op)
                # At this point, shapes are assumed to match; could just pass buffer
                # pointer for two of the three arrays as well.
                result_buffer = self.append(ir.GetAttr(result, "buffer"))
                shape = self.append(ir.GetAttr(result, "shape"))
                num_total_elts = self._get_total_array_len(shape)
                if builtins.is_array(lhs.type):
                    lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
                    def get_left(index):
                        return self.append(ir.GetElem(lhs_buffer, index))
                else:
                    def get_left(index):
                        return lhs
                if builtins.is_array(rhs.type):
                    rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
                    def get_right(index):
                        return self.append(ir.GetElem(rhs_buffer, index))
                else:
                    def get_right(index):
                        return rhs
                def loop_gen(index):
                    l = get_left(index)
                    r = get_right(index)
                    result = self.append(ir.Arith(op, l, r))
                    self.append(ir.SetElem(result_buffer, index, result))
                    return self.append(
                        ir.Arith(ast.Add(loc=None), index,
                                 ir.Constant(1, self._size_type)))
                self._make_loop(
                    ir.Constant(0, self._size_type), lambda index: self.append(
                        ir.Compare(ast.Lt(loc=None), index, num_total_elts)), loop_gen)
            self.array_op_funcs[name] = self._make_array_binop(
                name, result_type, lhs_type, rhs_type, body_gen)
        return self.array_op_funcs[name]
    def _invoke_arrayop(self, func, params):
@ -1721,6 +1726,34 @@ class ARTIQIRGenerator(algorithm.Visitor):
            return self.append(ir.Alloc([result_buffer, shape], node.type))
        return self.append(ir.GetElem(result_buffer, ir.Constant(0, self._size_type)))
    def _broadcast_binop(self, name, make_op, result_type, lhs, rhs):
        # Broadcast scalars (broadcasting higher dimensions is not yet allowed in the
        # language).
        broadcast = False
        array_arg = lhs
        if not builtins.is_array(lhs.type):
            broadcast = True
            array_arg = rhs
        elif not builtins.is_array(rhs.type):
            broadcast = True
        shape = self.append(ir.GetAttr(array_arg, "shape"))
        if not broadcast:
            rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
            self._make_check(
                self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
                lambda: self.alloc_exn(
                    builtins.TException("ValueError"),
                    ir.Constant("operands could not be broadcast together",
                                builtins.TStr())))
        elt = result_type.find()["elt"]
        result, _ = self._allocate_new_array(elt, shape)
        func = self._get_array_elementwise_binop(name, make_op, result_type, lhs.type,
            rhs.type)
        self._invoke_arrayop(func, [result, lhs, rhs])
        return result
    def visit_BinOpT(self, node):
        if isinstance(node.op, ast.MatMult):
@ -1728,31 +1761,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
        elif builtins.is_array(node.type):
            lhs = self.visit(node.left)
            rhs = self.visit(node.right)
-
+            name = type(node.op).__name__
-            # Broadcast scalars.
+            def make_op(l, r):
-            broadcast = False
+                return self.append(ir.Arith(node.op, l, r))
-            array_arg = lhs
+            return self._broadcast_binop(name, make_op, node.type, lhs, rhs)
            if not builtins.is_array(lhs.type):
                broadcast = True
                array_arg = rhs
            elif not builtins.is_array(rhs.type):
                broadcast = True
            shape = self.append(ir.GetAttr(array_arg, "shape"))
            if not broadcast:
                rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
                self._make_check(
                    self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
                    lambda: self.alloc_exn(
                        builtins.TException("ValueError"),
                        ir.Constant("operands could not be broadcast together",
                                    builtins.TStr())))
            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
            func = self._get_array_binop(node.op, node.type, lhs.type, rhs.type)
            self._invoke_arrayop(func, [result, lhs, rhs])
            return result
        elif builtins.is_numeric(node.type):
            lhs = self.visit(node.left)
            rhs = self.visit(node.right)
@ -2427,25 +2439,27 @@ class ARTIQIRGenerator(algorithm.Visitor):
        if types.is_builtin(node.func.type):
            insn = self.visit_builtin_call(node)
        elif (types.is_broadcast_across_arrays(node.func.type) and len(args) >= 1
-              and builtins.is_array(args[0].type)):
+              and any(builtins.is_array(arg.type) for arg in args)):
            # The iodelay machinery set up in the surrounding code was
            # deprecated/a relic from the past when array broadcasting support
            # was added, so no attempt to keep the delay tracking intact is
            # made.
-            assert len(args) == 1, "Broadcasting for multiple arguments not implemented"
+            def make_call(*args):
-
+                return self._user_call(ir.Constant(None, callee.type), args, {},
            def make_call(val):
                return self._user_call(ir.Constant(None, callee.type), [val], {},
                                       node.arg_exprs)
            shape = self.append(ir.GetAttr(args[0], "shape"))
            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
            # TODO: Generate more generically if non-externals are allowed.
            name = node.func.type.find().name
-            func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
+
-            self._invoke_arrayop(func, [result, args[0]])
+            if len(args) == 1:
-            insn = result
+                shape = self.append(ir.GetAttr(args[0], "shape"))
                result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
                func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
                self._invoke_arrayop(func, [result, args[0]])
                insn = result
            elif len(args) == 2:
                insn = self._broadcast_binop(name, make_call, node.type, *args)
            else:
                assert False, "Broadcasting for {} arguments not implemented".format(len)
        else:
            insn = self._user_call(callee, args, keywords, node.arg_exprs)
            if isinstance(node.func, asttyped.AttributeT):
--- a/artiq/compiler/transforms/inferencer.py
+++ b/artiq/compiler/transforms/inferencer.py
@ -405,6 +405,47 @@ class Inferencer(algorithm.Visitor):
        else:
            assert False
    def _coerce_binary_broadcast_op(self, left, right, map_return_elt, op_loc):
        def num_dims(typ):
            if builtins.is_array(typ):
                # TODO: If number of dimensions is ever made a non-fixed parameter,
                # need to acutally unify num_dims in _coerce_binop/….
                return typ.find()["num_dims"].value
            return 0
        left_dims = num_dims(left.type)
        right_dims = num_dims(right.type)
        if left_dims != right_dims and left_dims != 0 and right_dims != 0:
            # Mismatch (only scalar broadcast supported for now).
            note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                            {"num_dims": left_dims}, left.loc)
            note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                            {"num_dims": right_dims}, right.loc)
            diag = diagnostic.Diagnostic(
                "error", "dimensions of '{op}' array operands must match",
                {"op": op_loc.source()}, op_loc, [left.loc, right.loc], [note1, note2])
            self.engine.process(diag)
            return
        def map_node_type(typ):
            if not builtins.is_array(typ):
                # This is a single value broadcast across the array.
                return typ
            return typ.find()["elt"]
        # Figure out result type, handling broadcasts.
        result_dims = left_dims if left_dims else right_dims
        def map_return(typ):
            elt = map_return_elt(typ)
            result = builtins.TArray(elt=elt, num_dims=result_dims)
            left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
            right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
            return (result, left, right)
        return self._coerce_numeric((left, right),
                                    map_return=map_return,
                                    map_node_type=map_node_type)
    def _coerce_binop(self, op, left, right):
        if isinstance(op, ast.MatMult):
            if types.is_var(left.type) or types.is_var(right.type):
@ -477,45 +518,11 @@ class Inferencer(algorithm.Visitor):
                self.engine.process(diag)
                return
-            def num_dims(typ):
+            def map_result(typ):
-                if builtins.is_array(typ):
+                if isinstance(op, ast.Div):
-                    # TODO: If number of dimensions is ever made a non-fixed parameter,
+                    return builtins.TFloat()
-                    # need to acutally unify num_dims in _coerce_binop.
+                return typ
-                    return typ.find()["num_dims"].value
+            return self._coerce_binary_broadcast_op(left, right, map_result, op.loc)
                return 0
            left_dims = num_dims(left.type)
            right_dims = num_dims(right.type)
            if left_dims != right_dims and left_dims != 0 and right_dims != 0:
                # Mismatch (only scalar broadcast supported for now).
                note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                              {"num_dims": left_dims}, left.loc)
                note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                              {"num_dims": right_dims}, right.loc)
                diag = diagnostic.Diagnostic(
                    "error", "dimensions of '{op}' array operands must match",
                    {"op": op.loc.source()}, op.loc, [left.loc, right.loc], [note1, note2])
                self.engine.process(diag)
                return
            def map_node_type(typ):
                if not builtins.is_array(typ):
                    # This is a single value broadcast across the array.
                    return typ
                return typ.find()["elt"]
            # Figure out result type, handling broadcasts.
            result_dims = left_dims if left_dims else right_dims
            def map_return(typ):
                elt = builtins.TFloat() if isinstance(op, ast.Div) else typ
                result = builtins.TArray(elt=elt, num_dims=result_dims)
                left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
                right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
                return (result, left, right)
            return self._coerce_numeric((left, right),
                                        map_return=map_return,
                                        map_node_type=map_node_type)
        elif isinstance(op, (ast.BitAnd, ast.BitOr, ast.BitXor,
                           ast.LShift, ast.RShift)):
            # bitwise operators require integers
@ -1290,15 +1297,26 @@ class Inferencer(algorithm.Visitor):
            self.engine.process(diag)
            return
-        # Array broadcasting for unary functions explicitly marked as such.
+        # Array broadcasting for functions explicitly marked as such.
-        if len(node.args) == typ_arity == 1 and types.is_broadcast_across_arrays(typ):
+        if len(node.args) == typ_arity and types.is_broadcast_across_arrays(typ):
-            arg_type = node.args[0].type.find()
+            if typ_arity == 1:
-            if builtins.is_array(arg_type):
+                arg_type = node.args[0].type.find()
-                typ_arg, = typ_args.values()
+                if builtins.is_array(arg_type):
-                self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
+                    typ_arg, = typ_args.values()
-                self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
+                    self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
-                            node.loc, None)
+                    self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
-                return
+                                node.loc, None)
                    return
            elif typ_arity == 2:
                if any(builtins.is_array(arg.type) for arg in node.args):
                    ret, arg0, arg1 = self._coerce_binary_broadcast_op(
                        node.args[0], node.args[1], lambda t: typ_ret, node.loc)
                    node.args[0] = self._coerce_one(arg0, node.args[0],
                                                    other_node=node.args[1])
                    node.args[1] = self._coerce_one(arg1, node.args[1],
                                                    other_node=node.args[0])
                    self._unify(node.type, ret, node.loc, None)
                    return
        for actualarg, (formalname, formaltyp) in \
                zip(node.args, list(typ_args.items()) + list(typ_optargs.items())):
--- a/artiq/firmware/ksupport/api.rs
+++ b/artiq/firmware/ksupport/api.rs
@ -76,6 +76,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(atanh),
    api!(cbrt),
    api!(ceil),
    api!(copysign),
    api!(cos),
    api!(cosh),
    api!(erf),
@ -86,6 +87,8 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(expm1),
    api!(fabs),
    api!(floor),
    // api!(fmax),
    // api!(fmin),
    //api!(fma),
    api!(fmod),
    api!(hypot),
@ -96,6 +99,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(log),
    //api!(log2),
    api!(log10),
    api!(nextafter),
    api!(pow),
    api!(round),
    api!(sin),
--- a/artiq/test/coredevice/test_numpy.py
+++ b/artiq/test/coredevice/test_numpy.py
@ -27,7 +27,7 @@ class CompareHostDeviceTest(ExperimentCase):
    def _test_binop(self, op, a, b):
        exp = self.create(_RunOnDevice)
        exp.run = kernel_from_string(["a", "b", "callback", "numpy"],
-                                     "callback(a " + op + "b)",
+                                     "callback(" + op + ")",
                                     decorator=portable)
        checked = False
@ -74,16 +74,17 @@ class CompareHostDeviceTest(ExperimentCase):
                for typ in [numpy.int32, numpy.int64, numpy.float]]
        for op in ELEM_WISE_BINOPS:
            for arg in args:
-                self._test_binop(op, *arg)
+                self._test_binop("a" + op + "b", *arg)
    def test_scalar_matrix_binops(self):
        for typ in [numpy.int32, numpy.int64, numpy.float]:
            scalar = typ(3)
            matrix = numpy.array([[4, 5, 6], [7, 8, 9]], dtype=typ)
            for op in ELEM_WISE_BINOPS:
-                self._test_binop(op, scalar, matrix)
+                code = "a" + op + "b"
-                self._test_binop(op, matrix, scalar)
+                self._test_binop(code, scalar, matrix)
-                self._test_binop(op, matrix, matrix)
+                self._test_binop(code, matrix, scalar)
                self._test_binop(code, matrix, matrix)
    def test_unary_math_fns(self):
        names = [
@ -99,3 +100,17 @@ class CompareHostDeviceTest(ExperimentCase):
            # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
            self._test_unaryop(op, 0.51)
            self._test_unaryop(op, numpy.array([[0.3, 0.4], [0.51, 0.6]]))
    def test_binary_math_fns(self):
        names = [name for name, _ in math_fns.binary_fp_runtime_calls]
        exp = self.create(_RunOnDevice)
        if exp.core.target_cls != CortexA9Target:
            names.remove("fmax")
            names.remove("fmin")
        for name in names:
            code = "numpy.{}(a, b)".format(name)
            # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
            self._test_binop(code, 1.0, 2.0)
            self._test_binop(code, 1.0, numpy.array([2.0, 3.0]))
            self._test_binop(code, numpy.array([1.0, 2.0]), 3.0)
            self._test_binop(code, numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0]))
--- a/artiq/test/lit/embedding/array_math_fns.py
+++ b/artiq/test/lit/embedding/array_math_fns.py
@ -6,10 +6,21 @@ import numpy as np
@kernel
 def entrypoint():
    # Just make sure everything compiles.
    # LLVM intrinsic:
    a = np.array([1.0, 2.0, 3.0])
    b = np.sin(a)
    assert b.shape == a.shape
    # libm:
    c = np.array([1.0, 2.0, 3.0])
    d = np.arctan(c)
    assert d.shape == c.shape
    # libm, binary:
    e = np.array([1.0, 2.0, 3.0])
    f = np.array([4.0, 5.0, 6.0])
    g = np.arctan2(e, f)
    # g = np.arctan2(e, 0.0)
    # g = np.arctan2(0.0, f)
    assert g.shape == e.shape
--- a/artiq/test/lit/embedding/math_fns.py
+++ b/artiq/test/lit/embedding/math_fns.py
@ -28,3 +28,4 @@ def entrypoint():
    assert numpy.arcsin(0.0) == 0.0
    assert numpy.arccos(1.0) == 0.0
    assert numpy.arctan(0.0) == 0.0
    assert numpy.arctan2(0.0, 1.0) == 0.0