compiler: Implement binary NumPy math functions (arctan2, …)

The bulk of the diff is just factoring out the implementation for binary arithmetic implementations, to be reused for binary function calls.
2020-11-10 22:24:04 +01:00 · 2020-11-10 22:24:04 +01:00 · d95e619567
parent fcf4763ae7
commit d95e619567
7 changed files with 211 additions and 135 deletions
--- a/artiq/compiler/math_fns.py
+++ b/artiq/compiler/math_fns.py
@ -53,23 +53,36 @@ unary_fp_runtime_calls = [
    ("cbrt", "cbrt"),
 ]
 #: (float, float) -> float numpy.* math functions lowered to runtime calls.
 binary_fp_runtime_calls = [
    ("arctan2", "atan2"),
    ("copysign", "copysign"),
    ("fmax", "fmax"),
    ("fmin", "fmin"),
    # ("ldexp", "ldexp"),  # One argument is an int; would need a bit more plumbing.
    ("hypot", "hypot"),
    ("nextafter", "nextafter"),
 ]
 #: Array handling builtins (special treatment due to allocations).
 numpy_builtins = ["transpose"]
-def unary_fp_type(name):
+def fp_runtime_type(name, arity):
-    return types.TExternalFunction(OrderedDict([("arg", builtins.TFloat())]),
+    args = [("arg{}".format(i), builtins.TFloat()) for i in range(arity)]
    return types.TExternalFunction(OrderedDict(args),
                                   builtins.TFloat(),
                                   name,
                                   # errno isn't observable from ARTIQ Python.
                                   flags={"nounwind", "nowrite"},
                                   broadcast_across_arrays=True)
 numpy_map = {
-    getattr(numpy, symbol): unary_fp_type(mangle)
+    getattr(numpy, symbol): fp_runtime_type(mangle, arity=1)
    for symbol, mangle in (unary_fp_intrinsics + unary_fp_runtime_calls)
 }
 for symbol, mangle in binary_fp_runtime_calls:
    numpy_map[getattr(numpy, symbol)] = fp_runtime_type(mangle, arity=2)
 for name in numpy_builtins:
    numpy_map[getattr(numpy, name)] = types.TBuiltinFunction("numpy." + name)
--- a/artiq/compiler/transforms/artiq_ir_generator.py
+++ b/artiq/compiler/transforms/artiq_ir_generator.py
@ -1502,30 +1502,8 @@ class ARTIQIRGenerator(algorithm.Visitor):
            self.final_branch = old_final_branch
            self.unwind_target = old_unwind
-    def _mangle_arrayop_types(self, types):
+    def _make_array_elementwise_binop(self, name, result_type, lhs_type,
-        def name_error(typ):
+                                      rhs_type, make_op):
            assert False, "Internal compiler error: No RPC tag for {}".format(typ)
        def mangle_name(typ):
            typ = typ.find()
            # rpc_tag is used to turn element types into mangled names for no
            # particularly good reason apart from not having to invent yet another
            # string representation.
            if builtins.is_array(typ):
                return mangle_name(typ["elt"]) + str(typ["num_dims"].find().value)
            return ir.rpc_tag(typ, name_error).decode()
        return "_".join(mangle_name(t) for t in types)
    def _get_array_binop(self, op, result_type, lhs_type, rhs_type):
        # Currently, we always have any type coercions resolved explicitly in the AST.
        # In the future, this might no longer be true and the three types might all
        # differ.
        name = "_array_{}_{}".format(
            type(op).__name__,
            self._mangle_arrayop_types([result_type, lhs_type, rhs_type]))
        if name not in self.array_op_funcs:
        def body_gen(result, lhs, rhs):
            # At this point, shapes are assumed to match; could just pass buffer
            # pointer for two of the three arrays as well.
@ -1552,7 +1530,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
            def loop_gen(index):
                l = get_left(index)
                r = get_right(index)
-                    result = self.append(ir.Arith(op, l, r))
+                result = make_op(l, r)
                self.append(ir.SetElem(result_buffer, index, result))
                return self.append(
                    ir.Arith(ast.Add(loc=None), index,
@ -1560,10 +1538,37 @@ class ARTIQIRGenerator(algorithm.Visitor):
            self._make_loop(
                ir.Constant(0, self._size_type), lambda index: self.append(
-                        ir.Compare(ast.Lt(loc=None), index, num_total_elts)), loop_gen)
+                    ir.Compare(ast.Lt(loc=None), index, num_total_elts)),
                loop_gen)
-            self.array_op_funcs[name] = self._make_array_binop(
+        return self._make_array_binop(name, result_type, lhs_type, rhs_type,
-                name, result_type, lhs_type, rhs_type, body_gen)
+                                      body_gen)
    def _mangle_arrayop_types(self, types):
        def name_error(typ):
            assert False, "Internal compiler error: No RPC tag for {}".format(typ)
        def mangle_name(typ):
            typ = typ.find()
            # rpc_tag is used to turn element types into mangled names for no
            # particularly good reason apart from not having to invent yet another
            # string representation.
            if builtins.is_array(typ):
                return mangle_name(typ["elt"]) + str(typ["num_dims"].find().value)
            return ir.rpc_tag(typ, name_error).decode()
        return "_".join(mangle_name(t) for t in types)
    def _get_array_elementwise_binop(self, name, make_op, result_type, lhs_type, rhs_type):
        # Currently, we always have any type coercions resolved explicitly in the AST.
        # In the future, this might no longer be true and the three types might all
        # differ.
        name = "_array_{}_{}".format(
            name,
            self._mangle_arrayop_types([result_type, lhs_type, rhs_type]))
        if name not in self.array_op_funcs:
            self.array_op_funcs[name] = self._make_array_elementwise_binop(
                name, result_type, lhs_type, rhs_type, make_op)
        return self.array_op_funcs[name]
    def _invoke_arrayop(self, func, params):
@ -1721,15 +1726,9 @@ class ARTIQIRGenerator(algorithm.Visitor):
            return self.append(ir.Alloc([result_buffer, shape], node.type))
        return self.append(ir.GetElem(result_buffer, ir.Constant(0, self._size_type)))
-
+    def _broadcast_binop(self, name, make_op, result_type, lhs, rhs):
-    def visit_BinOpT(self, node):
+        # Broadcast scalars (broadcasting higher dimensions is not yet allowed in the
-        if isinstance(node.op, ast.MatMult):
+        # language).
            return self._emit_matmult(node, node.left, node.right)
        elif builtins.is_array(node.type):
            lhs = self.visit(node.left)
            rhs = self.visit(node.right)
            # Broadcast scalars.
        broadcast = False
        array_arg = lhs
        if not builtins.is_array(lhs.type):
@ -1749,10 +1748,23 @@ class ARTIQIRGenerator(algorithm.Visitor):
                    ir.Constant("operands could not be broadcast together",
                                builtins.TStr())))
-            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
+        elt = result_type.find()["elt"]
-            func = self._get_array_binop(node.op, node.type, lhs.type, rhs.type)
+        result, _ = self._allocate_new_array(elt, shape)
        func = self._get_array_elementwise_binop(name, make_op, result_type, lhs.type,
            rhs.type)
        self._invoke_arrayop(func, [result, lhs, rhs])
        return result
    def visit_BinOpT(self, node):
        if isinstance(node.op, ast.MatMult):
            return self._emit_matmult(node, node.left, node.right)
        elif builtins.is_array(node.type):
            lhs = self.visit(node.left)
            rhs = self.visit(node.right)
            name = type(node.op).__name__
            def make_op(l, r):
                return self.append(ir.Arith(node.op, l, r))
            return self._broadcast_binop(name, make_op, node.type, lhs, rhs)
        elif builtins.is_numeric(node.type):
            lhs = self.visit(node.left)
            rhs = self.visit(node.right)
@ -2427,25 +2439,27 @@ class ARTIQIRGenerator(algorithm.Visitor):
        if types.is_builtin(node.func.type):
            insn = self.visit_builtin_call(node)
        elif (types.is_broadcast_across_arrays(node.func.type) and len(args) >= 1
-              and builtins.is_array(args[0].type)):
+              and any(builtins.is_array(arg.type) for arg in args)):
            # The iodelay machinery set up in the surrounding code was
            # deprecated/a relic from the past when array broadcasting support
            # was added, so no attempt to keep the delay tracking intact is
            # made.
-            assert len(args) == 1, "Broadcasting for multiple arguments not implemented"
+            def make_call(*args):
-
+                return self._user_call(ir.Constant(None, callee.type), args, {},
            def make_call(val):
                return self._user_call(ir.Constant(None, callee.type), [val], {},
                                       node.arg_exprs)
            shape = self.append(ir.GetAttr(args[0], "shape"))
            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
            # TODO: Generate more generically if non-externals are allowed.
            name = node.func.type.find().name
            if len(args) == 1:
                shape = self.append(ir.GetAttr(args[0], "shape"))
                result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
                func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
                self._invoke_arrayop(func, [result, args[0]])
                insn = result
            elif len(args) == 2:
                insn = self._broadcast_binop(name, make_call, node.type, *args)
            else:
                assert False, "Broadcasting for {} arguments not implemented".format(len)
        else:
            insn = self._user_call(callee, args, keywords, node.arg_exprs)
            if isinstance(node.func, asttyped.AttributeT):
--- a/artiq/compiler/transforms/inferencer.py
+++ b/artiq/compiler/transforms/inferencer.py
@ -405,6 +405,47 @@ class Inferencer(algorithm.Visitor):
        else:
            assert False
    def _coerce_binary_broadcast_op(self, left, right, map_return_elt, op_loc):
        def num_dims(typ):
            if builtins.is_array(typ):
                # TODO: If number of dimensions is ever made a non-fixed parameter,
                # need to acutally unify num_dims in _coerce_binop/….
                return typ.find()["num_dims"].value
            return 0
        left_dims = num_dims(left.type)
        right_dims = num_dims(right.type)
        if left_dims != right_dims and left_dims != 0 and right_dims != 0:
            # Mismatch (only scalar broadcast supported for now).
            note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                            {"num_dims": left_dims}, left.loc)
            note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                            {"num_dims": right_dims}, right.loc)
            diag = diagnostic.Diagnostic(
                "error", "dimensions of '{op}' array operands must match",
                {"op": op_loc.source()}, op_loc, [left.loc, right.loc], [note1, note2])
            self.engine.process(diag)
            return
        def map_node_type(typ):
            if not builtins.is_array(typ):
                # This is a single value broadcast across the array.
                return typ
            return typ.find()["elt"]
        # Figure out result type, handling broadcasts.
        result_dims = left_dims if left_dims else right_dims
        def map_return(typ):
            elt = map_return_elt(typ)
            result = builtins.TArray(elt=elt, num_dims=result_dims)
            left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
            right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
            return (result, left, right)
        return self._coerce_numeric((left, right),
                                    map_return=map_return,
                                    map_node_type=map_node_type)
    def _coerce_binop(self, op, left, right):
        if isinstance(op, ast.MatMult):
            if types.is_var(left.type) or types.is_var(right.type):
@ -477,45 +518,11 @@ class Inferencer(algorithm.Visitor):
                self.engine.process(diag)
                return
-            def num_dims(typ):
+            def map_result(typ):
-                if builtins.is_array(typ):
+                if isinstance(op, ast.Div):
-                    # TODO: If number of dimensions is ever made a non-fixed parameter,
+                    return builtins.TFloat()
                    # need to acutally unify num_dims in _coerce_binop.
                    return typ.find()["num_dims"].value
                return 0
            left_dims = num_dims(left.type)
            right_dims = num_dims(right.type)
            if left_dims != right_dims and left_dims != 0 and right_dims != 0:
                # Mismatch (only scalar broadcast supported for now).
                note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                              {"num_dims": left_dims}, left.loc)
                note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
                                              {"num_dims": right_dims}, right.loc)
                diag = diagnostic.Diagnostic(
                    "error", "dimensions of '{op}' array operands must match",
                    {"op": op.loc.source()}, op.loc, [left.loc, right.loc], [note1, note2])
                self.engine.process(diag)
                return
            def map_node_type(typ):
                if not builtins.is_array(typ):
                    # This is a single value broadcast across the array.
                return typ
-                return typ.find()["elt"]
+            return self._coerce_binary_broadcast_op(left, right, map_result, op.loc)
            # Figure out result type, handling broadcasts.
            result_dims = left_dims if left_dims else right_dims
            def map_return(typ):
                elt = builtins.TFloat() if isinstance(op, ast.Div) else typ
                result = builtins.TArray(elt=elt, num_dims=result_dims)
                left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
                right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
                return (result, left, right)
            return self._coerce_numeric((left, right),
                                        map_return=map_return,
                                        map_node_type=map_node_type)
        elif isinstance(op, (ast.BitAnd, ast.BitOr, ast.BitXor,
                           ast.LShift, ast.RShift)):
            # bitwise operators require integers
@ -1290,8 +1297,9 @@ class Inferencer(algorithm.Visitor):
            self.engine.process(diag)
            return
-        # Array broadcasting for unary functions explicitly marked as such.
+        # Array broadcasting for functions explicitly marked as such.
-        if len(node.args) == typ_arity == 1 and types.is_broadcast_across_arrays(typ):
+        if len(node.args) == typ_arity and types.is_broadcast_across_arrays(typ):
            if typ_arity == 1:
                arg_type = node.args[0].type.find()
                if builtins.is_array(arg_type):
                    typ_arg, = typ_args.values()
@ -1299,6 +1307,16 @@ class Inferencer(algorithm.Visitor):
                    self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
                                node.loc, None)
                    return
            elif typ_arity == 2:
                if any(builtins.is_array(arg.type) for arg in node.args):
                    ret, arg0, arg1 = self._coerce_binary_broadcast_op(
                        node.args[0], node.args[1], lambda t: typ_ret, node.loc)
                    node.args[0] = self._coerce_one(arg0, node.args[0],
                                                    other_node=node.args[1])
                    node.args[1] = self._coerce_one(arg1, node.args[1],
                                                    other_node=node.args[0])
                    self._unify(node.type, ret, node.loc, None)
                    return
        for actualarg, (formalname, formaltyp) in \
                zip(node.args, list(typ_args.items()) + list(typ_optargs.items())):
--- a/artiq/firmware/ksupport/api.rs
+++ b/artiq/firmware/ksupport/api.rs
@ -76,6 +76,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(atanh),
    api!(cbrt),
    api!(ceil),
    api!(copysign),
    api!(cos),
    api!(cosh),
    api!(erf),
@ -86,6 +87,8 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(expm1),
    api!(fabs),
    api!(floor),
    // api!(fmax),
    // api!(fmin),
    //api!(fma),
    api!(fmod),
    api!(hypot),
@ -96,6 +99,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
    api!(log),
    //api!(log2),
    api!(log10),
    api!(nextafter),
    api!(pow),
    api!(round),
    api!(sin),
--- a/artiq/test/coredevice/test_numpy.py
+++ b/artiq/test/coredevice/test_numpy.py
@ -27,7 +27,7 @@ class CompareHostDeviceTest(ExperimentCase):
    def _test_binop(self, op, a, b):
        exp = self.create(_RunOnDevice)
        exp.run = kernel_from_string(["a", "b", "callback", "numpy"],
-                                     "callback(a " + op + "b)",
+                                     "callback(" + op + ")",
                                     decorator=portable)
        checked = False
@ -74,16 +74,17 @@ class CompareHostDeviceTest(ExperimentCase):
                for typ in [numpy.int32, numpy.int64, numpy.float]]
        for op in ELEM_WISE_BINOPS:
            for arg in args:
-                self._test_binop(op, *arg)
+                self._test_binop("a" + op + "b", *arg)
    def test_scalar_matrix_binops(self):
        for typ in [numpy.int32, numpy.int64, numpy.float]:
            scalar = typ(3)
            matrix = numpy.array([[4, 5, 6], [7, 8, 9]], dtype=typ)
            for op in ELEM_WISE_BINOPS:
-                self._test_binop(op, scalar, matrix)
+                code = "a" + op + "b"
-                self._test_binop(op, matrix, scalar)
+                self._test_binop(code, scalar, matrix)
-                self._test_binop(op, matrix, matrix)
+                self._test_binop(code, matrix, scalar)
                self._test_binop(code, matrix, matrix)
    def test_unary_math_fns(self):
        names = [
@ -99,3 +100,17 @@ class CompareHostDeviceTest(ExperimentCase):
            # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
            self._test_unaryop(op, 0.51)
            self._test_unaryop(op, numpy.array([[0.3, 0.4], [0.51, 0.6]]))
    def test_binary_math_fns(self):
        names = [name for name, _ in math_fns.binary_fp_runtime_calls]
        exp = self.create(_RunOnDevice)
        if exp.core.target_cls != CortexA9Target:
            names.remove("fmax")
            names.remove("fmin")
        for name in names:
            code = "numpy.{}(a, b)".format(name)
            # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
            self._test_binop(code, 1.0, 2.0)
            self._test_binop(code, 1.0, numpy.array([2.0, 3.0]))
            self._test_binop(code, numpy.array([1.0, 2.0]), 3.0)
            self._test_binop(code, numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0]))
--- a/artiq/test/lit/embedding/array_math_fns.py
+++ b/artiq/test/lit/embedding/array_math_fns.py
@ -6,10 +6,21 @@ import numpy as np
@kernel
 def entrypoint():
    # Just make sure everything compiles.
    # LLVM intrinsic:
    a = np.array([1.0, 2.0, 3.0])
    b = np.sin(a)
    assert b.shape == a.shape
    # libm:
    c = np.array([1.0, 2.0, 3.0])
    d = np.arctan(c)
    assert d.shape == c.shape
    # libm, binary:
    e = np.array([1.0, 2.0, 3.0])
    f = np.array([4.0, 5.0, 6.0])
    g = np.arctan2(e, f)
    # g = np.arctan2(e, 0.0)
    # g = np.arctan2(0.0, f)
    assert g.shape == e.shape
--- a/artiq/test/lit/embedding/math_fns.py
+++ b/artiq/test/lit/embedding/math_fns.py
@ -28,3 +28,4 @@ def entrypoint():
    assert numpy.arcsin(0.0) == 0.0
    assert numpy.arccos(1.0) == 0.0
    assert numpy.arctan(0.0) == 0.0
    assert numpy.arctan2(0.0, 1.0) == 0.0