From d95e61956743d986970ad0f0e1d4cf4e3da86cf5 Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Tue, 10 Nov 2020 22:24:04 +0100 Subject: [PATCH] =?UTF-8?q?compiler:=20Implement=20binary=20NumPy=20math?= =?UTF-8?q?=20functions=20(arctan2,=20=E2=80=A6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bulk of the diff is just factoring out the implementation for binary arithmetic implementations, to be reused for binary function calls. --- artiq/compiler/math_fns.py | 21 ++- .../compiler/transforms/artiq_ir_generator.py | 170 ++++++++++-------- artiq/compiler/transforms/inferencer.py | 114 +++++++----- artiq/firmware/ksupport/api.rs | 4 + artiq/test/coredevice/test_numpy.py | 25 ++- artiq/test/lit/embedding/array_math_fns.py | 11 ++ artiq/test/lit/embedding/math_fns.py | 1 + 7 files changed, 211 insertions(+), 135 deletions(-) diff --git a/artiq/compiler/math_fns.py b/artiq/compiler/math_fns.py index 8b1b38914..c54eac552 100644 --- a/artiq/compiler/math_fns.py +++ b/artiq/compiler/math_fns.py @@ -53,23 +53,36 @@ unary_fp_runtime_calls = [ ("cbrt", "cbrt"), ] +#: (float, float) -> float numpy.* math functions lowered to runtime calls. +binary_fp_runtime_calls = [ + ("arctan2", "atan2"), + ("copysign", "copysign"), + ("fmax", "fmax"), + ("fmin", "fmin"), + # ("ldexp", "ldexp"), # One argument is an int; would need a bit more plumbing. + ("hypot", "hypot"), + ("nextafter", "nextafter"), +] + #: Array handling builtins (special treatment due to allocations). numpy_builtins = ["transpose"] -def unary_fp_type(name): - return types.TExternalFunction(OrderedDict([("arg", builtins.TFloat())]), +def fp_runtime_type(name, arity): + args = [("arg{}".format(i), builtins.TFloat()) for i in range(arity)] + return types.TExternalFunction(OrderedDict(args), builtins.TFloat(), name, # errno isn't observable from ARTIQ Python. flags={"nounwind", "nowrite"}, broadcast_across_arrays=True) - numpy_map = { - getattr(numpy, symbol): unary_fp_type(mangle) + getattr(numpy, symbol): fp_runtime_type(mangle, arity=1) for symbol, mangle in (unary_fp_intrinsics + unary_fp_runtime_calls) } +for symbol, mangle in binary_fp_runtime_calls: + numpy_map[getattr(numpy, symbol)] = fp_runtime_type(mangle, arity=2) for name in numpy_builtins: numpy_map[getattr(numpy, name)] = types.TBuiltinFunction("numpy." + name) diff --git a/artiq/compiler/transforms/artiq_ir_generator.py b/artiq/compiler/transforms/artiq_ir_generator.py index 9673eb89f..14a45a0e4 100644 --- a/artiq/compiler/transforms/artiq_ir_generator.py +++ b/artiq/compiler/transforms/artiq_ir_generator.py @@ -1502,6 +1502,48 @@ class ARTIQIRGenerator(algorithm.Visitor): self.final_branch = old_final_branch self.unwind_target = old_unwind + def _make_array_elementwise_binop(self, name, result_type, lhs_type, + rhs_type, make_op): + def body_gen(result, lhs, rhs): + # At this point, shapes are assumed to match; could just pass buffer + # pointer for two of the three arrays as well. 
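+            # The loop below walks the flattened result buffer; operands are
+            # read element-wise for arrays and reused directly for broadcast
+            # scalars, with make_op generating the IR for each element.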
+ result_buffer = self.append(ir.GetAttr(result, "buffer")) + shape = self.append(ir.GetAttr(result, "shape")) + num_total_elts = self._get_total_array_len(shape) + + if builtins.is_array(lhs.type): + lhs_buffer = self.append(ir.GetAttr(lhs, "buffer")) + def get_left(index): + return self.append(ir.GetElem(lhs_buffer, index)) + else: + def get_left(index): + return lhs + + if builtins.is_array(rhs.type): + rhs_buffer = self.append(ir.GetAttr(rhs, "buffer")) + def get_right(index): + return self.append(ir.GetElem(rhs_buffer, index)) + else: + def get_right(index): + return rhs + + def loop_gen(index): + l = get_left(index) + r = get_right(index) + result = make_op(l, r) + self.append(ir.SetElem(result_buffer, index, result)) + return self.append( + ir.Arith(ast.Add(loc=None), index, + ir.Constant(1, self._size_type))) + + self._make_loop( + ir.Constant(0, self._size_type), lambda index: self.append( + ir.Compare(ast.Lt(loc=None), index, num_total_elts)), + loop_gen) + + return self._make_array_binop(name, result_type, lhs_type, rhs_type, + body_gen) + def _mangle_arrayop_types(self, types): def name_error(typ): assert False, "Internal compiler error: No RPC tag for {}".format(typ) @@ -1517,53 +1559,16 @@ class ARTIQIRGenerator(algorithm.Visitor): return "_".join(mangle_name(t) for t in types) - def _get_array_binop(self, op, result_type, lhs_type, rhs_type): + def _get_array_elementwise_binop(self, name, make_op, result_type, lhs_type, rhs_type): # Currently, we always have any type coercions resolved explicitly in the AST. # In the future, this might no longer be true and the three types might all # differ. name = "_array_{}_{}".format( - type(op).__name__, + name, self._mangle_arrayop_types([result_type, lhs_type, rhs_type])) if name not in self.array_op_funcs: - - def body_gen(result, lhs, rhs): - # At this point, shapes are assumed to match; could just pass buffer - # pointer for two of the three arrays as well. 
- result_buffer = self.append(ir.GetAttr(result, "buffer")) - shape = self.append(ir.GetAttr(result, "shape")) - num_total_elts = self._get_total_array_len(shape) - - if builtins.is_array(lhs.type): - lhs_buffer = self.append(ir.GetAttr(lhs, "buffer")) - def get_left(index): - return self.append(ir.GetElem(lhs_buffer, index)) - else: - def get_left(index): - return lhs - - if builtins.is_array(rhs.type): - rhs_buffer = self.append(ir.GetAttr(rhs, "buffer")) - def get_right(index): - return self.append(ir.GetElem(rhs_buffer, index)) - else: - def get_right(index): - return rhs - - def loop_gen(index): - l = get_left(index) - r = get_right(index) - result = self.append(ir.Arith(op, l, r)) - self.append(ir.SetElem(result_buffer, index, result)) - return self.append( - ir.Arith(ast.Add(loc=None), index, - ir.Constant(1, self._size_type))) - - self._make_loop( - ir.Constant(0, self._size_type), lambda index: self.append( - ir.Compare(ast.Lt(loc=None), index, num_total_elts)), loop_gen) - - self.array_op_funcs[name] = self._make_array_binop( - name, result_type, lhs_type, rhs_type, body_gen) + self.array_op_funcs[name] = self._make_array_elementwise_binop( + name, result_type, lhs_type, rhs_type, make_op) return self.array_op_funcs[name] def _invoke_arrayop(self, func, params): @@ -1721,6 +1726,34 @@ class ARTIQIRGenerator(algorithm.Visitor): return self.append(ir.Alloc([result_buffer, shape], node.type)) return self.append(ir.GetElem(result_buffer, ir.Constant(0, self._size_type))) + def _broadcast_binop(self, name, make_op, result_type, lhs, rhs): + # Broadcast scalars (broadcasting higher dimensions is not yet allowed in the + # language). + broadcast = False + array_arg = lhs + if not builtins.is_array(lhs.type): + broadcast = True + array_arg = rhs + elif not builtins.is_array(rhs.type): + broadcast = True + + shape = self.append(ir.GetAttr(array_arg, "shape")) + + if not broadcast: + rhs_shape = self.append(ir.GetAttr(rhs, "shape")) + self._make_check( + self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)), + lambda: self.alloc_exn( + builtins.TException("ValueError"), + ir.Constant("operands could not be broadcast together", + builtins.TStr()))) + + elt = result_type.find()["elt"] + result, _ = self._allocate_new_array(elt, shape) + func = self._get_array_elementwise_binop(name, make_op, result_type, lhs.type, + rhs.type) + self._invoke_arrayop(func, [result, lhs, rhs]) + return result def visit_BinOpT(self, node): if isinstance(node.op, ast.MatMult): @@ -1728,31 +1761,10 @@ class ARTIQIRGenerator(algorithm.Visitor): elif builtins.is_array(node.type): lhs = self.visit(node.left) rhs = self.visit(node.right) - - # Broadcast scalars. 
- broadcast = False - array_arg = lhs - if not builtins.is_array(lhs.type): - broadcast = True - array_arg = rhs - elif not builtins.is_array(rhs.type): - broadcast = True - - shape = self.append(ir.GetAttr(array_arg, "shape")) - - if not broadcast: - rhs_shape = self.append(ir.GetAttr(rhs, "shape")) - self._make_check( - self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)), - lambda: self.alloc_exn( - builtins.TException("ValueError"), - ir.Constant("operands could not be broadcast together", - builtins.TStr()))) - - result, _ = self._allocate_new_array(node.type.find()["elt"], shape) - func = self._get_array_binop(node.op, node.type, lhs.type, rhs.type) - self._invoke_arrayop(func, [result, lhs, rhs]) - return result + name = type(node.op).__name__ + def make_op(l, r): + return self.append(ir.Arith(node.op, l, r)) + return self._broadcast_binop(name, make_op, node.type, lhs, rhs) elif builtins.is_numeric(node.type): lhs = self.visit(node.left) rhs = self.visit(node.right) @@ -2427,25 +2439,27 @@ class ARTIQIRGenerator(algorithm.Visitor): if types.is_builtin(node.func.type): insn = self.visit_builtin_call(node) elif (types.is_broadcast_across_arrays(node.func.type) and len(args) >= 1 - and builtins.is_array(args[0].type)): + and any(builtins.is_array(arg.type) for arg in args)): # The iodelay machinery set up in the surrounding code was # deprecated/a relic from the past when array broadcasting support # was added, so no attempt to keep the delay tracking intact is # made. - assert len(args) == 1, "Broadcasting for multiple arguments not implemented" - - def make_call(val): - return self._user_call(ir.Constant(None, callee.type), [val], {}, + def make_call(*args): + return self._user_call(ir.Constant(None, callee.type), args, {}, node.arg_exprs) - - shape = self.append(ir.GetAttr(args[0], "shape")) - result, _ = self._allocate_new_array(node.type.find()["elt"], shape) - # TODO: Generate more generically if non-externals are allowed. name = node.func.type.find().name - func = self._get_array_unaryop(name, make_call, node.type, args[0].type) - self._invoke_arrayop(func, [result, args[0]]) - insn = result + + if len(args) == 1: + shape = self.append(ir.GetAttr(args[0], "shape")) + result, _ = self._allocate_new_array(node.type.find()["elt"], shape) + func = self._get_array_unaryop(name, make_call, node.type, args[0].type) + self._invoke_arrayop(func, [result, args[0]]) + insn = result + elif len(args) == 2: + insn = self._broadcast_binop(name, make_call, node.type, *args) + else: + assert False, "Broadcasting for {} arguments not implemented".format(len) else: insn = self._user_call(callee, args, keywords, node.arg_exprs) if isinstance(node.func, asttyped.AttributeT): diff --git a/artiq/compiler/transforms/inferencer.py b/artiq/compiler/transforms/inferencer.py index ce0a269e1..0de455c8d 100644 --- a/artiq/compiler/transforms/inferencer.py +++ b/artiq/compiler/transforms/inferencer.py @@ -405,6 +405,47 @@ class Inferencer(algorithm.Visitor): else: assert False + def _coerce_binary_broadcast_op(self, left, right, map_return_elt, op_loc): + def num_dims(typ): + if builtins.is_array(typ): + # TODO: If number of dimensions is ever made a non-fixed parameter, + # need to acutally unify num_dims in _coerce_binop/…. + return typ.find()["num_dims"].value + return 0 + + left_dims = num_dims(left.type) + right_dims = num_dims(right.type) + if left_dims != right_dims and left_dims != 0 and right_dims != 0: + # Mismatch (only scalar broadcast supported for now). 
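+            # Attach one note per operand so the error points at both dimensionalities.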
+ note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}", + {"num_dims": left_dims}, left.loc) + note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}", + {"num_dims": right_dims}, right.loc) + diag = diagnostic.Diagnostic( + "error", "dimensions of '{op}' array operands must match", + {"op": op_loc.source()}, op_loc, [left.loc, right.loc], [note1, note2]) + self.engine.process(diag) + return + + def map_node_type(typ): + if not builtins.is_array(typ): + # This is a single value broadcast across the array. + return typ + return typ.find()["elt"] + + # Figure out result type, handling broadcasts. + result_dims = left_dims if left_dims else right_dims + def map_return(typ): + elt = map_return_elt(typ) + result = builtins.TArray(elt=elt, num_dims=result_dims) + left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt + right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt + return (result, left, right) + + return self._coerce_numeric((left, right), + map_return=map_return, + map_node_type=map_node_type) + def _coerce_binop(self, op, left, right): if isinstance(op, ast.MatMult): if types.is_var(left.type) or types.is_var(right.type): @@ -477,45 +518,11 @@ class Inferencer(algorithm.Visitor): self.engine.process(diag) return - def num_dims(typ): - if builtins.is_array(typ): - # TODO: If number of dimensions is ever made a non-fixed parameter, - # need to acutally unify num_dims in _coerce_binop. - return typ.find()["num_dims"].value - return 0 - - left_dims = num_dims(left.type) - right_dims = num_dims(right.type) - if left_dims != right_dims and left_dims != 0 and right_dims != 0: - # Mismatch (only scalar broadcast supported for now). - note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}", - {"num_dims": left_dims}, left.loc) - note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}", - {"num_dims": right_dims}, right.loc) - diag = diagnostic.Diagnostic( - "error", "dimensions of '{op}' array operands must match", - {"op": op.loc.source()}, op.loc, [left.loc, right.loc], [note1, note2]) - self.engine.process(diag) - return - - def map_node_type(typ): - if not builtins.is_array(typ): - # This is a single value broadcast across the array. - return typ - return typ.find()["elt"] - - # Figure out result type, handling broadcasts. - result_dims = left_dims if left_dims else right_dims - def map_return(typ): - elt = builtins.TFloat() if isinstance(op, ast.Div) else typ - result = builtins.TArray(elt=elt, num_dims=result_dims) - left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt - right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt - return (result, left, right) - - return self._coerce_numeric((left, right), - map_return=map_return, - map_node_type=map_node_type) + def map_result(typ): + if isinstance(op, ast.Div): + return builtins.TFloat() + return typ + return self._coerce_binary_broadcast_op(left, right, map_result, op.loc) elif isinstance(op, (ast.BitAnd, ast.BitOr, ast.BitXor, ast.LShift, ast.RShift)): # bitwise operators require integers @@ -1290,15 +1297,26 @@ class Inferencer(algorithm.Visitor): self.engine.process(diag) return - # Array broadcasting for unary functions explicitly marked as such. 
- if len(node.args) == typ_arity == 1 and types.is_broadcast_across_arrays(typ): - arg_type = node.args[0].type.find() - if builtins.is_array(arg_type): - typ_arg, = typ_args.values() - self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None) - self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]), - node.loc, None) - return + # Array broadcasting for functions explicitly marked as such. + if len(node.args) == typ_arity and types.is_broadcast_across_arrays(typ): + if typ_arity == 1: + arg_type = node.args[0].type.find() + if builtins.is_array(arg_type): + typ_arg, = typ_args.values() + self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None) + self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]), + node.loc, None) + return + elif typ_arity == 2: + if any(builtins.is_array(arg.type) for arg in node.args): + ret, arg0, arg1 = self._coerce_binary_broadcast_op( + node.args[0], node.args[1], lambda t: typ_ret, node.loc) + node.args[0] = self._coerce_one(arg0, node.args[0], + other_node=node.args[1]) + node.args[1] = self._coerce_one(arg1, node.args[1], + other_node=node.args[0]) + self._unify(node.type, ret, node.loc, None) + return for actualarg, (formalname, formaltyp) in \ zip(node.args, list(typ_args.items()) + list(typ_optargs.items())): diff --git a/artiq/firmware/ksupport/api.rs b/artiq/firmware/ksupport/api.rs index 873dc6f71..84e69dad5 100644 --- a/artiq/firmware/ksupport/api.rs +++ b/artiq/firmware/ksupport/api.rs @@ -76,6 +76,7 @@ static mut API: &'static [(&'static str, *const ())] = &[ api!(atanh), api!(cbrt), api!(ceil), + api!(copysign), api!(cos), api!(cosh), api!(erf), @@ -86,6 +87,8 @@ static mut API: &'static [(&'static str, *const ())] = &[ api!(expm1), api!(fabs), api!(floor), + // api!(fmax), + // api!(fmin), //api!(fma), api!(fmod), api!(hypot), @@ -96,6 +99,7 @@ static mut API: &'static [(&'static str, *const ())] = &[ api!(log), //api!(log2), api!(log10), + api!(nextafter), api!(pow), api!(round), api!(sin), diff --git a/artiq/test/coredevice/test_numpy.py b/artiq/test/coredevice/test_numpy.py index caaa2ce27..a3b7b5a00 100644 --- a/artiq/test/coredevice/test_numpy.py +++ b/artiq/test/coredevice/test_numpy.py @@ -27,7 +27,7 @@ class CompareHostDeviceTest(ExperimentCase): def _test_binop(self, op, a, b): exp = self.create(_RunOnDevice) exp.run = kernel_from_string(["a", "b", "callback", "numpy"], - "callback(a " + op + "b)", + "callback(" + op + ")", decorator=portable) checked = False @@ -74,16 +74,17 @@ class CompareHostDeviceTest(ExperimentCase): for typ in [numpy.int32, numpy.int64, numpy.float]] for op in ELEM_WISE_BINOPS: for arg in args: - self._test_binop(op, *arg) + self._test_binop("a" + op + "b", *arg) def test_scalar_matrix_binops(self): for typ in [numpy.int32, numpy.int64, numpy.float]: scalar = typ(3) matrix = numpy.array([[4, 5, 6], [7, 8, 9]], dtype=typ) for op in ELEM_WISE_BINOPS: - self._test_binop(op, scalar, matrix) - self._test_binop(op, matrix, scalar) - self._test_binop(op, matrix, matrix) + code = "a" + op + "b" + self._test_binop(code, scalar, matrix) + self._test_binop(code, matrix, scalar) + self._test_binop(code, matrix, matrix) def test_unary_math_fns(self): names = [ @@ -99,3 +100,17 @@ class CompareHostDeviceTest(ExperimentCase): # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match. 
             self._test_unaryop(op, 0.51)
             self._test_unaryop(op, numpy.array([[0.3, 0.4], [0.51, 0.6]]))
+
+    def test_binary_math_fns(self):
+        names = [name for name, _ in math_fns.binary_fp_runtime_calls]
+        exp = self.create(_RunOnDevice)
+        if exp.core.target_cls != CortexA9Target:
+            names.remove("fmax")
+            names.remove("fmin")
+        for name in names:
+            code = "numpy.{}(a, b)".format(name)
+            # Exercise scalar/scalar, scalar/array, array/scalar and array/array.
+            self._test_binop(code, 1.0, 2.0)
+            self._test_binop(code, 1.0, numpy.array([2.0, 3.0]))
+            self._test_binop(code, numpy.array([1.0, 2.0]), 3.0)
+            self._test_binop(code, numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0]))
diff --git a/artiq/test/lit/embedding/array_math_fns.py b/artiq/test/lit/embedding/array_math_fns.py
index 2b298f2ef..d23540b48 100644
--- a/artiq/test/lit/embedding/array_math_fns.py
+++ b/artiq/test/lit/embedding/array_math_fns.py
@@ -6,10 +6,21 @@ import numpy as np
 @kernel
 def entrypoint():
     # Just make sure everything compiles.
+
+    # LLVM intrinsic:
     a = np.array([1.0, 2.0, 3.0])
     b = np.sin(a)
     assert b.shape == a.shape
+
+    # libm:
     c = np.array([1.0, 2.0, 3.0])
     d = np.arctan(c)
     assert d.shape == c.shape
+
+    # libm, binary:
+    e = np.array([1.0, 2.0, 3.0])
+    f = np.array([4.0, 5.0, 6.0])
+    g = np.arctan2(e, f)
+    # g = np.arctan2(e, 0.0)
+    # g = np.arctan2(0.0, f)
+    assert g.shape == e.shape
diff --git a/artiq/test/lit/embedding/math_fns.py b/artiq/test/lit/embedding/math_fns.py
index 40ac18e42..6f9416c8d 100644
--- a/artiq/test/lit/embedding/math_fns.py
+++ b/artiq/test/lit/embedding/math_fns.py
@@ -28,3 +28,4 @@ def entrypoint():
     assert numpy.arcsin(0.0) == 0.0
     assert numpy.arccos(1.0) == 0.0
     assert numpy.arctan(0.0) == 0.0
+    assert numpy.arctan2(0.0, 1.0) == 0.0
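
For reference, a minimal kernel sketch exercising the newly supported binary
functions, including scalar broadcasting. It assumes a standard "core" device
entry; the experiment class name and the numeric values are illustrative only.

    from artiq.experiment import EnvExperiment, kernel
    import numpy as np

    class BinaryMathDemo(EnvExperiment):
        def build(self):
            self.setattr_device("core")

        @kernel
        def run(self):
            a = np.array([1.0, 2.0, 3.0])
            b = np.array([4.0, 5.0, 6.0])
            # Array/array: applied element-wise, lowered to the libm atan2 call.
            c = np.arctan2(a, b)
            # Scalar operands are broadcast across the array argument.
            d = np.hypot(a, 2.0)
            e = np.copysign(-1.0, b)
            assert c.shape == a.shape
            assert d.shape == a.shape
            assert e.shape == b.shape

The same broadcasting rules apply to the other (float, float) -> float entries
in binary_fp_runtime_calls; note that fmax and fmin remain commented out in
artiq/firmware/ksupport/api.rs, matching their exclusion from the test on
non-CortexA9 targets.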