compiler: Implement binary NumPy math functions (arctan2, …)

The bulk of the diff is just factoring out the implementation
for binary arithmetic implementations, to be reused for binary
function calls.
pull/1551/head
David Nadlinger 2020-11-10 22:24:04 +01:00
parent fcf4763ae7
commit d95e619567
7 changed files with 211 additions and 135 deletions

View File

@ -53,23 +53,36 @@ unary_fp_runtime_calls = [
("cbrt", "cbrt"),
]
#: (float, float) -> float numpy.* math functions lowered to runtime calls.
binary_fp_runtime_calls = [
("arctan2", "atan2"),
("copysign", "copysign"),
("fmax", "fmax"),
("fmin", "fmin"),
# ("ldexp", "ldexp"), # One argument is an int; would need a bit more plumbing.
("hypot", "hypot"),
("nextafter", "nextafter"),
]
#: Array handling builtins (special treatment due to allocations).
numpy_builtins = ["transpose"]
def unary_fp_type(name):
return types.TExternalFunction(OrderedDict([("arg", builtins.TFloat())]),
def fp_runtime_type(name, arity):
args = [("arg{}".format(i), builtins.TFloat()) for i in range(arity)]
return types.TExternalFunction(OrderedDict(args),
builtins.TFloat(),
name,
# errno isn't observable from ARTIQ Python.
flags={"nounwind", "nowrite"},
broadcast_across_arrays=True)
numpy_map = {
getattr(numpy, symbol): unary_fp_type(mangle)
getattr(numpy, symbol): fp_runtime_type(mangle, arity=1)
for symbol, mangle in (unary_fp_intrinsics + unary_fp_runtime_calls)
}
for symbol, mangle in binary_fp_runtime_calls:
numpy_map[getattr(numpy, symbol)] = fp_runtime_type(mangle, arity=2)
for name in numpy_builtins:
numpy_map[getattr(numpy, name)] = types.TBuiltinFunction("numpy." + name)

View File

@ -1502,6 +1502,48 @@ class ARTIQIRGenerator(algorithm.Visitor):
self.final_branch = old_final_branch
self.unwind_target = old_unwind
def _make_array_elementwise_binop(self, name, result_type, lhs_type,
rhs_type, make_op):
def body_gen(result, lhs, rhs):
# At this point, shapes are assumed to match; could just pass buffer
# pointer for two of the three arrays as well.
result_buffer = self.append(ir.GetAttr(result, "buffer"))
shape = self.append(ir.GetAttr(result, "shape"))
num_total_elts = self._get_total_array_len(shape)
if builtins.is_array(lhs.type):
lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
def get_left(index):
return self.append(ir.GetElem(lhs_buffer, index))
else:
def get_left(index):
return lhs
if builtins.is_array(rhs.type):
rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
def get_right(index):
return self.append(ir.GetElem(rhs_buffer, index))
else:
def get_right(index):
return rhs
def loop_gen(index):
l = get_left(index)
r = get_right(index)
result = make_op(l, r)
self.append(ir.SetElem(result_buffer, index, result))
return self.append(
ir.Arith(ast.Add(loc=None), index,
ir.Constant(1, self._size_type)))
self._make_loop(
ir.Constant(0, self._size_type), lambda index: self.append(
ir.Compare(ast.Lt(loc=None), index, num_total_elts)),
loop_gen)
return self._make_array_binop(name, result_type, lhs_type, rhs_type,
body_gen)
def _mangle_arrayop_types(self, types):
def name_error(typ):
assert False, "Internal compiler error: No RPC tag for {}".format(typ)
@ -1517,53 +1559,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
return "_".join(mangle_name(t) for t in types)
def _get_array_binop(self, op, result_type, lhs_type, rhs_type):
def _get_array_elementwise_binop(self, name, make_op, result_type, lhs_type, rhs_type):
# Currently, we always have any type coercions resolved explicitly in the AST.
# In the future, this might no longer be true and the three types might all
# differ.
name = "_array_{}_{}".format(
type(op).__name__,
name,
self._mangle_arrayop_types([result_type, lhs_type, rhs_type]))
if name not in self.array_op_funcs:
def body_gen(result, lhs, rhs):
# At this point, shapes are assumed to match; could just pass buffer
# pointer for two of the three arrays as well.
result_buffer = self.append(ir.GetAttr(result, "buffer"))
shape = self.append(ir.GetAttr(result, "shape"))
num_total_elts = self._get_total_array_len(shape)
if builtins.is_array(lhs.type):
lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
def get_left(index):
return self.append(ir.GetElem(lhs_buffer, index))
else:
def get_left(index):
return lhs
if builtins.is_array(rhs.type):
rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
def get_right(index):
return self.append(ir.GetElem(rhs_buffer, index))
else:
def get_right(index):
return rhs
def loop_gen(index):
l = get_left(index)
r = get_right(index)
result = self.append(ir.Arith(op, l, r))
self.append(ir.SetElem(result_buffer, index, result))
return self.append(
ir.Arith(ast.Add(loc=None), index,
ir.Constant(1, self._size_type)))
self._make_loop(
ir.Constant(0, self._size_type), lambda index: self.append(
ir.Compare(ast.Lt(loc=None), index, num_total_elts)), loop_gen)
self.array_op_funcs[name] = self._make_array_binop(
name, result_type, lhs_type, rhs_type, body_gen)
self.array_op_funcs[name] = self._make_array_elementwise_binop(
name, result_type, lhs_type, rhs_type, make_op)
return self.array_op_funcs[name]
def _invoke_arrayop(self, func, params):
@ -1721,6 +1726,34 @@ class ARTIQIRGenerator(algorithm.Visitor):
return self.append(ir.Alloc([result_buffer, shape], node.type))
return self.append(ir.GetElem(result_buffer, ir.Constant(0, self._size_type)))
def _broadcast_binop(self, name, make_op, result_type, lhs, rhs):
# Broadcast scalars (broadcasting higher dimensions is not yet allowed in the
# language).
broadcast = False
array_arg = lhs
if not builtins.is_array(lhs.type):
broadcast = True
array_arg = rhs
elif not builtins.is_array(rhs.type):
broadcast = True
shape = self.append(ir.GetAttr(array_arg, "shape"))
if not broadcast:
rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
self._make_check(
self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
lambda: self.alloc_exn(
builtins.TException("ValueError"),
ir.Constant("operands could not be broadcast together",
builtins.TStr())))
elt = result_type.find()["elt"]
result, _ = self._allocate_new_array(elt, shape)
func = self._get_array_elementwise_binop(name, make_op, result_type, lhs.type,
rhs.type)
self._invoke_arrayop(func, [result, lhs, rhs])
return result
def visit_BinOpT(self, node):
if isinstance(node.op, ast.MatMult):
@ -1728,31 +1761,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
elif builtins.is_array(node.type):
lhs = self.visit(node.left)
rhs = self.visit(node.right)
# Broadcast scalars.
broadcast = False
array_arg = lhs
if not builtins.is_array(lhs.type):
broadcast = True
array_arg = rhs
elif not builtins.is_array(rhs.type):
broadcast = True
shape = self.append(ir.GetAttr(array_arg, "shape"))
if not broadcast:
rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
self._make_check(
self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
lambda: self.alloc_exn(
builtins.TException("ValueError"),
ir.Constant("operands could not be broadcast together",
builtins.TStr())))
result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
func = self._get_array_binop(node.op, node.type, lhs.type, rhs.type)
self._invoke_arrayop(func, [result, lhs, rhs])
return result
name = type(node.op).__name__
def make_op(l, r):
return self.append(ir.Arith(node.op, l, r))
return self._broadcast_binop(name, make_op, node.type, lhs, rhs)
elif builtins.is_numeric(node.type):
lhs = self.visit(node.left)
rhs = self.visit(node.right)
@ -2427,25 +2439,27 @@ class ARTIQIRGenerator(algorithm.Visitor):
if types.is_builtin(node.func.type):
insn = self.visit_builtin_call(node)
elif (types.is_broadcast_across_arrays(node.func.type) and len(args) >= 1
and builtins.is_array(args[0].type)):
and any(builtins.is_array(arg.type) for arg in args)):
# The iodelay machinery set up in the surrounding code was
# deprecated/a relic from the past when array broadcasting support
# was added, so no attempt to keep the delay tracking intact is
# made.
assert len(args) == 1, "Broadcasting for multiple arguments not implemented"
def make_call(val):
return self._user_call(ir.Constant(None, callee.type), [val], {},
def make_call(*args):
return self._user_call(ir.Constant(None, callee.type), args, {},
node.arg_exprs)
shape = self.append(ir.GetAttr(args[0], "shape"))
result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
# TODO: Generate more generically if non-externals are allowed.
name = node.func.type.find().name
func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
self._invoke_arrayop(func, [result, args[0]])
insn = result
if len(args) == 1:
shape = self.append(ir.GetAttr(args[0], "shape"))
result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
self._invoke_arrayop(func, [result, args[0]])
insn = result
elif len(args) == 2:
insn = self._broadcast_binop(name, make_call, node.type, *args)
else:
assert False, "Broadcasting for {} arguments not implemented".format(len)
else:
insn = self._user_call(callee, args, keywords, node.arg_exprs)
if isinstance(node.func, asttyped.AttributeT):

View File

@ -405,6 +405,47 @@ class Inferencer(algorithm.Visitor):
else:
assert False
def _coerce_binary_broadcast_op(self, left, right, map_return_elt, op_loc):
def num_dims(typ):
if builtins.is_array(typ):
# TODO: If number of dimensions is ever made a non-fixed parameter,
# need to acutally unify num_dims in _coerce_binop/….
return typ.find()["num_dims"].value
return 0
left_dims = num_dims(left.type)
right_dims = num_dims(right.type)
if left_dims != right_dims and left_dims != 0 and right_dims != 0:
# Mismatch (only scalar broadcast supported for now).
note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
{"num_dims": left_dims}, left.loc)
note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
{"num_dims": right_dims}, right.loc)
diag = diagnostic.Diagnostic(
"error", "dimensions of '{op}' array operands must match",
{"op": op_loc.source()}, op_loc, [left.loc, right.loc], [note1, note2])
self.engine.process(diag)
return
def map_node_type(typ):
if not builtins.is_array(typ):
# This is a single value broadcast across the array.
return typ
return typ.find()["elt"]
# Figure out result type, handling broadcasts.
result_dims = left_dims if left_dims else right_dims
def map_return(typ):
elt = map_return_elt(typ)
result = builtins.TArray(elt=elt, num_dims=result_dims)
left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
return (result, left, right)
return self._coerce_numeric((left, right),
map_return=map_return,
map_node_type=map_node_type)
def _coerce_binop(self, op, left, right):
if isinstance(op, ast.MatMult):
if types.is_var(left.type) or types.is_var(right.type):
@ -477,45 +518,11 @@ class Inferencer(algorithm.Visitor):
self.engine.process(diag)
return
def num_dims(typ):
if builtins.is_array(typ):
# TODO: If number of dimensions is ever made a non-fixed parameter,
# need to acutally unify num_dims in _coerce_binop.
return typ.find()["num_dims"].value
return 0
left_dims = num_dims(left.type)
right_dims = num_dims(right.type)
if left_dims != right_dims and left_dims != 0 and right_dims != 0:
# Mismatch (only scalar broadcast supported for now).
note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
{"num_dims": left_dims}, left.loc)
note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
{"num_dims": right_dims}, right.loc)
diag = diagnostic.Diagnostic(
"error", "dimensions of '{op}' array operands must match",
{"op": op.loc.source()}, op.loc, [left.loc, right.loc], [note1, note2])
self.engine.process(diag)
return
def map_node_type(typ):
if not builtins.is_array(typ):
# This is a single value broadcast across the array.
return typ
return typ.find()["elt"]
# Figure out result type, handling broadcasts.
result_dims = left_dims if left_dims else right_dims
def map_return(typ):
elt = builtins.TFloat() if isinstance(op, ast.Div) else typ
result = builtins.TArray(elt=elt, num_dims=result_dims)
left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
return (result, left, right)
return self._coerce_numeric((left, right),
map_return=map_return,
map_node_type=map_node_type)
def map_result(typ):
if isinstance(op, ast.Div):
return builtins.TFloat()
return typ
return self._coerce_binary_broadcast_op(left, right, map_result, op.loc)
elif isinstance(op, (ast.BitAnd, ast.BitOr, ast.BitXor,
ast.LShift, ast.RShift)):
# bitwise operators require integers
@ -1290,15 +1297,26 @@ class Inferencer(algorithm.Visitor):
self.engine.process(diag)
return
# Array broadcasting for unary functions explicitly marked as such.
if len(node.args) == typ_arity == 1 and types.is_broadcast_across_arrays(typ):
arg_type = node.args[0].type.find()
if builtins.is_array(arg_type):
typ_arg, = typ_args.values()
self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
node.loc, None)
return
# Array broadcasting for functions explicitly marked as such.
if len(node.args) == typ_arity and types.is_broadcast_across_arrays(typ):
if typ_arity == 1:
arg_type = node.args[0].type.find()
if builtins.is_array(arg_type):
typ_arg, = typ_args.values()
self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
node.loc, None)
return
elif typ_arity == 2:
if any(builtins.is_array(arg.type) for arg in node.args):
ret, arg0, arg1 = self._coerce_binary_broadcast_op(
node.args[0], node.args[1], lambda t: typ_ret, node.loc)
node.args[0] = self._coerce_one(arg0, node.args[0],
other_node=node.args[1])
node.args[1] = self._coerce_one(arg1, node.args[1],
other_node=node.args[0])
self._unify(node.type, ret, node.loc, None)
return
for actualarg, (formalname, formaltyp) in \
zip(node.args, list(typ_args.items()) + list(typ_optargs.items())):

View File

@ -76,6 +76,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
api!(atanh),
api!(cbrt),
api!(ceil),
api!(copysign),
api!(cos),
api!(cosh),
api!(erf),
@ -86,6 +87,8 @@ static mut API: &'static [(&'static str, *const ())] = &[
api!(expm1),
api!(fabs),
api!(floor),
// api!(fmax),
// api!(fmin),
//api!(fma),
api!(fmod),
api!(hypot),
@ -96,6 +99,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
api!(log),
//api!(log2),
api!(log10),
api!(nextafter),
api!(pow),
api!(round),
api!(sin),

View File

@ -27,7 +27,7 @@ class CompareHostDeviceTest(ExperimentCase):
def _test_binop(self, op, a, b):
exp = self.create(_RunOnDevice)
exp.run = kernel_from_string(["a", "b", "callback", "numpy"],
"callback(a " + op + "b)",
"callback(" + op + ")",
decorator=portable)
checked = False
@ -74,16 +74,17 @@ class CompareHostDeviceTest(ExperimentCase):
for typ in [numpy.int32, numpy.int64, numpy.float]]
for op in ELEM_WISE_BINOPS:
for arg in args:
self._test_binop(op, *arg)
self._test_binop("a" + op + "b", *arg)
def test_scalar_matrix_binops(self):
for typ in [numpy.int32, numpy.int64, numpy.float]:
scalar = typ(3)
matrix = numpy.array([[4, 5, 6], [7, 8, 9]], dtype=typ)
for op in ELEM_WISE_BINOPS:
self._test_binop(op, scalar, matrix)
self._test_binop(op, matrix, scalar)
self._test_binop(op, matrix, matrix)
code = "a" + op + "b"
self._test_binop(code, scalar, matrix)
self._test_binop(code, matrix, scalar)
self._test_binop(code, matrix, matrix)
def test_unary_math_fns(self):
names = [
@ -99,3 +100,17 @@ class CompareHostDeviceTest(ExperimentCase):
# Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
self._test_unaryop(op, 0.51)
self._test_unaryop(op, numpy.array([[0.3, 0.4], [0.51, 0.6]]))
def test_binary_math_fns(self):
names = [name for name, _ in math_fns.binary_fp_runtime_calls]
exp = self.create(_RunOnDevice)
if exp.core.target_cls != CortexA9Target:
names.remove("fmax")
names.remove("fmin")
for name in names:
code = "numpy.{}(a, b)".format(name)
# Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
self._test_binop(code, 1.0, 2.0)
self._test_binop(code, 1.0, numpy.array([2.0, 3.0]))
self._test_binop(code, numpy.array([1.0, 2.0]), 3.0)
self._test_binop(code, numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0]))

View File

@ -6,10 +6,21 @@ import numpy as np
@kernel
def entrypoint():
# Just make sure everything compiles.
# LLVM intrinsic:
a = np.array([1.0, 2.0, 3.0])
b = np.sin(a)
assert b.shape == a.shape
# libm:
c = np.array([1.0, 2.0, 3.0])
d = np.arctan(c)
assert d.shape == c.shape
# libm, binary:
e = np.array([1.0, 2.0, 3.0])
f = np.array([4.0, 5.0, 6.0])
g = np.arctan2(e, f)
# g = np.arctan2(e, 0.0)
# g = np.arctan2(0.0, f)
assert g.shape == e.shape

View File

@ -28,3 +28,4 @@ def entrypoint():
assert numpy.arcsin(0.0) == 0.0
assert numpy.arccos(1.0) == 0.0
assert numpy.arctan(0.0) == 0.0
assert numpy.arctan2(0.0, 1.0) == 0.0