diff --git a/artiq/compiler/math_fns.py b/artiq/compiler/math_fns.py
index 8b1b38914..c54eac552 100644
--- a/artiq/compiler/math_fns.py
+++ b/artiq/compiler/math_fns.py
@@ -53,23 +53,36 @@ unary_fp_runtime_calls = [
     ("cbrt", "cbrt"),
 ]
 
+#: (float, float) -> float numpy.* math functions lowered to runtime calls.
+binary_fp_runtime_calls = [
+    ("arctan2", "atan2"),
+    ("copysign", "copysign"),
+    ("fmax", "fmax"),
+    ("fmin", "fmin"),
+    # ("ldexp", "ldexp"),  # One argument is an int; would need a bit more plumbing.
+    ("hypot", "hypot"),
+    ("nextafter", "nextafter"),
+]
+
 #: Array handling builtins (special treatment due to allocations).
 numpy_builtins = ["transpose"]
 
 
-def unary_fp_type(name):
-    return types.TExternalFunction(OrderedDict([("arg", builtins.TFloat())]),
+def fp_runtime_type(name, arity):
+    args = [("arg{}".format(i), builtins.TFloat()) for i in range(arity)]
+    return types.TExternalFunction(OrderedDict(args),
                                    builtins.TFloat(),
                                    name,
                                    # errno isn't observable from ARTIQ Python.
                                    flags={"nounwind", "nowrite"},
                                    broadcast_across_arrays=True)
 
-
 numpy_map = {
-    getattr(numpy, symbol): unary_fp_type(mangle)
+    getattr(numpy, symbol): fp_runtime_type(mangle, arity=1)
     for symbol, mangle in (unary_fp_intrinsics + unary_fp_runtime_calls)
 }
+for symbol, mangle in binary_fp_runtime_calls:
+    numpy_map[getattr(numpy, symbol)] = fp_runtime_type(mangle, arity=2)
 for name in numpy_builtins:
     numpy_map[getattr(numpy, name)] = types.TBuiltinFunction("numpy." + name)
 
diff --git a/artiq/compiler/transforms/artiq_ir_generator.py b/artiq/compiler/transforms/artiq_ir_generator.py
index 9673eb89f..14a45a0e4 100644
--- a/artiq/compiler/transforms/artiq_ir_generator.py
+++ b/artiq/compiler/transforms/artiq_ir_generator.py
@@ -1502,6 +1502,48 @@ class ARTIQIRGenerator(algorithm.Visitor):
             self.final_branch = old_final_branch
             self.unwind_target = old_unwind
 
+    def _make_array_elementwise_binop(self, name, result_type, lhs_type,
+                                      rhs_type, make_op):
+        def body_gen(result, lhs, rhs):
+            # At this point, shapes are assumed to match; could just pass buffer
+            # pointer for two of the three arrays as well.
+            result_buffer = self.append(ir.GetAttr(result, "buffer"))
+            shape = self.append(ir.GetAttr(result, "shape"))
+            num_total_elts = self._get_total_array_len(shape)
+
+            if builtins.is_array(lhs.type):
+                lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
+                def get_left(index):
+                    return self.append(ir.GetElem(lhs_buffer, index))
+            else:
+                def get_left(index):
+                    return lhs
+
+            if builtins.is_array(rhs.type):
+                rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
+                def get_right(index):
+                    return self.append(ir.GetElem(rhs_buffer, index))
+            else:
+                def get_right(index):
+                    return rhs
+
+            def loop_gen(index):
+                l = get_left(index)
+                r = get_right(index)
+                result = make_op(l, r)
+                self.append(ir.SetElem(result_buffer, index, result))
+                return self.append(
+                    ir.Arith(ast.Add(loc=None), index,
+                             ir.Constant(1, self._size_type)))
+
+            self._make_loop(
+                ir.Constant(0, self._size_type), lambda index: self.append(
+                    ir.Compare(ast.Lt(loc=None), index, num_total_elts)),
+                loop_gen)
+
+        return self._make_array_binop(name, result_type, lhs_type, rhs_type,
+                                      body_gen)
+
     def _mangle_arrayop_types(self, types):
         def name_error(typ):
             assert False, "Internal compiler error: No RPC tag for {}".format(typ)
@@ -1517,53 +1559,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
 
         return "_".join(mangle_name(t) for t in types)
 
-    def _get_array_binop(self, op, result_type, lhs_type, rhs_type):
+    def _get_array_elementwise_binop(self, name, make_op, result_type, lhs_type, rhs_type):
         # Currently, we always have any type coercions resolved explicitly in the AST.
         # In the future, this might no longer be true and the three types might all
         # differ.
         name = "_array_{}_{}".format(
-            type(op).__name__,
+            name,
             self._mangle_arrayop_types([result_type, lhs_type, rhs_type]))
         if name not in self.array_op_funcs:
-
-            def body_gen(result, lhs, rhs):
-                # At this point, shapes are assumed to match; could just pass buffer
-                # pointer for two of the three arrays as well.
-                result_buffer = self.append(ir.GetAttr(result, "buffer"))
-                shape = self.append(ir.GetAttr(result, "shape"))
-                num_total_elts = self._get_total_array_len(shape)
-
-                if builtins.is_array(lhs.type):
-                    lhs_buffer = self.append(ir.GetAttr(lhs, "buffer"))
-                    def get_left(index):
-                        return self.append(ir.GetElem(lhs_buffer, index))
-                else:
-                    def get_left(index):
-                        return lhs
-
-                if builtins.is_array(rhs.type):
-                    rhs_buffer = self.append(ir.GetAttr(rhs, "buffer"))
-                    def get_right(index):
-                        return self.append(ir.GetElem(rhs_buffer, index))
-                else:
-                    def get_right(index):
-                        return rhs
-
-                def loop_gen(index):
-                    l = get_left(index)
-                    r = get_right(index)
-                    result = self.append(ir.Arith(op, l, r))
-                    self.append(ir.SetElem(result_buffer, index, result))
-                    return self.append(
-                        ir.Arith(ast.Add(loc=None), index,
-                                 ir.Constant(1, self._size_type)))
-
-                self._make_loop(
-                    ir.Constant(0, self._size_type), lambda index: self.append(
-                        ir.Compare(ast.Lt(loc=None), index, num_total_elts)), loop_gen)
-
-            self.array_op_funcs[name] = self._make_array_binop(
-                name, result_type, lhs_type, rhs_type, body_gen)
+            self.array_op_funcs[name] = self._make_array_elementwise_binop(
+                name, result_type, lhs_type, rhs_type, make_op)
         return self.array_op_funcs[name]
 
     def _invoke_arrayop(self, func, params):
@@ -1721,6 +1726,34 @@ class ARTIQIRGenerator(algorithm.Visitor):
             return self.append(ir.Alloc([result_buffer, shape], node.type))
         return self.append(ir.GetElem(result_buffer, ir.Constant(0, self._size_type)))
 
+    def _broadcast_binop(self, name, make_op, result_type, lhs, rhs):
+        # Broadcast scalars (broadcasting higher dimensions is not yet allowed in the
+        # language).
+        broadcast = False
+        array_arg = lhs
+        if not builtins.is_array(lhs.type):
+            broadcast = True
+            array_arg = rhs
+        elif not builtins.is_array(rhs.type):
+            broadcast = True
+
+        shape = self.append(ir.GetAttr(array_arg, "shape"))
+
+        if not broadcast:
+            rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
+            self._make_check(
+                self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
+                lambda: self.alloc_exn(
+                    builtins.TException("ValueError"),
+                    ir.Constant("operands could not be broadcast together",
+                                builtins.TStr())))
+
+        elt = result_type.find()["elt"]
+        result, _ = self._allocate_new_array(elt, shape)
+        func = self._get_array_elementwise_binop(name, make_op, result_type, lhs.type,
+            rhs.type)
+        self._invoke_arrayop(func, [result, lhs, rhs])
+        return result
 
     def visit_BinOpT(self, node):
         if isinstance(node.op, ast.MatMult):
@@ -1728,31 +1761,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
         elif builtins.is_array(node.type):
             lhs = self.visit(node.left)
             rhs = self.visit(node.right)
-
-            # Broadcast scalars.
-            broadcast = False
-            array_arg = lhs
-            if not builtins.is_array(lhs.type):
-                broadcast = True
-                array_arg = rhs
-            elif not builtins.is_array(rhs.type):
-                broadcast = True
-
-            shape = self.append(ir.GetAttr(array_arg, "shape"))
-
-            if not broadcast:
-                rhs_shape = self.append(ir.GetAttr(rhs, "shape"))
-                self._make_check(
-                    self.append(ir.Compare(ast.Eq(loc=None), shape, rhs_shape)),
-                    lambda: self.alloc_exn(
-                        builtins.TException("ValueError"),
-                        ir.Constant("operands could not be broadcast together",
-                                    builtins.TStr())))
-
-            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
-            func = self._get_array_binop(node.op, node.type, lhs.type, rhs.type)
-            self._invoke_arrayop(func, [result, lhs, rhs])
-            return result
+            name = type(node.op).__name__
+            def make_op(l, r):
+                return self.append(ir.Arith(node.op, l, r))
+            return self._broadcast_binop(name, make_op, node.type, lhs, rhs)
         elif builtins.is_numeric(node.type):
             lhs = self.visit(node.left)
             rhs = self.visit(node.right)
@@ -2427,25 +2439,27 @@ class ARTIQIRGenerator(algorithm.Visitor):
         if types.is_builtin(node.func.type):
             insn = self.visit_builtin_call(node)
         elif (types.is_broadcast_across_arrays(node.func.type) and len(args) >= 1
-              and builtins.is_array(args[0].type)):
+              and any(builtins.is_array(arg.type) for arg in args)):
             # The iodelay machinery set up in the surrounding code was
             # deprecated/a relic from the past when array broadcasting support
             # was added, so no attempt to keep the delay tracking intact is
             # made.
-            assert len(args) == 1, "Broadcasting for multiple arguments not implemented"
-
-            def make_call(val):
-                return self._user_call(ir.Constant(None, callee.type), [val], {},
+            def make_call(*args):
+                return self._user_call(ir.Constant(None, callee.type), args, {},
                                        node.arg_exprs)
-
-            shape = self.append(ir.GetAttr(args[0], "shape"))
-            result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
-
             # TODO: Generate more generically if non-externals are allowed.
             name = node.func.type.find().name
-            func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
-            self._invoke_arrayop(func, [result, args[0]])
-            insn = result
+
+            if len(args) == 1:
+                shape = self.append(ir.GetAttr(args[0], "shape"))
+                result, _ = self._allocate_new_array(node.type.find()["elt"], shape)
+                func = self._get_array_unaryop(name, make_call, node.type, args[0].type)
+                self._invoke_arrayop(func, [result, args[0]])
+                insn = result
+            elif len(args) == 2:
+                insn = self._broadcast_binop(name, make_call, node.type, *args)
+            else:
+                assert False, "Broadcasting for {} arguments not implemented".format(len)
         else:
             insn = self._user_call(callee, args, keywords, node.arg_exprs)
             if isinstance(node.func, asttyped.AttributeT):
diff --git a/artiq/compiler/transforms/inferencer.py b/artiq/compiler/transforms/inferencer.py
index ce0a269e1..0de455c8d 100644
--- a/artiq/compiler/transforms/inferencer.py
+++ b/artiq/compiler/transforms/inferencer.py
@@ -405,6 +405,47 @@ class Inferencer(algorithm.Visitor):
         else:
             assert False
 
+    def _coerce_binary_broadcast_op(self, left, right, map_return_elt, op_loc):
+        def num_dims(typ):
+            if builtins.is_array(typ):
+                # TODO: If number of dimensions is ever made a non-fixed parameter,
+                # need to acutally unify num_dims in _coerce_binop/….
+                return typ.find()["num_dims"].value
+            return 0
+
+        left_dims = num_dims(left.type)
+        right_dims = num_dims(right.type)
+        if left_dims != right_dims and left_dims != 0 and right_dims != 0:
+            # Mismatch (only scalar broadcast supported for now).
+            note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
+                                            {"num_dims": left_dims}, left.loc)
+            note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
+                                            {"num_dims": right_dims}, right.loc)
+            diag = diagnostic.Diagnostic(
+                "error", "dimensions of '{op}' array operands must match",
+                {"op": op_loc.source()}, op_loc, [left.loc, right.loc], [note1, note2])
+            self.engine.process(diag)
+            return
+
+        def map_node_type(typ):
+            if not builtins.is_array(typ):
+                # This is a single value broadcast across the array.
+                return typ
+            return typ.find()["elt"]
+
+        # Figure out result type, handling broadcasts.
+        result_dims = left_dims if left_dims else right_dims
+        def map_return(typ):
+            elt = map_return_elt(typ)
+            result = builtins.TArray(elt=elt, num_dims=result_dims)
+            left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
+            right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
+            return (result, left, right)
+
+        return self._coerce_numeric((left, right),
+                                    map_return=map_return,
+                                    map_node_type=map_node_type)
+
     def _coerce_binop(self, op, left, right):
         if isinstance(op, ast.MatMult):
             if types.is_var(left.type) or types.is_var(right.type):
@@ -477,45 +518,11 @@ class Inferencer(algorithm.Visitor):
                 self.engine.process(diag)
                 return
 
-            def num_dims(typ):
-                if builtins.is_array(typ):
-                    # TODO: If number of dimensions is ever made a non-fixed parameter,
-                    # need to acutally unify num_dims in _coerce_binop.
-                    return typ.find()["num_dims"].value
-                return 0
-
-            left_dims = num_dims(left.type)
-            right_dims = num_dims(right.type)
-            if left_dims != right_dims and left_dims != 0 and right_dims != 0:
-                # Mismatch (only scalar broadcast supported for now).
-                note1 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
-                                              {"num_dims": left_dims}, left.loc)
-                note2 = diagnostic.Diagnostic("note", "operand of dimension {num_dims}",
-                                              {"num_dims": right_dims}, right.loc)
-                diag = diagnostic.Diagnostic(
-                    "error", "dimensions of '{op}' array operands must match",
-                    {"op": op.loc.source()}, op.loc, [left.loc, right.loc], [note1, note2])
-                self.engine.process(diag)
-                return
-
-            def map_node_type(typ):
-                if not builtins.is_array(typ):
-                    # This is a single value broadcast across the array.
-                    return typ
-                return typ.find()["elt"]
-
-            # Figure out result type, handling broadcasts.
-            result_dims = left_dims if left_dims else right_dims
-            def map_return(typ):
-                elt = builtins.TFloat() if isinstance(op, ast.Div) else typ
-                result = builtins.TArray(elt=elt, num_dims=result_dims)
-                left = builtins.TArray(elt=elt, num_dims=left_dims) if left_dims else elt
-                right = builtins.TArray(elt=elt, num_dims=right_dims) if right_dims else elt
-                return (result, left, right)
-
-            return self._coerce_numeric((left, right),
-                                        map_return=map_return,
-                                        map_node_type=map_node_type)
+            def map_result(typ):
+                if isinstance(op, ast.Div):
+                    return builtins.TFloat()
+                return typ
+            return self._coerce_binary_broadcast_op(left, right, map_result, op.loc)
         elif isinstance(op, (ast.BitAnd, ast.BitOr, ast.BitXor,
                            ast.LShift, ast.RShift)):
             # bitwise operators require integers
@@ -1290,15 +1297,26 @@ class Inferencer(algorithm.Visitor):
             self.engine.process(diag)
             return
 
-        # Array broadcasting for unary functions explicitly marked as such.
-        if len(node.args) == typ_arity == 1 and types.is_broadcast_across_arrays(typ):
-            arg_type = node.args[0].type.find()
-            if builtins.is_array(arg_type):
-                typ_arg, = typ_args.values()
-                self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
-                self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
-                            node.loc, None)
-                return
+        # Array broadcasting for functions explicitly marked as such.
+        if len(node.args) == typ_arity and types.is_broadcast_across_arrays(typ):
+            if typ_arity == 1:
+                arg_type = node.args[0].type.find()
+                if builtins.is_array(arg_type):
+                    typ_arg, = typ_args.values()
+                    self._unify(typ_arg, arg_type["elt"], node.args[0].loc, None)
+                    self._unify(node.type, builtins.TArray(typ_ret, arg_type["num_dims"]),
+                                node.loc, None)
+                    return
+            elif typ_arity == 2:
+                if any(builtins.is_array(arg.type) for arg in node.args):
+                    ret, arg0, arg1 = self._coerce_binary_broadcast_op(
+                        node.args[0], node.args[1], lambda t: typ_ret, node.loc)
+                    node.args[0] = self._coerce_one(arg0, node.args[0],
+                                                    other_node=node.args[1])
+                    node.args[1] = self._coerce_one(arg1, node.args[1],
+                                                    other_node=node.args[0])
+                    self._unify(node.type, ret, node.loc, None)
+                    return
 
         for actualarg, (formalname, formaltyp) in \
                 zip(node.args, list(typ_args.items()) + list(typ_optargs.items())):
diff --git a/artiq/firmware/ksupport/api.rs b/artiq/firmware/ksupport/api.rs
index 873dc6f71..84e69dad5 100644
--- a/artiq/firmware/ksupport/api.rs
+++ b/artiq/firmware/ksupport/api.rs
@@ -76,6 +76,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
     api!(atanh),
     api!(cbrt),
     api!(ceil),
+    api!(copysign),
     api!(cos),
     api!(cosh),
     api!(erf),
@@ -86,6 +87,8 @@ static mut API: &'static [(&'static str, *const ())] = &[
     api!(expm1),
     api!(fabs),
     api!(floor),
+    // api!(fmax),
+    // api!(fmin),
     //api!(fma),
     api!(fmod),
     api!(hypot),
@@ -96,6 +99,7 @@ static mut API: &'static [(&'static str, *const ())] = &[
     api!(log),
     //api!(log2),
     api!(log10),
+    api!(nextafter),
     api!(pow),
     api!(round),
     api!(sin),
diff --git a/artiq/test/coredevice/test_numpy.py b/artiq/test/coredevice/test_numpy.py
index caaa2ce27..a3b7b5a00 100644
--- a/artiq/test/coredevice/test_numpy.py
+++ b/artiq/test/coredevice/test_numpy.py
@@ -27,7 +27,7 @@ class CompareHostDeviceTest(ExperimentCase):
     def _test_binop(self, op, a, b):
         exp = self.create(_RunOnDevice)
         exp.run = kernel_from_string(["a", "b", "callback", "numpy"],
-                                     "callback(a " + op + "b)",
+                                     "callback(" + op + ")",
                                      decorator=portable)
         checked = False
 
@@ -74,16 +74,17 @@ class CompareHostDeviceTest(ExperimentCase):
                 for typ in [numpy.int32, numpy.int64, numpy.float]]
         for op in ELEM_WISE_BINOPS:
             for arg in args:
-                self._test_binop(op, *arg)
+                self._test_binop("a" + op + "b", *arg)
 
     def test_scalar_matrix_binops(self):
         for typ in [numpy.int32, numpy.int64, numpy.float]:
             scalar = typ(3)
             matrix = numpy.array([[4, 5, 6], [7, 8, 9]], dtype=typ)
             for op in ELEM_WISE_BINOPS:
-                self._test_binop(op, scalar, matrix)
-                self._test_binop(op, matrix, scalar)
-                self._test_binop(op, matrix, matrix)
+                code = "a" + op + "b"
+                self._test_binop(code, scalar, matrix)
+                self._test_binop(code, matrix, scalar)
+                self._test_binop(code, matrix, matrix)
 
     def test_unary_math_fns(self):
         names = [
@@ -99,3 +100,17 @@ class CompareHostDeviceTest(ExperimentCase):
             # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
             self._test_unaryop(op, 0.51)
             self._test_unaryop(op, numpy.array([[0.3, 0.4], [0.51, 0.6]]))
+
+    def test_binary_math_fns(self):
+        names = [name for name, _ in math_fns.binary_fp_runtime_calls]
+        exp = self.create(_RunOnDevice)
+        if exp.core.target_cls != CortexA9Target:
+            names.remove("fmax")
+            names.remove("fmin")
+        for name in names:
+            code = "numpy.{}(a, b)".format(name)
+            # Avoid 0.5, as numpy.rint's rounding mode currently doesn't match.
+            self._test_binop(code, 1.0, 2.0)
+            self._test_binop(code, 1.0, numpy.array([2.0, 3.0]))
+            self._test_binop(code, numpy.array([1.0, 2.0]), 3.0)
+            self._test_binop(code, numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0]))
diff --git a/artiq/test/lit/embedding/array_math_fns.py b/artiq/test/lit/embedding/array_math_fns.py
index 2b298f2ef..d23540b48 100644
--- a/artiq/test/lit/embedding/array_math_fns.py
+++ b/artiq/test/lit/embedding/array_math_fns.py
@@ -6,10 +6,21 @@ import numpy as np
 @kernel
 def entrypoint():
     # Just make sure everything compiles.
+
+    # LLVM intrinsic:
     a = np.array([1.0, 2.0, 3.0])
     b = np.sin(a)
     assert b.shape == a.shape
 
+    # libm:
     c = np.array([1.0, 2.0, 3.0])
     d = np.arctan(c)
     assert d.shape == c.shape
+
+    # libm, binary:
+    e = np.array([1.0, 2.0, 3.0])
+    f = np.array([4.0, 5.0, 6.0])
+    g = np.arctan2(e, f)
+    # g = np.arctan2(e, 0.0)
+    # g = np.arctan2(0.0, f)
+    assert g.shape == e.shape
diff --git a/artiq/test/lit/embedding/math_fns.py b/artiq/test/lit/embedding/math_fns.py
index 40ac18e42..6f9416c8d 100644
--- a/artiq/test/lit/embedding/math_fns.py
+++ b/artiq/test/lit/embedding/math_fns.py
@@ -28,3 +28,4 @@ def entrypoint():
     assert numpy.arcsin(0.0) == 0.0
     assert numpy.arccos(1.0) == 0.0
     assert numpy.arctan(0.0) == 0.0
+    assert numpy.arctan2(0.0, 1.0) == 0.0