From e75ad3d1aa56a63d6978a5c8763e31429fdea43d Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 27 Mar 2016 01:02:15 +0000 Subject: [PATCH] compiler: extract runtime checks into separate cold functions. This reduces register pressure as well as function size, which favorably affects the inliner. --- artiq/compiler/ir.py | 9 ++ artiq/compiler/targets.py | 3 +- .../compiler/transforms/artiq_ir_generator.py | 92 +++++++++++++------ .../compiler/transforms/llvm_ir_generator.py | 45 ++++++--- 4 files changed, 110 insertions(+), 39 deletions(-) diff --git a/artiq/compiler/ir.py b/artiq/compiler/ir.py index 18265eef9..91cc88988 100644 --- a/artiq/compiler/ir.py +++ b/artiq/compiler/ir.py @@ -423,6 +423,8 @@ class Function: :ivar is_internal: (bool) if True, the function should not be accessible from outside the module it is contained in + :ivar is_cold: + (bool) if True, the function should be considered rarely called """ def __init__(self, typ, name, arguments, loc=None): @@ -431,6 +433,7 @@ class Function: self.next_name = 1 self.set_arguments(arguments) self.is_internal = False + self.is_cold = False def _remove_name(self, name): self.names.remove(name) @@ -922,6 +925,8 @@ class Call(Instruction): iodelay expressions for values of arguments :ivar static_target_function: (:class:`Function` or None) statically resolved callee + :ivar is_cold: (bool) + the callee function is cold """ """ @@ -938,6 +943,7 @@ class Call(Instruction): super().__init__([func] + args, func.type.ret, name) self.arg_exprs = arg_exprs self.static_target_function = None + self.is_cold = False def copy(self, mapper): self_copy = super().copy(mapper) @@ -1186,6 +1192,8 @@ class Invoke(Terminator): iodelay expressions for values of arguments :ivar static_target_function: (:class:`Function` or None) statically resolved callee + :ivar is_cold: (bool) + the callee function is cold """ """ @@ -1206,6 +1214,7 @@ class Invoke(Terminator): super().__init__([func] + args + [normal, exn], func.type.ret, 
name) self.arg_exprs = arg_exprs self.static_target_function = None + self.is_cold = False def copy(self, mapper): self_copy = super().copy(mapper) diff --git a/artiq/compiler/targets.py b/artiq/compiler/targets.py index a2e4ba144..8a3211be1 100644 --- a/artiq/compiler/targets.py +++ b/artiq/compiler/targets.py @@ -101,8 +101,9 @@ class Target: # Now, actually optimize the code. llpassmgr.add_function_inlining_pass(70) - llpassmgr.add_cfg_simplification_pass() llpassmgr.add_instruction_combining_pass() + llpassmgr.add_cfg_simplification_pass() + llpassmgr.add_dead_arg_elimination_pass() llpassmgr.add_gvn_pass() llpassmgr.add_global_dce_pass() diff --git a/artiq/compiler/transforms/artiq_ir_generator.py b/artiq/compiler/transforms/artiq_ir_generator.py index a0bac70b3..1875c4989 100644 --- a/artiq/compiler/transforms/artiq_ir_generator.py +++ b/artiq/compiler/transforms/artiq_ir_generator.py @@ -302,7 +302,8 @@ class ARTIQIRGenerator(algorithm.Visitor): for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)): default = codegen_default() value = self.append(ir.Builtin("unwrap_or", [optargs[index], default], - typ.optargs[arg_name])) + typ.optargs[arg_name], + name="DEF.{}".format(arg_name))) self.append(ir.SetLocal(env, arg_name, value)) result = self.visit(node.body) @@ -574,9 +575,7 @@ class ARTIQIRGenerator(algorithm.Visitor): self.current_block = raise_proxy if exn is not None: - if loc is None: - loc = self.current_loc - + assert loc is not None loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr()) loc_line = ir.Constant(loc.line(), builtins.TInt32()) loc_column = ir.Constant(loc.column(), builtins.TInt32()) @@ -598,7 +597,7 @@ class ARTIQIRGenerator(algorithm.Visitor): self.append(ir.Reraise()) def visit_Raise(self, node): - self.raise_exn(self.visit(node.exc)) + self.raise_exn(self.visit(node.exc), loc=self.current_loc) def visit_Try(self, node): dispatcher = self.add_block("try.dispatch") @@ -927,6 +926,55 @@ class 
ARTIQIRGenerator(algorithm.Visitor): else: return self.append(ir.SetAttr(obj, node.attr, self.current_assign)) + def _make_check(self, cond, exn_gen, loc=None, params=[]): + if loc is None: + loc = self.current_loc + + try: + name = "check:{}:{}".format(loc.line(), loc.column()) + args = [ir.EnvironmentArgument(self.current_env.type, "ARG.ENV")] + \ + [ir.Argument(param.type, "ARG.{}".format(index)) + for index, param in enumerate(params)] + typ = types.TFunction(OrderedDict([("arg{}".format(index), param.type) + for index, param in enumerate(params)]), + OrderedDict(), + builtins.TNone()) + func = ir.Function(typ, ".".join(self.name + [name]), args, loc=loc) + func.is_internal = True + func.is_cold = True + self.functions.append(func) + old_func, self.current_function = self.current_function, func + + entry = self.add_block("entry") + old_block, self.current_block = self.current_block, entry + + old_final_branch, self.final_branch = self.final_branch, None + old_unwind, self.unwind_target = self.unwind_target, None + self.raise_exn(exn_gen(*args[1:]), loc=loc) + finally: + self.current_function = old_func + self.current_block = old_block + self.final_branch = old_final_branch + self.unwind_target = old_unwind + + # cond: bool Value, condition + # exn_gen: lambda(*params)->exn Value, exception if condition not true + cond_block = self.current_block + + self.current_block = body_block = self.add_block("check.body") + closure = self.append(ir.Closure(func, ir.Constant(None, ir.TEnvironment("check", {})))) + if self.unwind_target is None: + insn = self.append(ir.Call(closure, params, {})) + else: + after_invoke = self.add_block("check.invoke") + insn = self.append(ir.Invoke(closure, params, {}, after_invoke, self.unwind_target)) + self.current_block = after_invoke + insn.is_cold = True + self.append(ir.Unreachable()) + + self.current_block = tail_block = self.add_block("check.tail") + cond_block.append(ir.BranchIf(cond, tail_block, body_block)) + def _map_index(self, length, 
index, one_past_the_end=False, loc=None): lt_0 = self.append(ir.Compare(ast.Lt(loc=None), index, ir.Constant(0, index.type))) @@ -940,28 +988,16 @@ class ARTIQIRGenerator(algorithm.Visitor): ir.Constant(False, builtins.TBool()))) head = self.current_block - self.current_block = out_of_bounds_block = self.add_block("index.outofbounds") - exn = self.alloc_exn(builtins.TException("IndexError"), - ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()), - index, length) - self.raise_exn(exn, loc=loc) - - self.current_block = in_bounds_block = self.add_block("index.inbounds") - head.append(ir.BranchIf(in_bounds, in_bounds_block, out_of_bounds_block)) + self._make_check( + in_bounds, + lambda index, length: self.alloc_exn(builtins.TException("IndexError"), + ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()), + index, length), + params=[index, length], + loc=loc) return mapped_index - def _make_check(self, cond, exn_gen, loc=None, name="check"): - # cond: bool Value, condition - # exn_gen: lambda()->exn Value, exception if condition not true - cond_block = self.current_block - - self.current_block = body_block = self.add_block("{}.body".format(name)) - self.raise_exn(exn_gen(), loc=loc) - - self.current_block = tail_block = self.add_block("{}.tail".format(name)) - cond_block.append(ir.BranchIf(cond, tail_block, body_block)) - def _make_loop(self, init, cond_gen, body_gen, name="loop"): # init: 'iter Value, initial loop variable value # cond_gen: lambda('iter Value)->bool Value, loop condition @@ -1064,10 +1100,11 @@ class ARTIQIRGenerator(algorithm.Visitor): name="slice.size")) self._make_check( self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)), - lambda: self.alloc_exn(builtins.TException("ValueError"), + lambda slice_size, length: self.alloc_exn(builtins.TException("ValueError"), ir.Constant("slice size {0} is larger than iterable length {1}", builtins.TStr()), slice_size, length), + params=[slice_size, length], loc=node.slice.loc) if 
self.current_assign is None: @@ -1147,9 +1184,10 @@ class ARTIQIRGenerator(algorithm.Visitor): self._make_check( self.append(ir.Compare(ast.Eq(loc=None), length, ir.Constant(len(node.elts), self._size_type))), - lambda: self.alloc_exn(builtins.TException("ValueError"), + lambda length: self.alloc_exn(builtins.TException("ValueError"), ir.Constant("list must be {0} elements long to decompose", builtins.TStr()), - length)) + length), + params=[length]) for index, elt_node in enumerate(node.elts): elt = self.append(ir.GetElem(self.current_assign, diff --git a/artiq/compiler/transforms/llvm_ir_generator.py b/artiq/compiler/transforms/llvm_ir_generator.py index c73124ecb..d853bad65 100644 --- a/artiq/compiler/transforms/llvm_ir_generator.py +++ b/artiq/compiler/transforms/llvm_ir_generator.py @@ -313,7 +313,10 @@ class LLVMIRGenerator: def llconst_of_const(self, const): llty = self.llty_of_type(const.type) if const.value is None: - return ll.Constant(llty, []) + if isinstance(llty, ll.PointerType): + return ll.Constant(llty, None) + else: + return ll.Constant(llty, []) elif const.value is True: return ll.Constant(llty, True) elif const.value is False: @@ -539,6 +542,10 @@ class LLVMIRGenerator: if func.is_internal: self.llfunction.linkage = 'private' + if func.is_cold: + self.llfunction.calling_convention = 'coldcc' + self.llfunction.attributes.add('cold') + self.llfunction.attributes.add('noinline') self.llfunction.attributes.add('uwtable') @@ -1039,7 +1046,7 @@ class LLVMIRGenerator: def process_Closure(self, insn): llenv = self.map(insn.environment()) - llenv = self.llbuilder.bitcast(llenv, llptr, name="ptr.{}".format(llenv.name)) + llenv = self.llbuilder.bitcast(llenv, llptr) llfun = self.map(insn.target_function) llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined) llvalue = self.llbuilder.insert_value(llvalue, llenv, 0) @@ -1244,15 +1251,17 @@ class LLVMIRGenerator: llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), []) 
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee) - self.llbuilder.call(llfun, [llresultslot] + llargs) + llcall = self.llbuilder.call(llfun, [llresultslot] + llargs) llresult = self.llbuilder.load(llresultslot) self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr]) - - return llresult else: - return self.llbuilder.call(llfun, llargs, - name=insn.name) + llcall = llresult = self.llbuilder.call(llfun, llargs, name=insn.name) + + if insn.is_cold: + llcall.cconv = 'coldcc' + + return llresult def process_Invoke(self, insn): llnormalblock = self.map(insn.normal_target()) @@ -1264,12 +1273,26 @@ class LLVMIRGenerator: llnormalblock, llunwindblock) elif types.is_c_function(insn.target_function().type): llfun, llargs = self._prepare_ffi_call(insn) - return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock, - name=insn.name) else: llfun, llargs = self._prepare_closure_call(insn) - return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock, - name=insn.name) + + if self.has_sret(insn.target_function().type): + llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), []) + + llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee) + llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock, + name=insn.name) + llresult = self.llbuilder.load(llresultslot) + + self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr]) + else: + llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock, + name=insn.name) + + if insn.is_cold: + llcall.cconv = 'coldcc' + + return llcall def _quote(self, value, typ, path): value_id = id(value)