compiler: extract runtime checks into separate cold functions.

This reduces register pressure as well as function size, which
favorably affects the inliner.
This commit is contained in:
whitequark 2016-03-27 01:02:15 +00:00
parent 2a210d74fb
commit e75ad3d1aa
4 changed files with 110 additions and 39 deletions

View File

@ -423,6 +423,8 @@ class Function:
:ivar is_internal: :ivar is_internal:
(bool) if True, the function should not be accessible from outside (bool) if True, the function should not be accessible from outside
the module it is contained in the module it is contained in
:ivar is_cold:
(bool) if True, the function should be considered rarely called
""" """
def __init__(self, typ, name, arguments, loc=None): def __init__(self, typ, name, arguments, loc=None):
@ -431,6 +433,7 @@ class Function:
self.next_name = 1 self.next_name = 1
self.set_arguments(arguments) self.set_arguments(arguments)
self.is_internal = False self.is_internal = False
self.is_cold = False
def _remove_name(self, name): def _remove_name(self, name):
self.names.remove(name) self.names.remove(name)
@ -922,6 +925,8 @@ class Call(Instruction):
iodelay expressions for values of arguments iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None) :ivar static_target_function: (:class:`Function` or None)
statically resolved callee statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
""" """
""" """
@ -938,6 +943,7 @@ class Call(Instruction):
super().__init__([func] + args, func.type.ret, name) super().__init__([func] + args, func.type.ret, name)
self.arg_exprs = arg_exprs self.arg_exprs = arg_exprs
self.static_target_function = None self.static_target_function = None
self.is_cold = False
def copy(self, mapper): def copy(self, mapper):
self_copy = super().copy(mapper) self_copy = super().copy(mapper)
@ -1186,6 +1192,8 @@ class Invoke(Terminator):
iodelay expressions for values of arguments iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None) :ivar static_target_function: (:class:`Function` or None)
statically resolved callee statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
""" """
""" """
@ -1206,6 +1214,7 @@ class Invoke(Terminator):
super().__init__([func] + args + [normal, exn], func.type.ret, name) super().__init__([func] + args + [normal, exn], func.type.ret, name)
self.arg_exprs = arg_exprs self.arg_exprs = arg_exprs
self.static_target_function = None self.static_target_function = None
self.is_cold = False
def copy(self, mapper): def copy(self, mapper):
self_copy = super().copy(mapper) self_copy = super().copy(mapper)

View File

@ -101,8 +101,9 @@ class Target:
# Now, actually optimize the code. # Now, actually optimize the code.
llpassmgr.add_function_inlining_pass(70) llpassmgr.add_function_inlining_pass(70)
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_instruction_combining_pass() llpassmgr.add_instruction_combining_pass()
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_dead_arg_elimination_pass()
llpassmgr.add_gvn_pass() llpassmgr.add_gvn_pass()
llpassmgr.add_global_dce_pass() llpassmgr.add_global_dce_pass()

View File

@ -302,7 +302,8 @@ class ARTIQIRGenerator(algorithm.Visitor):
for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)): for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)):
default = codegen_default() default = codegen_default()
value = self.append(ir.Builtin("unwrap_or", [optargs[index], default], value = self.append(ir.Builtin("unwrap_or", [optargs[index], default],
typ.optargs[arg_name])) typ.optargs[arg_name],
name="DEF.{}".format(arg_name)))
self.append(ir.SetLocal(env, arg_name, value)) self.append(ir.SetLocal(env, arg_name, value))
result = self.visit(node.body) result = self.visit(node.body)
@ -574,9 +575,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
self.current_block = raise_proxy self.current_block = raise_proxy
if exn is not None: if exn is not None:
if loc is None: assert loc is not None
loc = self.current_loc
loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr()) loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr())
loc_line = ir.Constant(loc.line(), builtins.TInt32()) loc_line = ir.Constant(loc.line(), builtins.TInt32())
loc_column = ir.Constant(loc.column(), builtins.TInt32()) loc_column = ir.Constant(loc.column(), builtins.TInt32())
@ -598,7 +597,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
self.append(ir.Reraise()) self.append(ir.Reraise())
def visit_Raise(self, node): def visit_Raise(self, node):
self.raise_exn(self.visit(node.exc)) self.raise_exn(self.visit(node.exc), loc=self.current_loc)
def visit_Try(self, node): def visit_Try(self, node):
dispatcher = self.add_block("try.dispatch") dispatcher = self.add_block("try.dispatch")
@ -927,6 +926,55 @@ class ARTIQIRGenerator(algorithm.Visitor):
else: else:
return self.append(ir.SetAttr(obj, node.attr, self.current_assign)) return self.append(ir.SetAttr(obj, node.attr, self.current_assign))
def _make_check(self, cond, exn_gen, loc=None, params=None):
    """Emit a runtime check whose failure path lives in a separate cold function.

    A new internal function named ``<self.name>.check:<line>:<column>`` is
    created, flagged ``is_internal`` and ``is_cold``, and appended to
    ``self.functions``. Its only block, ``entry``, raises the exception
    produced by ``exn_gen``. In the function currently being generated, a
    ``check.body`` block calls (or invokes, when ``self.unwind_target`` is
    set) that function and is terminated by ``ir.Unreachable``; the block that
    was current on entry receives a conditional branch to ``check.tail`` when
    ``cond`` holds and to ``check.body`` otherwise. On return,
    ``self.current_block`` is the ``check.tail`` block.

    :param cond: bool Value; the check passes when it is true
    :param exn_gen: callable receiving one :class:`ir.Argument` per entry of
        ``params`` (in the same order) and returning the exception Value to
        raise; it runs while the cold function is the current function, so it
        must use these arguments instead of the caller's values
    :param loc: source location used for the function name and for the raised
        exception; defaults to ``self.current_loc``
    :param params: Values of the enclosing function that are passed to the
        cold function as call arguments; defaults to no parameters
    """
    if loc is None:
        loc = self.current_loc
    # Avoid a shared mutable default; a copy also lets callers pass any sequence.
    params = [] if params is None else list(params)

    name = "check:{}:{}".format(loc.line(), loc.column())
    args = [ir.EnvironmentArgument(self.current_env.type, "ARG.ENV")] + \
           [ir.Argument(param.type, "ARG.{}".format(index))
            for index, param in enumerate(params)]
    typ = types.TFunction(OrderedDict([("arg{}".format(index), param.type)
                                       for index, param in enumerate(params)]),
                          OrderedDict(),
                          builtins.TNone())
    func = ir.Function(typ, ".".join(self.name + [name]), args, loc=loc)
    func.is_internal = True
    func.is_cold = True
    self.functions.append(func)

    # Snapshot the generator state *before* touching it, so that the restore
    # in ``finally`` never refers to an unbound local and cannot mask the
    # original exception.
    saved_state = (self.current_function, self.current_block,
                   self.final_branch, self.unwind_target)
    try:
        self.current_function = func
        self.current_block = self.add_block("entry")
        # The cold function is built with no pending final branch and no
        # unwind target of its own.
        self.final_branch = None
        self.unwind_target = None
        # args[0] is the environment argument; exn_gen only sees the rest.
        self.raise_exn(exn_gen(*args[1:]), loc=loc)
    finally:
        (self.current_function, self.current_block,
         self.final_branch, self.unwind_target) = saved_state

    cond_block = self.current_block

    # Failure path: call the cold function; control never returns from it.
    self.current_block = body_block = self.add_block("check.body")
    closure = self.append(ir.Closure(func, ir.Constant(None, ir.TEnvironment("check", {}))))
    if self.unwind_target is None:
        insn = self.append(ir.Call(closure, params, {}))
    else:
        # Invoke is a terminator, so the Unreachable goes in its normal target.
        after_invoke = self.add_block("check.invoke")
        insn = self.append(ir.Invoke(closure, params, {}, after_invoke, self.unwind_target))
        self.current_block = after_invoke
    insn.is_cold = True
    self.append(ir.Unreachable())

    # Success path: code generated after this call lands in the tail block.
    self.current_block = tail_block = self.add_block("check.tail")
    cond_block.append(ir.BranchIf(cond, tail_block, body_block))
def _map_index(self, length, index, one_past_the_end=False, loc=None): def _map_index(self, length, index, one_past_the_end=False, loc=None):
lt_0 = self.append(ir.Compare(ast.Lt(loc=None), lt_0 = self.append(ir.Compare(ast.Lt(loc=None),
index, ir.Constant(0, index.type))) index, ir.Constant(0, index.type)))
@ -940,28 +988,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
ir.Constant(False, builtins.TBool()))) ir.Constant(False, builtins.TBool())))
head = self.current_block head = self.current_block
self.current_block = out_of_bounds_block = self.add_block("index.outofbounds") self._make_check(
exn = self.alloc_exn(builtins.TException("IndexError"), in_bounds,
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()), lambda index, length: self.alloc_exn(builtins.TException("IndexError"),
index, length) ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
self.raise_exn(exn, loc=loc) index, length),
params=[index, length],
self.current_block = in_bounds_block = self.add_block("index.inbounds") loc=loc)
head.append(ir.BranchIf(in_bounds, in_bounds_block, out_of_bounds_block))
return mapped_index return mapped_index
# NOTE(review): this appears to be the removed (pre-commit) side of the diff;
# here the raise is emitted inline in the function currently being generated.
def _make_check(self, cond, exn_gen, loc=None, name="check"):
# cond: bool Value, condition
# exn_gen: lambda()->exn Value, exception if condition not true
# loc: passed through to raise_exn as the location of the raised exception
# name: prefix of the "<name>.body" and "<name>.tail" block names
cond_block = self.current_block
# Failure path: a fresh block that raises the exception built by exn_gen().
self.current_block = body_block = self.add_block("{}.body".format(name))
self.raise_exn(exn_gen(), loc=loc)
# Later code is emitted into the tail block; the block that was current on
# entry branches to it when cond is true and to the raising block otherwise.
self.current_block = tail_block = self.add_block("{}.tail".format(name))
cond_block.append(ir.BranchIf(cond, tail_block, body_block))
def _make_loop(self, init, cond_gen, body_gen, name="loop"): def _make_loop(self, init, cond_gen, body_gen, name="loop"):
# init: 'iter Value, initial loop variable value # init: 'iter Value, initial loop variable value
# cond_gen: lambda('iter Value)->bool Value, loop condition # cond_gen: lambda('iter Value)->bool Value, loop condition
@ -1064,10 +1100,11 @@ class ARTIQIRGenerator(algorithm.Visitor):
name="slice.size")) name="slice.size"))
self._make_check( self._make_check(
self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)), self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)),
lambda: self.alloc_exn(builtins.TException("ValueError"), lambda slice_size, length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("slice size {0} is larger than iterable length {1}", ir.Constant("slice size {0} is larger than iterable length {1}",
builtins.TStr()), builtins.TStr()),
slice_size, length), slice_size, length),
params=[slice_size, length],
loc=node.slice.loc) loc=node.slice.loc)
if self.current_assign is None: if self.current_assign is None:
@ -1147,9 +1184,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
self._make_check( self._make_check(
self.append(ir.Compare(ast.Eq(loc=None), length, self.append(ir.Compare(ast.Eq(loc=None), length,
ir.Constant(len(node.elts), self._size_type))), ir.Constant(len(node.elts), self._size_type))),
lambda: self.alloc_exn(builtins.TException("ValueError"), lambda length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("list must be {0} elements long to decompose", builtins.TStr()), ir.Constant("list must be {0} elements long to decompose", builtins.TStr()),
length)) length),
params=[length])
for index, elt_node in enumerate(node.elts): for index, elt_node in enumerate(node.elts):
elt = self.append(ir.GetElem(self.current_assign, elt = self.append(ir.GetElem(self.current_assign,

View File

@ -313,7 +313,10 @@ class LLVMIRGenerator:
def llconst_of_const(self, const): def llconst_of_const(self, const):
llty = self.llty_of_type(const.type) llty = self.llty_of_type(const.type)
if const.value is None: if const.value is None:
return ll.Constant(llty, []) if isinstance(llty, ll.PointerType):
return ll.Constant(llty, None)
else:
return ll.Constant(llty, [])
elif const.value is True: elif const.value is True:
return ll.Constant(llty, True) return ll.Constant(llty, True)
elif const.value is False: elif const.value is False:
@ -539,6 +542,10 @@ class LLVMIRGenerator:
if func.is_internal: if func.is_internal:
self.llfunction.linkage = 'private' self.llfunction.linkage = 'private'
if func.is_cold:
self.llfunction.calling_convention = 'coldcc'
self.llfunction.attributes.add('cold')
self.llfunction.attributes.add('noinline')
self.llfunction.attributes.add('uwtable') self.llfunction.attributes.add('uwtable')
@ -1039,7 +1046,7 @@ class LLVMIRGenerator:
def process_Closure(self, insn): def process_Closure(self, insn):
llenv = self.map(insn.environment()) llenv = self.map(insn.environment())
llenv = self.llbuilder.bitcast(llenv, llptr, name="ptr.{}".format(llenv.name)) llenv = self.llbuilder.bitcast(llenv, llptr)
llfun = self.map(insn.target_function) llfun = self.map(insn.target_function)
llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined) llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined)
llvalue = self.llbuilder.insert_value(llvalue, llenv, 0) llvalue = self.llbuilder.insert_value(llvalue, llenv, 0)
@ -1244,15 +1251,17 @@ class LLVMIRGenerator:
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), []) llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee) llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
self.llbuilder.call(llfun, [llresultslot] + llargs) llcall = self.llbuilder.call(llfun, [llresultslot] + llargs)
llresult = self.llbuilder.load(llresultslot) llresult = self.llbuilder.load(llresultslot)
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr]) self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
return llresult
else: else:
return self.llbuilder.call(llfun, llargs, llcall = llresult = self.llbuilder.call(llfun, llargs, name=insn.name)
name=insn.name)
if insn.is_cold:
llcall.cconv = 'coldcc'
return llresult
def process_Invoke(self, insn): def process_Invoke(self, insn):
llnormalblock = self.map(insn.normal_target()) llnormalblock = self.map(insn.normal_target())
@ -1264,12 +1273,26 @@ class LLVMIRGenerator:
llnormalblock, llunwindblock) llnormalblock, llunwindblock)
elif types.is_c_function(insn.target_function().type): elif types.is_c_function(insn.target_function().type):
llfun, llargs = self._prepare_ffi_call(insn) llfun, llargs = self._prepare_ffi_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
else: else:
llfun, llargs = self._prepare_closure_call(insn) llfun, llargs = self._prepare_closure_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name) if self.has_sret(insn.target_function().type):
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
llresult = self.llbuilder.load(llresultslot)
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
else:
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
if insn.is_cold:
llcall.cconv = 'coldcc'
return llcall
def _quote(self, value, typ, path): def _quote(self, value, typ, path):
value_id = id(value) value_id = id(value)