forked from M-Labs/artiq
1
0
Fork 0

compiler: extract runtime checks into separate cold functions.

This reduces register pressure as well as function size, which
favorably affects the inliner.
This commit is contained in:
whitequark 2016-03-27 01:02:15 +00:00
parent 2a210d74fb
commit e75ad3d1aa
4 changed files with 110 additions and 39 deletions

View File

@ -423,6 +423,8 @@ class Function:
:ivar is_internal:
(bool) if True, the function should not be accessible from outside
the module it is contained in
:ivar is_cold:
(bool) if True, the function should be considered rarely called
"""
def __init__(self, typ, name, arguments, loc=None):
@ -431,6 +433,7 @@ class Function:
self.next_name = 1
self.set_arguments(arguments)
self.is_internal = False
self.is_cold = False
def _remove_name(self, name):
self.names.remove(name)
@ -922,6 +925,8 @@ class Call(Instruction):
iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None)
statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
"""
"""
@ -938,6 +943,7 @@ class Call(Instruction):
super().__init__([func] + args, func.type.ret, name)
self.arg_exprs = arg_exprs
self.static_target_function = None
self.is_cold = False
def copy(self, mapper):
self_copy = super().copy(mapper)
@ -1186,6 +1192,8 @@ class Invoke(Terminator):
iodelay expressions for values of arguments
:ivar static_target_function: (:class:`Function` or None)
statically resolved callee
:ivar is_cold: (bool)
the callee function is cold
"""
"""
@ -1206,6 +1214,7 @@ class Invoke(Terminator):
super().__init__([func] + args + [normal, exn], func.type.ret, name)
self.arg_exprs = arg_exprs
self.static_target_function = None
self.is_cold = False
def copy(self, mapper):
self_copy = super().copy(mapper)

View File

@ -101,8 +101,9 @@ class Target:
# Now, actually optimize the code.
llpassmgr.add_function_inlining_pass(70)
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_instruction_combining_pass()
llpassmgr.add_cfg_simplification_pass()
llpassmgr.add_dead_arg_elimination_pass()
llpassmgr.add_gvn_pass()
llpassmgr.add_global_dce_pass()

View File

@ -302,7 +302,8 @@ class ARTIQIRGenerator(algorithm.Visitor):
for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)):
default = codegen_default()
value = self.append(ir.Builtin("unwrap_or", [optargs[index], default],
typ.optargs[arg_name]))
typ.optargs[arg_name],
name="DEF.{}".format(arg_name)))
self.append(ir.SetLocal(env, arg_name, value))
result = self.visit(node.body)
@ -574,9 +575,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
self.current_block = raise_proxy
if exn is not None:
if loc is None:
loc = self.current_loc
assert loc is not None
loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr())
loc_line = ir.Constant(loc.line(), builtins.TInt32())
loc_column = ir.Constant(loc.column(), builtins.TInt32())
@ -598,7 +597,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
self.append(ir.Reraise())
def visit_Raise(self, node):
self.raise_exn(self.visit(node.exc))
self.raise_exn(self.visit(node.exc), loc=self.current_loc)
def visit_Try(self, node):
dispatcher = self.add_block("try.dispatch")
@ -927,6 +926,55 @@ class ARTIQIRGenerator(algorithm.Visitor):
else:
return self.append(ir.SetAttr(obj, node.attr, self.current_assign))
def _make_check(self, cond, exn_gen, loc=None, params=None):
    """Emit a runtime check whose failure path lives in a separate cold function.

    A new internal, cold function named ``<self.name>.check:<line>:<column>``
    is appended to ``self.functions``. Its single ``entry`` block raises the
    exception built by ``exn_gen``. In the function currently being
    generated, a ``check.body`` block calls (or, when an unwind target is
    active, invokes) that function through a closure and is then terminated
    with ``Unreachable``; a ``check.tail`` block becomes the new current
    block. The block that was current on entry is terminated with a branch
    to ``check.tail`` when ``cond`` holds and to ``check.body`` otherwise.

    :param cond: bool Value, condition that must hold
    :param exn_gen: callable returning the exception Value to raise when
        ``cond`` is false; it is called with the outlined function's
        arguments that correspond to ``params`` (one per entry, in order)
    :param loc: source location used for the function name and the raise;
        defaults to ``self.current_loc``
    :param params: list of Values forwarded to the outlined function and,
        through its arguments, to ``exn_gen``; defaults to no parameters
    """
    if loc is None:
        loc = self.current_loc
    if params is None:
        # Avoid a shared mutable default; a fresh list keeps the
        # ``ir.Call``/``ir.Invoke`` operand handling unchanged.
        params = []

    # Snapshot the generator state *before* entering the try block, so the
    # finally clause can always restore it. Binding these inside the try
    # (as was done previously) made the finally clause raise
    # UnboundLocalError, hiding the real error, whenever something failed
    # before the bindings were reached.
    saved_function = self.current_function
    saved_block = self.current_block
    saved_final_branch = self.final_branch
    saved_unwind = self.unwind_target
    try:
        name = "check:{}:{}".format(loc.line(), loc.column())
        # The first argument is the environment; the rest mirror `params`.
        args = [ir.EnvironmentArgument(self.current_env.type, "ARG.ENV")] + \
               [ir.Argument(param.type, "ARG.{}".format(index))
                for index, param in enumerate(params)]
        typ = types.TFunction(OrderedDict([("arg{}".format(index), param.type)
                                           for index, param in enumerate(params)]),
                              OrderedDict(),
                              builtins.TNone())
        func = ir.Function(typ, ".".join(self.name + [name]), args, loc=loc)
        func.is_internal = True
        func.is_cold = True
        self.functions.append(func)

        # Temporarily redirect code generation into the outlined function.
        self.current_function = func
        self.current_block = self.add_block("entry")
        # No enclosing finally/try context applies inside the new function.
        self.final_branch = None
        self.unwind_target = None
        self.raise_exn(exn_gen(*args[1:]), loc=loc)
    finally:
        self.current_function = saved_function
        self.current_block = saved_block
        self.final_branch = saved_final_branch
        self.unwind_target = saved_unwind

    cond_block = self.current_block

    # Failure path: call the cold function, which never returns normally.
    self.current_block = body_block = self.add_block("check.body")
    closure = self.append(ir.Closure(func, ir.Constant(None, ir.TEnvironment("check", {}))))
    if self.unwind_target is None:
        insn = self.append(ir.Call(closure, params, {}))
    else:
        # Inside a try block the call must be an invoke so that the raised
        # exception reaches the active unwind target.
        after_invoke = self.add_block("check.invoke")
        insn = self.append(ir.Invoke(closure, params, {}, after_invoke, self.unwind_target))
        self.current_block = after_invoke
    insn.is_cold = True
    self.append(ir.Unreachable())

    # Success path: generation continues in the tail block.
    self.current_block = tail_block = self.add_block("check.tail")
    cond_block.append(ir.BranchIf(cond, tail_block, body_block))
def _map_index(self, length, index, one_past_the_end=False, loc=None):
lt_0 = self.append(ir.Compare(ast.Lt(loc=None),
index, ir.Constant(0, index.type)))
@ -940,28 +988,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
ir.Constant(False, builtins.TBool())))
head = self.current_block
self.current_block = out_of_bounds_block = self.add_block("index.outofbounds")
exn = self.alloc_exn(builtins.TException("IndexError"),
self._make_check(
in_bounds,
lambda index, length: self.alloc_exn(builtins.TException("IndexError"),
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
index, length)
self.raise_exn(exn, loc=loc)
self.current_block = in_bounds_block = self.add_block("index.inbounds")
head.append(ir.BranchIf(in_bounds, in_bounds_block, out_of_bounds_block))
index, length),
params=[index, length],
loc=loc)
return mapped_index
def _make_check(self, cond, exn_gen, loc=None, name="check"):
    """Emit an inline runtime check that raises when ``cond`` is false.

    :param cond: bool Value, condition that must hold
    :param exn_gen: callable taking no arguments and returning the
        exception Value to raise when ``cond`` is false
    :param loc: source location forwarded to ``raise_exn``
    :param name: prefix for the names of the two blocks that are added
    """
    head = self.current_block

    # Failure path: build and raise the exception.
    failure = self.add_block("{}.body".format(name))
    self.current_block = failure
    self.raise_exn(exn_gen(), loc=loc)

    # Success path: generation continues here after the check.
    success = self.add_block("{}.tail".format(name))
    self.current_block = success

    # Terminate the block that was current on entry with the branch.
    head.append(ir.BranchIf(cond, success, failure))
def _make_loop(self, init, cond_gen, body_gen, name="loop"):
# init: 'iter Value, initial loop variable value
# cond_gen: lambda('iter Value)->bool Value, loop condition
@ -1064,10 +1100,11 @@ class ARTIQIRGenerator(algorithm.Visitor):
name="slice.size"))
self._make_check(
self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)),
lambda: self.alloc_exn(builtins.TException("ValueError"),
lambda slice_size, length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("slice size {0} is larger than iterable length {1}",
builtins.TStr()),
slice_size, length),
params=[slice_size, length],
loc=node.slice.loc)
if self.current_assign is None:
@ -1147,9 +1184,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
self._make_check(
self.append(ir.Compare(ast.Eq(loc=None), length,
ir.Constant(len(node.elts), self._size_type))),
lambda: self.alloc_exn(builtins.TException("ValueError"),
lambda length: self.alloc_exn(builtins.TException("ValueError"),
ir.Constant("list must be {0} elements long to decompose", builtins.TStr()),
length))
length),
params=[length])
for index, elt_node in enumerate(node.elts):
elt = self.append(ir.GetElem(self.current_assign,

View File

@ -313,6 +313,9 @@ class LLVMIRGenerator:
def llconst_of_const(self, const):
llty = self.llty_of_type(const.type)
if const.value is None:
if isinstance(llty, ll.PointerType):
return ll.Constant(llty, None)
else:
return ll.Constant(llty, [])
elif const.value is True:
return ll.Constant(llty, True)
@ -539,6 +542,10 @@ class LLVMIRGenerator:
if func.is_internal:
self.llfunction.linkage = 'private'
if func.is_cold:
self.llfunction.calling_convention = 'coldcc'
self.llfunction.attributes.add('cold')
self.llfunction.attributes.add('noinline')
self.llfunction.attributes.add('uwtable')
@ -1039,7 +1046,7 @@ class LLVMIRGenerator:
def process_Closure(self, insn):
llenv = self.map(insn.environment())
llenv = self.llbuilder.bitcast(llenv, llptr, name="ptr.{}".format(llenv.name))
llenv = self.llbuilder.bitcast(llenv, llptr)
llfun = self.map(insn.target_function)
llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined)
llvalue = self.llbuilder.insert_value(llvalue, llenv, 0)
@ -1244,15 +1251,17 @@ class LLVMIRGenerator:
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
self.llbuilder.call(llfun, [llresultslot] + llargs)
llcall = self.llbuilder.call(llfun, [llresultslot] + llargs)
llresult = self.llbuilder.load(llresultslot)
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
else:
llcall = llresult = self.llbuilder.call(llfun, llargs, name=insn.name)
if insn.is_cold:
llcall.cconv = 'coldcc'
return llresult
else:
return self.llbuilder.call(llfun, llargs,
name=insn.name)
def process_Invoke(self, insn):
llnormalblock = self.map(insn.normal_target())
@ -1264,12 +1273,26 @@ class LLVMIRGenerator:
llnormalblock, llunwindblock)
elif types.is_c_function(insn.target_function().type):
llfun, llargs = self._prepare_ffi_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
else:
llfun, llargs = self._prepare_closure_call(insn)
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
if self.has_sret(insn.target_function().type):
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
llresult = self.llbuilder.load(llresultslot)
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
else:
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
name=insn.name)
if insn.is_cold:
llcall.cconv = 'coldcc'
return llcall
def _quote(self, value, typ, path):
value_id = id(value)