forked from M-Labs/artiq
compiler: extract runtime checks into separate cold functions.
This reduces register pressure as well as function size, which favorably affects the inliner.
This commit is contained in:
parent
fcf2a73f82
commit
3fa5762c10
@ -423,6 +423,8 @@ class Function:
|
|||||||
:ivar is_internal:
|
:ivar is_internal:
|
||||||
(bool) if True, the function should not be accessible from outside
|
(bool) if True, the function should not be accessible from outside
|
||||||
the module it is contained in
|
the module it is contained in
|
||||||
|
:ivar is_cold:
|
||||||
|
(bool) if True, the function should be considered rarely called
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, typ, name, arguments, loc=None):
|
def __init__(self, typ, name, arguments, loc=None):
|
||||||
@ -431,6 +433,7 @@ class Function:
|
|||||||
self.next_name = 1
|
self.next_name = 1
|
||||||
self.set_arguments(arguments)
|
self.set_arguments(arguments)
|
||||||
self.is_internal = False
|
self.is_internal = False
|
||||||
|
self.is_cold = False
|
||||||
|
|
||||||
def _remove_name(self, name):
|
def _remove_name(self, name):
|
||||||
self.names.remove(name)
|
self.names.remove(name)
|
||||||
@ -922,6 +925,8 @@ class Call(Instruction):
|
|||||||
iodelay expressions for values of arguments
|
iodelay expressions for values of arguments
|
||||||
:ivar static_target_function: (:class:`Function` or None)
|
:ivar static_target_function: (:class:`Function` or None)
|
||||||
statically resolved callee
|
statically resolved callee
|
||||||
|
:ivar is_cold: (bool)
|
||||||
|
the callee function is cold
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -938,6 +943,7 @@ class Call(Instruction):
|
|||||||
super().__init__([func] + args, func.type.ret, name)
|
super().__init__([func] + args, func.type.ret, name)
|
||||||
self.arg_exprs = arg_exprs
|
self.arg_exprs = arg_exprs
|
||||||
self.static_target_function = None
|
self.static_target_function = None
|
||||||
|
self.is_cold = False
|
||||||
|
|
||||||
def copy(self, mapper):
|
def copy(self, mapper):
|
||||||
self_copy = super().copy(mapper)
|
self_copy = super().copy(mapper)
|
||||||
@ -1186,6 +1192,8 @@ class Invoke(Terminator):
|
|||||||
iodelay expressions for values of arguments
|
iodelay expressions for values of arguments
|
||||||
:ivar static_target_function: (:class:`Function` or None)
|
:ivar static_target_function: (:class:`Function` or None)
|
||||||
statically resolved callee
|
statically resolved callee
|
||||||
|
:ivar is_cold: (bool)
|
||||||
|
the callee function is cold
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -1206,6 +1214,7 @@ class Invoke(Terminator):
|
|||||||
super().__init__([func] + args + [normal, exn], func.type.ret, name)
|
super().__init__([func] + args + [normal, exn], func.type.ret, name)
|
||||||
self.arg_exprs = arg_exprs
|
self.arg_exprs = arg_exprs
|
||||||
self.static_target_function = None
|
self.static_target_function = None
|
||||||
|
self.is_cold = False
|
||||||
|
|
||||||
def copy(self, mapper):
|
def copy(self, mapper):
|
||||||
self_copy = super().copy(mapper)
|
self_copy = super().copy(mapper)
|
||||||
|
@ -101,8 +101,9 @@ class Target:
|
|||||||
|
|
||||||
# Now, actually optimize the code.
|
# Now, actually optimize the code.
|
||||||
llpassmgr.add_function_inlining_pass(70)
|
llpassmgr.add_function_inlining_pass(70)
|
||||||
llpassmgr.add_cfg_simplification_pass()
|
|
||||||
llpassmgr.add_instruction_combining_pass()
|
llpassmgr.add_instruction_combining_pass()
|
||||||
|
llpassmgr.add_cfg_simplification_pass()
|
||||||
|
llpassmgr.add_dead_arg_elimination_pass()
|
||||||
llpassmgr.add_gvn_pass()
|
llpassmgr.add_gvn_pass()
|
||||||
llpassmgr.add_global_dce_pass()
|
llpassmgr.add_global_dce_pass()
|
||||||
|
|
||||||
|
@ -302,7 +302,8 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)):
|
for index, (arg_name, codegen_default) in enumerate(zip(typ.optargs, defaults)):
|
||||||
default = codegen_default()
|
default = codegen_default()
|
||||||
value = self.append(ir.Builtin("unwrap_or", [optargs[index], default],
|
value = self.append(ir.Builtin("unwrap_or", [optargs[index], default],
|
||||||
typ.optargs[arg_name]))
|
typ.optargs[arg_name],
|
||||||
|
name="DEF.{}".format(arg_name)))
|
||||||
self.append(ir.SetLocal(env, arg_name, value))
|
self.append(ir.SetLocal(env, arg_name, value))
|
||||||
|
|
||||||
result = self.visit(node.body)
|
result = self.visit(node.body)
|
||||||
@ -574,9 +575,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
self.current_block = raise_proxy
|
self.current_block = raise_proxy
|
||||||
|
|
||||||
if exn is not None:
|
if exn is not None:
|
||||||
if loc is None:
|
assert loc is not None
|
||||||
loc = self.current_loc
|
|
||||||
|
|
||||||
loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr())
|
loc_file = ir.Constant(loc.source_buffer.name, builtins.TStr())
|
||||||
loc_line = ir.Constant(loc.line(), builtins.TInt32())
|
loc_line = ir.Constant(loc.line(), builtins.TInt32())
|
||||||
loc_column = ir.Constant(loc.column(), builtins.TInt32())
|
loc_column = ir.Constant(loc.column(), builtins.TInt32())
|
||||||
@ -598,7 +597,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
self.append(ir.Reraise())
|
self.append(ir.Reraise())
|
||||||
|
|
||||||
def visit_Raise(self, node):
|
def visit_Raise(self, node):
|
||||||
self.raise_exn(self.visit(node.exc))
|
self.raise_exn(self.visit(node.exc), loc=self.current_loc)
|
||||||
|
|
||||||
def visit_Try(self, node):
|
def visit_Try(self, node):
|
||||||
dispatcher = self.add_block("try.dispatch")
|
dispatcher = self.add_block("try.dispatch")
|
||||||
@ -927,6 +926,55 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
else:
|
else:
|
||||||
return self.append(ir.SetAttr(obj, node.attr, self.current_assign))
|
return self.append(ir.SetAttr(obj, node.attr, self.current_assign))
|
||||||
|
|
||||||
|
def _make_check(self, cond, exn_gen, loc=None, params=[]):
|
||||||
|
if loc is None:
|
||||||
|
loc = self.current_loc
|
||||||
|
|
||||||
|
try:
|
||||||
|
name = "check:{}:{}".format(loc.line(), loc.column())
|
||||||
|
args = [ir.EnvironmentArgument(self.current_env.type, "ARG.ENV")] + \
|
||||||
|
[ir.Argument(param.type, "ARG.{}".format(index))
|
||||||
|
for index, param in enumerate(params)]
|
||||||
|
typ = types.TFunction(OrderedDict([("arg{}".format(index), param.type)
|
||||||
|
for index, param in enumerate(params)]),
|
||||||
|
OrderedDict(),
|
||||||
|
builtins.TNone())
|
||||||
|
func = ir.Function(typ, ".".join(self.name + [name]), args, loc=loc)
|
||||||
|
func.is_internal = True
|
||||||
|
func.is_cold = True
|
||||||
|
self.functions.append(func)
|
||||||
|
old_func, self.current_function = self.current_function, func
|
||||||
|
|
||||||
|
entry = self.add_block("entry")
|
||||||
|
old_block, self.current_block = self.current_block, entry
|
||||||
|
|
||||||
|
old_final_branch, self.final_branch = self.final_branch, None
|
||||||
|
old_unwind, self.unwind_target = self.unwind_target, None
|
||||||
|
self.raise_exn(exn_gen(*args[1:]), loc=loc)
|
||||||
|
finally:
|
||||||
|
self.current_function = old_func
|
||||||
|
self.current_block = old_block
|
||||||
|
self.final_branch = old_final_branch
|
||||||
|
self.unwind_target = old_unwind
|
||||||
|
|
||||||
|
# cond: bool Value, condition
|
||||||
|
# exn_gen: lambda(*args)->exn Value, exception if condition not true; called with one ir.Argument per entry of params
|
||||||
|
cond_block = self.current_block
|
||||||
|
|
||||||
|
self.current_block = body_block = self.add_block("check.body")
|
||||||
|
closure = self.append(ir.Closure(func, ir.Constant(None, ir.TEnvironment("check", {}))))
|
||||||
|
if self.unwind_target is None:
|
||||||
|
insn = self.append(ir.Call(closure, params, {}))
|
||||||
|
else:
|
||||||
|
after_invoke = self.add_block("check.invoke")
|
||||||
|
insn = self.append(ir.Invoke(closure, params, {}, after_invoke, self.unwind_target))
|
||||||
|
self.current_block = after_invoke
|
||||||
|
insn.is_cold = True
|
||||||
|
self.append(ir.Unreachable())
|
||||||
|
|
||||||
|
self.current_block = tail_block = self.add_block("check.tail")
|
||||||
|
cond_block.append(ir.BranchIf(cond, tail_block, body_block))
|
||||||
|
|
||||||
def _map_index(self, length, index, one_past_the_end=False, loc=None):
|
def _map_index(self, length, index, one_past_the_end=False, loc=None):
|
||||||
lt_0 = self.append(ir.Compare(ast.Lt(loc=None),
|
lt_0 = self.append(ir.Compare(ast.Lt(loc=None),
|
||||||
index, ir.Constant(0, index.type)))
|
index, ir.Constant(0, index.type)))
|
||||||
@ -940,28 +988,16 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
ir.Constant(False, builtins.TBool())))
|
ir.Constant(False, builtins.TBool())))
|
||||||
head = self.current_block
|
head = self.current_block
|
||||||
|
|
||||||
self.current_block = out_of_bounds_block = self.add_block("index.outofbounds")
|
self._make_check(
|
||||||
exn = self.alloc_exn(builtins.TException("IndexError"),
|
in_bounds,
|
||||||
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
|
lambda index, length: self.alloc_exn(builtins.TException("IndexError"),
|
||||||
index, length)
|
ir.Constant("index {0} out of bounds 0:{1}", builtins.TStr()),
|
||||||
self.raise_exn(exn, loc=loc)
|
index, length),
|
||||||
|
params=[index, length],
|
||||||
self.current_block = in_bounds_block = self.add_block("index.inbounds")
|
loc=loc)
|
||||||
head.append(ir.BranchIf(in_bounds, in_bounds_block, out_of_bounds_block))
|
|
||||||
|
|
||||||
return mapped_index
|
return mapped_index
|
||||||
|
|
||||||
def _make_check(self, cond, exn_gen, loc=None, name="check"):
|
|
||||||
# cond: bool Value, condition
|
|
||||||
# exn_gen: lambda()->exn Value, exception if condition not true
|
|
||||||
cond_block = self.current_block
|
|
||||||
|
|
||||||
self.current_block = body_block = self.add_block("{}.body".format(name))
|
|
||||||
self.raise_exn(exn_gen(), loc=loc)
|
|
||||||
|
|
||||||
self.current_block = tail_block = self.add_block("{}.tail".format(name))
|
|
||||||
cond_block.append(ir.BranchIf(cond, tail_block, body_block))
|
|
||||||
|
|
||||||
def _make_loop(self, init, cond_gen, body_gen, name="loop"):
|
def _make_loop(self, init, cond_gen, body_gen, name="loop"):
|
||||||
# init: 'iter Value, initial loop variable value
|
# init: 'iter Value, initial loop variable value
|
||||||
# cond_gen: lambda('iter Value)->bool Value, loop condition
|
# cond_gen: lambda('iter Value)->bool Value, loop condition
|
||||||
@ -1064,10 +1100,11 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
name="slice.size"))
|
name="slice.size"))
|
||||||
self._make_check(
|
self._make_check(
|
||||||
self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)),
|
self.append(ir.Compare(ast.LtE(loc=None), slice_size, length)),
|
||||||
lambda: self.alloc_exn(builtins.TException("ValueError"),
|
lambda slice_size, length: self.alloc_exn(builtins.TException("ValueError"),
|
||||||
ir.Constant("slice size {0} is larger than iterable length {1}",
|
ir.Constant("slice size {0} is larger than iterable length {1}",
|
||||||
builtins.TStr()),
|
builtins.TStr()),
|
||||||
slice_size, length),
|
slice_size, length),
|
||||||
|
params=[slice_size, length],
|
||||||
loc=node.slice.loc)
|
loc=node.slice.loc)
|
||||||
|
|
||||||
if self.current_assign is None:
|
if self.current_assign is None:
|
||||||
@ -1147,9 +1184,10 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
|||||||
self._make_check(
|
self._make_check(
|
||||||
self.append(ir.Compare(ast.Eq(loc=None), length,
|
self.append(ir.Compare(ast.Eq(loc=None), length,
|
||||||
ir.Constant(len(node.elts), self._size_type))),
|
ir.Constant(len(node.elts), self._size_type))),
|
||||||
lambda: self.alloc_exn(builtins.TException("ValueError"),
|
lambda length: self.alloc_exn(builtins.TException("ValueError"),
|
||||||
ir.Constant("list must be {0} elements long to decompose", builtins.TStr()),
|
ir.Constant("list must be {0} elements long to decompose", builtins.TStr()),
|
||||||
length))
|
length),
|
||||||
|
params=[length])
|
||||||
|
|
||||||
for index, elt_node in enumerate(node.elts):
|
for index, elt_node in enumerate(node.elts):
|
||||||
elt = self.append(ir.GetElem(self.current_assign,
|
elt = self.append(ir.GetElem(self.current_assign,
|
||||||
|
@ -313,7 +313,10 @@ class LLVMIRGenerator:
|
|||||||
def llconst_of_const(self, const):
|
def llconst_of_const(self, const):
|
||||||
llty = self.llty_of_type(const.type)
|
llty = self.llty_of_type(const.type)
|
||||||
if const.value is None:
|
if const.value is None:
|
||||||
return ll.Constant(llty, [])
|
if isinstance(llty, ll.PointerType):
|
||||||
|
return ll.Constant(llty, None)
|
||||||
|
else:
|
||||||
|
return ll.Constant(llty, [])
|
||||||
elif const.value is True:
|
elif const.value is True:
|
||||||
return ll.Constant(llty, True)
|
return ll.Constant(llty, True)
|
||||||
elif const.value is False:
|
elif const.value is False:
|
||||||
@ -539,6 +542,10 @@ class LLVMIRGenerator:
|
|||||||
|
|
||||||
if func.is_internal:
|
if func.is_internal:
|
||||||
self.llfunction.linkage = 'private'
|
self.llfunction.linkage = 'private'
|
||||||
|
if func.is_cold:
|
||||||
|
self.llfunction.calling_convention = 'coldcc'
|
||||||
|
self.llfunction.attributes.add('cold')
|
||||||
|
self.llfunction.attributes.add('noinline')
|
||||||
|
|
||||||
self.llfunction.attributes.add('uwtable')
|
self.llfunction.attributes.add('uwtable')
|
||||||
|
|
||||||
@ -1039,7 +1046,7 @@ class LLVMIRGenerator:
|
|||||||
|
|
||||||
def process_Closure(self, insn):
|
def process_Closure(self, insn):
|
||||||
llenv = self.map(insn.environment())
|
llenv = self.map(insn.environment())
|
||||||
llenv = self.llbuilder.bitcast(llenv, llptr, name="ptr.{}".format(llenv.name))
|
llenv = self.llbuilder.bitcast(llenv, llptr)
|
||||||
llfun = self.map(insn.target_function)
|
llfun = self.map(insn.target_function)
|
||||||
llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined)
|
llvalue = ll.Constant(self.llty_of_type(insn.target_function.type), ll.Undefined)
|
||||||
llvalue = self.llbuilder.insert_value(llvalue, llenv, 0)
|
llvalue = self.llbuilder.insert_value(llvalue, llenv, 0)
|
||||||
@ -1244,15 +1251,17 @@ class LLVMIRGenerator:
|
|||||||
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
|
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
|
||||||
|
|
||||||
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
|
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
|
||||||
self.llbuilder.call(llfun, [llresultslot] + llargs)
|
llcall = self.llbuilder.call(llfun, [llresultslot] + llargs)
|
||||||
llresult = self.llbuilder.load(llresultslot)
|
llresult = self.llbuilder.load(llresultslot)
|
||||||
|
|
||||||
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
|
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
|
||||||
|
|
||||||
return llresult
|
|
||||||
else:
|
else:
|
||||||
return self.llbuilder.call(llfun, llargs,
|
llcall = llresult = self.llbuilder.call(llfun, llargs, name=insn.name)
|
||||||
name=insn.name)
|
|
||||||
|
if insn.is_cold:
|
||||||
|
llcall.cconv = 'coldcc'
|
||||||
|
|
||||||
|
return llresult
|
||||||
|
|
||||||
def process_Invoke(self, insn):
|
def process_Invoke(self, insn):
|
||||||
llnormalblock = self.map(insn.normal_target())
|
llnormalblock = self.map(insn.normal_target())
|
||||||
@ -1264,12 +1273,26 @@ class LLVMIRGenerator:
|
|||||||
llnormalblock, llunwindblock)
|
llnormalblock, llunwindblock)
|
||||||
elif types.is_c_function(insn.target_function().type):
|
elif types.is_c_function(insn.target_function().type):
|
||||||
llfun, llargs = self._prepare_ffi_call(insn)
|
llfun, llargs = self._prepare_ffi_call(insn)
|
||||||
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
|
|
||||||
name=insn.name)
|
|
||||||
else:
|
else:
|
||||||
llfun, llargs = self._prepare_closure_call(insn)
|
llfun, llargs = self._prepare_closure_call(insn)
|
||||||
return self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
|
|
||||||
name=insn.name)
|
if self.has_sret(insn.target_function().type):
|
||||||
|
llstackptr = self.llbuilder.call(self.llbuiltin("llvm.stacksave"), [])
|
||||||
|
|
||||||
|
llresultslot = self.llbuilder.alloca(llfun.type.pointee.args[0].pointee)
|
||||||
|
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
|
||||||
|
name=insn.name)
|
||||||
|
llresult = self.llbuilder.load(llresultslot)
|
||||||
|
|
||||||
|
self.llbuilder.call(self.llbuiltin("llvm.stackrestore"), [llstackptr])
|
||||||
|
else:
|
||||||
|
llcall = self.llbuilder.invoke(llfun, llargs, llnormalblock, llunwindblock,
|
||||||
|
name=insn.name)
|
||||||
|
|
||||||
|
if insn.is_cold:
|
||||||
|
llcall.cconv = 'coldcc'
|
||||||
|
|
||||||
|
return llcall
|
||||||
|
|
||||||
def _quote(self, value, typ, path):
|
def _quote(self, value, typ, path):
|
||||||
value_id = id(value)
|
value_id = id(value)
|
||||||
|
Loading…
Reference in New Issue
Block a user