forked from M-Labs/artiq
compiler: actually implement interleaving correctly (calls are still broken).
The previous implementation was completely wrong: it always advanced the global timeline by the same amount as the non-interleaved basic block did.

The new implementation only advances the global timeline by the difference between its current time and the virtual time of the branch, which requires it to adjust the delay instructions.

Previously, the delay expression was present in the IR twice: once as the iodelay.Expr transformation-visible form, and once as regular IR instructions, with the latter form being passed to the delay_mu builtin and advancing the runtime timeline.

As a result of this change, this strategy is no longer valid: we can meaningfully mutate the iodelay.Expr form but not the IR instruction form. Thus, IR instructions are no longer generated for delay expressions, and the LLVM lowering pass now has to lower the iodelay.Expr objects as well.

This works OK for flat `with parallel:` expressions, but breaks down outside of `with parallel:` or when calls are present. The reasons it breaks down are as follows:

  * Outside of `with parallel:`, delay() and delay_mu() must accept any expression, but iodelay.Expr objects are not nearly expressive enough. So, the IR instruction form must actually be kept as well.

  * A delay instruction is currently inserted after a call to a user-defined function; this delay instruction introduces a point where basic block reordering is possible and also provides delay information. However, the callee knows nothing about the context in which it is called, which means that the runtime timeline is advanced twice. So, a new terminator instruction must be added that combines the properties of delay and call instructions (and another for delay and invoke as well).
This commit is contained in:
parent
73c358a59a
commit
50e7b44d04
|
@ -1418,6 +1418,15 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
||||||
|
|
||||||
return self.append(ir.Alloc(attributes, typ))
|
return self.append(ir.Alloc(attributes, typ))
|
||||||
|
|
||||||
|
def _make_delay(self, delay):
    """Append an ``ir.Delay`` for ``delay`` and continue in a new block.

    Does nothing when ``iodelay.is_const(delay, 0)`` holds. Otherwise a
    fresh block is created, an ``ir.Delay`` targeting it is appended, and
    ``self.current_block`` is moved to that block so subsequent
    instructions land after the delay.

    :param delay: delay expression; must provide ``free_vars()``, and every
        free variable name must be a key of ``self.current_args``.
    """
    if iodelay.is_const(delay, 0):
        return

    # Create the continuation block first, then resolve each free variable
    # of the delay expression to the matching current function argument.
    continuation = self.add_block()
    substitutions = {}
    for name in delay.free_vars():
        substitutions[name] = self.current_args[name]

    self.append(ir.Delay(delay, substitutions, continuation))
    self.current_block = continuation
|
||||||
|
|
||||||
def visit_builtin_call(self, node):
|
def visit_builtin_call(self, node):
|
||||||
# A builtin by any other name... Ignore node.func, just use the type.
|
# A builtin by any other name... Ignore node.func, just use the type.
|
||||||
typ = node.func.type
|
typ = node.func.type
|
||||||
|
@ -1520,7 +1529,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
||||||
return self.append(ir.Arith(ast.Mult(loc=None), now_mu_float, self.ref_period))
|
return self.append(ir.Arith(ast.Mult(loc=None), now_mu_float, self.ref_period))
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
elif types.is_builtin(typ, "delay") or types.is_builtin(typ, "at"):
|
elif types.is_builtin(typ, "at"):
|
||||||
if len(node.args) == 1 and len(node.keywords) == 0:
|
if len(node.args) == 1 and len(node.keywords) == 0:
|
||||||
arg = self.visit(node.args[0])
|
arg = self.visit(node.args[0])
|
||||||
arg_mu_float = self.append(ir.Arith(ast.Div(loc=None), arg, self.ref_period))
|
arg_mu_float = self.append(ir.Arith(ast.Div(loc=None), arg, self.ref_period))
|
||||||
|
@ -1528,8 +1537,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
||||||
self.append(ir.Builtin(typ.name + "_mu", [arg_mu], builtins.TNone()))
|
self.append(ir.Builtin(typ.name + "_mu", [arg_mu], builtins.TNone()))
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
elif types.is_builtin(typ, "now_mu") or types.is_builtin(typ, "delay_mu") \
|
elif types.is_builtin(typ, "now_mu") or types.is_builtin(typ, "at_mu"):
|
||||||
or types.is_builtin(typ, "at_mu"):
|
|
||||||
return self.append(ir.Builtin(typ.name,
|
return self.append(ir.Builtin(typ.name,
|
||||||
[self.visit(arg) for arg in node.args], node.type))
|
[self.visit(arg) for arg in node.args], node.type))
|
||||||
elif types.is_builtin(typ, "mu_to_seconds"):
|
elif types.is_builtin(typ, "mu_to_seconds"):
|
||||||
|
@ -1546,6 +1554,9 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
||||||
return self.append(ir.Coerce(arg_mu, builtins.TInt(types.TValue(64))))
|
return self.append(ir.Coerce(arg_mu, builtins.TInt(types.TValue(64))))
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
|
elif types.is_builtin(typ, "delay") or types.is_builtin(typ, "delay_mu"):
|
||||||
|
assert node.iodelay is not None
|
||||||
|
self._make_delay(node.iodelay)
|
||||||
elif types.is_exn_constructor(typ):
|
elif types.is_exn_constructor(typ):
|
||||||
return self.alloc_exn(node.type, *[self.visit(arg_node) for arg_node in node.args])
|
return self.alloc_exn(node.type, *[self.visit(arg_node) for arg_node in node.args])
|
||||||
elif types.is_constructor(typ):
|
elif types.is_constructor(typ):
|
||||||
|
@ -1557,18 +1568,7 @@ class ARTIQIRGenerator(algorithm.Visitor):
|
||||||
typ = node.func.type.find()
|
typ = node.func.type.find()
|
||||||
|
|
||||||
if types.is_builtin(typ):
|
if types.is_builtin(typ):
|
||||||
insn = self.visit_builtin_call(node)
|
return self.visit_builtin_call(node)
|
||||||
|
|
||||||
# Temporary.
|
|
||||||
if node.iodelay is not None and not iodelay.is_const(node.iodelay, 0):
|
|
||||||
after_delay = self.add_block()
|
|
||||||
self.append(ir.Delay(node.iodelay,
|
|
||||||
{var_name: self.current_args[var_name]
|
|
||||||
for var_name in node.iodelay.free_vars()},
|
|
||||||
after_delay))
|
|
||||||
self.current_block = after_delay
|
|
||||||
|
|
||||||
return insn
|
|
||||||
|
|
||||||
if types.is_function(typ):
|
if types.is_function(typ):
|
||||||
func = self.visit(node.func)
|
func = self.visit(node.func)
|
||||||
|
|
|
@ -77,6 +77,9 @@ class Interleaver:
|
||||||
index, source_block = min(enumerate(source_blocks), key=time_after_block)
|
index, source_block = min(enumerate(source_blocks), key=time_after_block)
|
||||||
source_block_delay = iodelay_of_block(source_block)
|
source_block_delay = iodelay_of_block(source_block)
|
||||||
|
|
||||||
|
new_target_time = source_times[index] + source_block_delay
|
||||||
|
target_time_delta = new_target_time - target_time
|
||||||
|
|
||||||
target_terminator = target_block.terminator()
|
target_terminator = target_block.terminator()
|
||||||
if isinstance(target_terminator, (ir.Delay, ir.Branch)):
|
if isinstance(target_terminator, (ir.Delay, ir.Branch)):
|
||||||
target_terminator.set_target(source_block)
|
target_terminator.set_target(source_block)
|
||||||
|
@ -85,8 +88,15 @@ class Interleaver:
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
source_terminator = source_block.terminator()
|
||||||
|
if target_time_delta > 0:
|
||||||
|
assert isinstance(source_terminator, ir.Delay)
|
||||||
|
source_terminator.expr = iodelay.Const(target_time_delta)
|
||||||
|
else:
|
||||||
|
source_terminator.replace_with(ir.Branch(source_terminator.target()))
|
||||||
|
|
||||||
target_block = source_block
|
target_block = source_block
|
||||||
target_time += source_block_delay
|
target_time = new_target_time
|
||||||
|
|
||||||
new_source_block = postdom_tree.immediate_dominator(source_block)
|
new_source_block = postdom_tree.immediate_dominator(source_block)
|
||||||
assert (new_source_block is not None)
|
assert (new_source_block is not None)
|
||||||
|
@ -98,4 +108,4 @@ class Interleaver:
|
||||||
del source_times[index]
|
del source_times[index]
|
||||||
else:
|
else:
|
||||||
source_blocks[index] = new_source_block
|
source_blocks[index] = new_source_block
|
||||||
source_times[index] = target_time
|
source_times[index] = new_target_time
|
||||||
|
|
|
@ -7,7 +7,7 @@ import os
|
||||||
from pythonparser import ast, diagnostic
|
from pythonparser import ast, diagnostic
|
||||||
from llvmlite_artiq import ir as ll
|
from llvmlite_artiq import ir as ll
|
||||||
from ...language import core as language_core
|
from ...language import core as language_core
|
||||||
from .. import types, builtins, ir
|
from .. import types, builtins, ir, iodelay
|
||||||
|
|
||||||
|
|
||||||
llvoid = ll.VoidType()
|
llvoid = ll.VoidType()
|
||||||
|
@ -784,12 +784,6 @@ class LLVMIRGenerator:
|
||||||
return self.map(insn.operands[0])
|
return self.map(insn.operands[0])
|
||||||
elif insn.op == "now_mu":
|
elif insn.op == "now_mu":
|
||||||
return self.llbuilder.load(self.llbuiltin("now"), name=insn.name)
|
return self.llbuilder.load(self.llbuiltin("now"), name=insn.name)
|
||||||
elif insn.op == "delay_mu":
|
|
||||||
interval, = insn.operands
|
|
||||||
llnowptr = self.llbuiltin("now")
|
|
||||||
llnow = self.llbuilder.load(llnowptr)
|
|
||||||
lladjusted = self.llbuilder.add(llnow, self.map(interval))
|
|
||||||
return self.llbuilder.store(lladjusted, llnowptr)
|
|
||||||
elif insn.op == "at_mu":
|
elif insn.op == "at_mu":
|
||||||
time, = insn.operands
|
time, = insn.operands
|
||||||
return self.llbuilder.store(self.map(time), self.llbuiltin("now"))
|
return self.llbuilder.store(self.map(time), self.llbuiltin("now"))
|
||||||
|
@ -1068,8 +1062,6 @@ class LLVMIRGenerator:
|
||||||
def process_Branch(self, insn):
|
def process_Branch(self, insn):
|
||||||
return self.llbuilder.branch(self.map(insn.target()))
|
return self.llbuilder.branch(self.map(insn.target()))
|
||||||
|
|
||||||
process_Delay = process_Branch
|
|
||||||
|
|
||||||
def process_BranchIf(self, insn):
|
def process_BranchIf(self, insn):
|
||||||
return self.llbuilder.cbranch(self.map(insn.condition()),
|
return self.llbuilder.cbranch(self.map(insn.condition()),
|
||||||
self.map(insn.if_true()), self.map(insn.if_false()))
|
self.map(insn.if_true()), self.map(insn.if_false()))
|
||||||
|
@ -1150,3 +1142,16 @@ class LLVMIRGenerator:
|
||||||
|
|
||||||
return llexn
|
return llexn
|
||||||
|
|
||||||
|
def process_Delay(self, insn):
    """Lower a delay instruction: add ``insn.expr`` to ``now``, then branch.

    Loads the value behind the ``now`` builtin, adds the lowered delay
    expression, stores the sum back, and emits an unconditional branch to
    ``insn.target()``.

    Only ``iodelay.Const`` expressions are lowered; any other expression
    kind hits ``assert False``.

    :param insn: the delay instruction; ``insn.expr`` is read and
        ``insn.target()`` is called.
    :return: the branch instruction produced by ``self.llbuilder``.
    """
    def lower_expr(expr):
        # Constants become 64-bit integer immediates; nothing else is
        # supported here.
        if isinstance(expr, iodelay.Const):
            return ll.Constant(lli64, int(expr.value))
        assert False

    now_ptr = self.llbuiltin("now")
    # Load the current time before lowering the expression, so the emitted
    # instruction order stays: load, add, store, branch.
    current_now = self.llbuilder.load(now_ptr)
    advanced_now = self.llbuilder.add(current_now, lower_expr(insn.expr))
    self.llbuilder.store(advanced_now, now_ptr)

    successor = self.map(insn.target())
    return self.llbuilder.branch(successor)
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
# RUN: %python -m artiq.compiler.testbench.jit %s >%t
|
||||||
|
# RUN: OutputCheck %s --file-to-check=%t
|
||||||
|
|
||||||
|
def g():
|
||||||
|
with parallel:
|
||||||
|
with sequential:
|
||||||
|
print("A", now_mu())
|
||||||
|
delay_mu(2)
|
||||||
|
#
|
||||||
|
print("B", now_mu())
|
||||||
|
with sequential:
|
||||||
|
print("C", now_mu())
|
||||||
|
delay_mu(2)
|
||||||
|
#
|
||||||
|
print("D", now_mu())
|
||||||
|
delay_mu(2)
|
||||||
|
#
|
||||||
|
print("E", now_mu())
|
||||||
|
|
||||||
|
# CHECK-L: A 0
|
||||||
|
# CHECK-L: B 2
|
||||||
|
# CHECK-L: C 2
|
||||||
|
# CHECK-L: D 2
|
||||||
|
# CHECK-L: E 4
|
||||||
|
g()
|
|
@ -6,17 +6,20 @@ def g():
|
||||||
with sequential:
|
with sequential:
|
||||||
print("A", now_mu())
|
print("A", now_mu())
|
||||||
delay_mu(3)
|
delay_mu(3)
|
||||||
|
#
|
||||||
print("B", now_mu())
|
print("B", now_mu())
|
||||||
with sequential:
|
with sequential:
|
||||||
print("C", now_mu())
|
print("C", now_mu())
|
||||||
delay_mu(2)
|
delay_mu(2)
|
||||||
|
#
|
||||||
print("D", now_mu())
|
print("D", now_mu())
|
||||||
delay_mu(2)
|
delay_mu(2)
|
||||||
|
#
|
||||||
print("E", now_mu())
|
print("E", now_mu())
|
||||||
|
|
||||||
# CHECK-L: C 0
|
# CHECK-L: C 0
|
||||||
# CHECK-L: A 2
|
# CHECK-L: A 2
|
||||||
# CHECK-L: D 5
|
# CHECK-L: B 3
|
||||||
# CHECK-L: B 7
|
# CHECK-L: D 3
|
||||||
# CHECK-L: E 7
|
# CHECK-L: E 4
|
||||||
g()
|
g()
|
Loading…
Reference in New Issue