From dceb5ae501a538fabefc4c30a7b16d74a21bd0e2 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 5 Jul 2019 23:42:37 +0800 Subject: [PATCH 01/17] wrpll: Si590 I2C mux, CDC --- artiq/gateware/wrpll/__init__.py | 0 artiq/gateware/wrpll/si590.py | 111 +++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 artiq/gateware/wrpll/__init__.py create mode 100644 artiq/gateware/wrpll/si590.py diff --git a/artiq/gateware/wrpll/__init__.py b/artiq/gateware/wrpll/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/artiq/gateware/wrpll/si590.py b/artiq/gateware/wrpll/si590.py new file mode 100644 index 000000000..565cdea0d --- /dev/null +++ b/artiq/gateware/wrpll/si590.py @@ -0,0 +1,111 @@ +from migen import * +from migen.genlib.fsm import * +from migen.genlib.cdc import MultiReg, PulseSynchronizer, BlindTransfer + +from misoc.interconnect.csr import * + + +class ADPLLProgrammer(Module): + def __init__(self): + self.i2c_divider = Signal(16) + self.i2c_address = Signal(7) + + self.adpll = Signal(24) + self.stb = Signal() + self.busy = Signal() + self.nack = Signal() + + self.scl = Signal() + self.sda_i = Signal() + self.sda_o = Signal() + self.sda_oe = Signal() + + self.scl.attr.add("no_retiming") + self.sda_o.attr.add("no_retiming") + self.sda_oe.attr.add("no_retiming") + + +class Si590(Module, AutoCSR): + def __init__(self, pads): + self.gpio_enable = CSRStorage(reset=1) + self.gpio_in = CSRStatus(2) + self.gpio_out = CSRStorage(2) + self.gpio_oe = CSRStorage(2) + + self.i2c_divider = CSRStorage(16) + self.i2c_address = CSRStorage(7) + self.errors = CSR(2) + + # in helper clock domain + self.adpll = Signal(24) + self.adpll_stb = Signal() + + # # # + + programmer = ClockDomainsRenamer("helper")(ADPLLProgrammer()) + self.submodules += programmer + + self.i2c_divider.storage.attr.add("no_retiming") + self.i2c_address.storage.attr.add("no_retiming") + self.specials += [ + MultiReg(self.i2c_divider.storage, programmer.i2c_divider, "helper"), + MultiReg(self.i2c_address.storage, programmer.i2c_address, "helper") + ] + self.comb += [ + programmer.adpll.eq(self.adpll), + programmer.adpll_stb.eq(self.adpll_stb) + ] + + # SCL GPIO and mux + ts_scl = TSTriple(1) + self.specials += ts_scl.get_tristate(pads.scl) + + status = Signal() + self.comb += self.gpio_in.status[0].eq(status) + + self.specials += MultiReg(ts_scl.i, status) + self.gpio_enable.storage.attr.add("no_retiming") + self.comb += [ + If(self.gpio_enable.storage, + ts_scl.o.eq(self.gpio_out.storage[0]), + ts_scl.oe.eq(self.gpio_oe.storage[0]) + ).Else( + ts_scl.o.eq(programmer.scl), + ts_scl.oe.eq(1) + ) + ] + + # SDA GPIO and mux + ts_sda = TSTriple(1) + self.specials += ts_sda.get_tristate(pads.sda) + + status = Signal() + self.comb += self.gpio_in.status[1].eq(status) + + self.specials += MultiReg(ts_sda.i, status) + self.gpio_enable.storage.attr.add("no_retiming") + self.comb += [ + If(self.gpio_enable.storage, + ts_sda.o.eq(self.gpio_out.storage[1]), + ts_sda.oe.eq(self.gpio_oe.storage[1]) + ).Else( + ts_sda.o.eq(programmer.sda_o), + ts_sda.oe.eq(programmer.sda_oe) + ) + ] + self.specials += MultiReg(ts_sda.i, programmer.sda_i, "helper") + + # Error reporting + collision_cdc = BlindTransfer("helper", "sys") + self.submodules += collision_cdc + self.comb += collision_cdc.i.eq(programmer.stb & programmer.busy) + + nack_cdc = PulseSynchronizer("helper", "sys") + self.submodules += nack_cdc + self.comb += nack_cdc.i.eq(programmer.nack) + + for n, trig in enumerate([collision_cdc.o, nack_cdc.o]): + self.sync += [ + If(self.errors.re & self.errors.r[n], self.errors.w[n].eq(0)), + If(trig, self.errors.w[n].eq(1)) + ] From e4fff390a8f587b20b3ddb477aaf4975d9b9481d Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sun, 7 Jul 2019 09:39:55 +0800 Subject: [PATCH 02/17] si590 -> si549 Had mistaken the part numbers. --- artiq/gateware/wrpll/{si590.py => si549.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename artiq/gateware/wrpll/{si590.py => si549.py} (99%) diff --git a/artiq/gateware/wrpll/si590.py b/artiq/gateware/wrpll/si549.py similarity index 99% rename from artiq/gateware/wrpll/si590.py rename to artiq/gateware/wrpll/si549.py index 565cdea0d..fd87c2cfd 100644 --- a/artiq/gateware/wrpll/si590.py +++ b/artiq/gateware/wrpll/si549.py @@ -25,7 +25,7 @@ class ADPLLProgrammer(Module): self.sda_oe.attr.add("no_retiming") -class Si590(Module, AutoCSR): +class Si549(Module, AutoCSR): def __init__(self, pads): self.gpio_enable = CSRStorage(reset=1) self.gpio_in = CSRStatus(2) From 5f461d08cd9d5fe5ade6cc574198a042e16ae32a Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 9 Jul 2019 16:07:31 +0800 Subject: [PATCH 03/17] wrpll: add simple thls compiler --- artiq/gateware/wrpll/thls.py | 267 +++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 artiq/gateware/wrpll/thls.py diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py new file mode 100644 index 000000000..04c68628c --- /dev/null +++ b/artiq/gateware/wrpll/thls.py @@ -0,0 +1,267 @@ +import inspect +import ast +from copy import copy + + +class Isn: + def __init__(self, immediate=None, inputs=None, outputs=None): + if inputs is None: + inputs = [] + if outputs is None: + outputs = [] + self.immediate = immediate + self.inputs = inputs + self.outputs = outputs + + def __repr__(self): + r = "<" + r += self.__class__.__name__ + if self.immediate is not None: + r += " (" + str(self.immediate) + ")" + for inp in self.inputs: + r += " r" + str(inp) + if self.outputs: + r += " ->" + for outp in self.outputs: + r += " r" + str(outp) + r += ">" + return r + + +class NopIsn(Isn): + pass + + +class AddIsn(Isn): + pass + + +class SubIsn(Isn): + pass + + +class MulIsn(Isn): + pass + + +class ShiftIsn(Isn): + pass + + +class CopyIsn(Isn): + pass + + +class InputIsn(Isn): + pass + + +class OutputIsn(Isn): + pass + + +class ASTCompiler: + def __init__(self): + self.program = [] + self.data = [] + self.next_ssa_reg = -1 + self.constants = dict() + self.names = dict() + self.globals = dict() + + def get_ssa_reg(self): + r = self.next_ssa_reg + self.next_ssa_reg -= 1 + return r + + def add_global(self, name): + r = len(self.data) + self.data.append(0) + self.names[name] = r + self.globals[name] = r + return r + + def input(self, name): + target = self.get_ssa_reg() + self.program.append(InputIsn(outputs=[target])) + self.names[name] = target + + def emit(self, node): + if isinstance(node, ast.BinOp): + left = self.emit(node.left) + right = self.emit(node.right) + if isinstance(node.op, ast.Add): + cls = AddIsn + elif isinstance(node.op, ast.Sub): + cls = SubIsn + elif isinstance(node.op, ast.Mult): + cls = MulIsn + else: + raise NotImplementedError + output = self.get_ssa_reg() + self.program.append(cls(inputs=[left, right], outputs=[output])) + return output + elif isinstance(node, ast.Num): + if node.n in self.constants: + return self.constants[node.n] + else: + r = len(self.data) + self.data.append(node.n) + self.constants[node.n] = r + return r + elif isinstance(node, ast.Name): + return self.names[node.id] + elif isinstance(node, ast.Assign): + output = self.emit(node.value) + for target in node.targets: + assert isinstance(target, ast.Name) + self.names[target.id] = output + elif isinstance(node, ast.Return): + value = self.emit(node.value) + self.program.append(OutputIsn(inputs=[value])) + elif isinstance(node, ast.Global): + pass + else: + raise NotImplementedError + + +class Processor: + def __init__(self, multiplier_stages=2): + self.multiplier_stages = multiplier_stages + + def get_instruction_latency(self, isn): + return { + AddIsn: 2, + SubIsn: 2, + MulIsn: 1 + self.multiplier_stages, + ShiftIsn: 2, + CopyIsn: 1, + InputIsn: 1 + }[isn.__class__] + + +class Scheduler: + def __init__(self, processor, reserved_data, program): + self.processor = processor + self.reserved_data = reserved_data + self.used_registers = set(range(self.reserved_data)) + self.exits = dict() + self.program = program + self.remaining = copy(program) + self.output = [] + + def allocate_register(self): + r = min(set(range(max(self.used_registers) + 2)) - self.used_registers) + self.used_registers.add(r) + return r + + def free_register(self, r): + assert r >= self.reserved_data + self.used_registers.discard(r) + + def find_inputs(self, cycle, isn): + mapped_inputs = [] + for inp in isn.inputs: + if inp >= 0: + mapped_inputs.append(inp) + else: + found = False + for i in range(cycle): + if i in self.exits: + r, rm = self.exits[i] + if r == inp: + mapped_inputs.append(rm) + found = True + break + if not found: + return None + return mapped_inputs + + def schedule_one(self, isn): + cycle = len(self.output) + mapped_inputs = self.find_inputs(cycle, isn) + if mapped_inputs is None: + return False + + if isn.outputs: + latency = self.processor.get_instruction_latency(isn) + exit = cycle + latency + if exit in self.exits: + return False + + # Instruction can be scheduled + + self.remaining.remove(isn) + + for inp, minp in zip(isn.inputs, mapped_inputs): + can_free = inp < 0 and all(inp != rinp for risn in self.remaining for rinp in risn.inputs) + if can_free: + self.free_register(minp) + + if isn.outputs: + assert len(isn.outputs) == 1 + output = self.allocate_register() + self.exits[exit] = (isn.outputs[0], output) + self.output.append(isn.__class__(immediate=isn.immediate, inputs=mapped_inputs)) + + return True + + def schedule(self): + while self.remaining: + success = False + for isn in self.remaining: + if self.schedule_one(isn): + success = True + break + if not success: + self.output.append(NopIsn()) + self.output += [NopIsn()]*(max(self.exits.keys()) - len(self.output) + 1) + return self.output + + def pretty_print(self): + for cycle, isn in enumerate(self.output): + l = "{:4d} {:15}".format(cycle, str(isn)) + if cycle in self.exits: + l += " -> r{}".format(self.exits[cycle][1]) + print(l) + + +def compile(function): + node = ast.parse(inspect.getsource(function)) + assert isinstance(node, ast.Module) + assert len(node.body) == 1 + node = node.body[0] + assert isinstance(node, ast.FunctionDef) + assert len(node.args.args) == 1 + arg = node.args.args[0].arg + body = node.body + + astcompiler = ASTCompiler() + for node in body: + if isinstance(node, ast.Global): + for name in node.names: + astcompiler.add_global(name) + arg_r = astcompiler.input(arg) + for node in body: + astcompiler.emit(node) + print(astcompiler.data) + print(astcompiler.program) + + scheduler = Scheduler(Processor(), len(astcompiler.data), astcompiler.program) + scheduler.schedule() + scheduler.pretty_print() + + +a = 0 +b = 0 +c = 0 + +def foo(x): + global a, b, c + c = b + b = a + a = x + return 4748*a + 259*b - 155*c + + +compile(foo) From 34222b3f386d2faa04b462c3459c5340cf329dbe Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 9 Jul 2019 17:56:14 +0800 Subject: [PATCH 04/17] wrpll: encode thls program --- artiq/gateware/wrpll/thls.py | 97 +++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index 04c68628c..a3e97f32f 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -29,35 +29,28 @@ class Isn: class NopIsn(Isn): - pass - + opcode = 0 class AddIsn(Isn): - pass - + opcode = 1 class SubIsn(Isn): - pass - + opcode = 2 class MulIsn(Isn): - pass - + opcode = 3 class ShiftIsn(Isn): - pass - + opcode = 4 class CopyIsn(Isn): - pass - + opcode = 5 class InputIsn(Isn): - pass - + opcode = 6 class OutputIsn(Isn): - pass + opcode = 7 class ASTCompiler: @@ -126,8 +119,13 @@ class ASTCompiler: class Processor: - def __init__(self, multiplier_stages=2): + def __init__(self, data_width=32, multiplier_stages=2): + self.data_width = data_width self.multiplier_stages = multiplier_stages + self.program_rom_size = None + self.data_ram_size = None + self.opcode_bits = 3 + self.reg_bits = None def get_instruction_latency(self, isn): return { @@ -139,6 +137,29 @@ class Processor: InputIsn: 1 }[isn.__class__] + def encode_instruction(self, isn, exit): + opcode = isn.opcode + if isn.immediate is not None: + r0 = isn.immediate + if len(isn.inputs) >= 1: + r1 = isn.inputs[0] + else: + r1 = 0 + else: + if len(isn.inputs) >= 1: + r0 = isn.inputs[0] + else: + r0 = 0 + if len(isn.inputs) >= 2: + r1 = isn.inputs[1] + else: + r1 = 0 + r = 0 + for value, bits in ((exit, self.reg_bits), (r1, self.reg_bits), (r0, self.reg_bits), (opcode, self.opcode_bits)): + r <<= bits + r |= value + return r + class Scheduler: def __init__(self, processor, reserved_data, program): @@ -218,15 +239,36 @@ class Scheduler: self.output += [NopIsn()]*(max(self.exits.keys()) - len(self.output) + 1) return self.output + +class CompiledProgram: + def __init__(self, processor, program, exits, data, glbs): + self.processor = processor + self.program = program + self.exits = exits + self.data = data + self.globals = glbs + def pretty_print(self): - for cycle, isn in enumerate(self.output): + for cycle, isn in enumerate(self.program): l = "{:4d} {:15}".format(cycle, str(isn)) if cycle in self.exits: - l += " -> r{}".format(self.exits[cycle][1]) + l += " -> r{}".format(self.exits[cycle]) print(l) + def dimension_memories(self): + self.processor.program_rom_size = len(self.program) + self.processor.data_ram_size = len(self.data) + self.processor.reg_bits = (self.processor.data_ram_size - 1).bit_length() -def compile(function): + def encode(self): + r = [] + for i, isn in enumerate(self.program): + exit = self.exits.get(i, 0) + r.append(self.processor.encode_instruction(isn, exit)) + return r + + +def compile(processor, function): node = ast.parse(inspect.getsource(function)) assert isinstance(node, ast.Module) assert len(node.body) == 1 @@ -244,12 +286,16 @@ def compile(function): arg_r = astcompiler.input(arg) for node in body: astcompiler.emit(node) - print(astcompiler.data) - print(astcompiler.program) - scheduler = Scheduler(Processor(), len(astcompiler.data), astcompiler.program) + scheduler = Scheduler(processor, len(astcompiler.data), astcompiler.program) scheduler.schedule() - scheduler.pretty_print() + + return CompiledProgram( + processor=processor, + program=scheduler.output, + exits={k: v[1] for k,v in scheduler.exits.items()}, + data=astcompiler.data, + glbs=astcompiler.globals) a = 0 @@ -264,4 +310,7 @@ def foo(x): return 4748*a + 259*b - 155*c -compile(foo) +cp = compile(Processor(), foo) +cp.pretty_print() +cp.dimension_memories() +print(cp.encode()) From 831b3514d3e430c68da2a9079a666cabe9e09384 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 19 Jul 2019 16:27:29 +0800 Subject: [PATCH 05/17] wrpll/thls: stop at return statement --- artiq/gateware/wrpll/thls.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index a3e97f32f..dc95e343d 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -286,6 +286,8 @@ def compile(processor, function): arg_r = astcompiler.input(arg) for node in body: astcompiler.emit(node) + if isinstance(node, ast.Return): + break scheduler = Scheduler(processor, len(astcompiler.data), astcompiler.program) scheduler.schedule() From 623446f82cc9e9bbd36b5d26af84a0c7ceac6f7f Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sat, 20 Jul 2019 18:50:57 +0800 Subject: [PATCH 06/17] wrpll/thls: simple simulation demo --- artiq/gateware/wrpll/thls.py | 200 ++++++++++++++++++++++++++++++++--- 1 file changed, 187 insertions(+), 13 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index dc95e343d..99ffbc17b 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -1,6 +1,10 @@ import inspect import ast from copy import copy +import operator +from functools import reduce + +from migen import * class Isn: @@ -40,17 +44,14 @@ class SubIsn(Isn): class MulIsn(Isn): opcode = 3 -class ShiftIsn(Isn): +class CopyIsn(Isn): opcode = 4 -class CopyIsn(Isn): +class InputIsn(Isn): opcode = 5 -class InputIsn(Isn): - opcode = 6 - class OutputIsn(Isn): - opcode = 7 + opcode = 6 class ASTCompiler: @@ -132,7 +133,6 @@ class Processor: AddIsn: 2, SubIsn: 2, MulIsn: 1 + self.multiplier_stages, - ShiftIsn: 2, CopyIsn: 1, InputIsn: 1 }[isn.__class__] @@ -160,6 +160,12 @@ class Processor: r |= value return r + def instruction_bits(self): + return 3*self.reg_bits + self.opcode_bits + + def implement(self, program, data): + return ProcessorImpl(self, program, data) + class Scheduler: def __init__(self, processor, reserved_data, program): @@ -292,14 +298,167 @@ def compile(processor, function): scheduler = Scheduler(processor, len(astcompiler.data), astcompiler.program) scheduler.schedule() + max_reg = max(max(max(isn.inputs + [0]) for isn in scheduler.output), max(v[1] for k, v in scheduler.exits.items())) + return CompiledProgram( processor=processor, program=scheduler.output, - exits={k: v[1] for k,v in scheduler.exits.items()}, - data=astcompiler.data, + exits={k: v[1] for k, v in scheduler.exits.items()}, + data=astcompiler.data + [0]*(max_reg - len(astcompiler.data) + 1), glbs=astcompiler.globals) +class BaseUnit(Module): + def __init__(self, data_width): + self.stb_i = Signal() + self.i0 = Signal(data_width) + self.i1 = Signal(data_width) + self.stb_o = Signal() + self.o = Signal(data_width) + + +class NopUnit(BaseUnit): + pass + + +class OpUnit(BaseUnit): + def __init__(self, op, data_width, stages): + BaseUnit.__init__(self, data_width) + + o = op(self.i0, self.i1) + stb_o = self.stb_i + for i in range(stages): + n_o = Signal(data_width) + n_stb_o = Signal() + self.sync += [ + n_o.eq(o), + n_stb_o.eq(stb_o) + ] + o = n_o + stb_o = n_stb_o + self.comb += [ + self.o.eq(o), + self.stb_o.eq(stb_o) + ] + + +class CopyUnit(BaseUnit): + def __init__(self, data_width): + BaseUnit.__init__(self, data_width) + + self.comb += [ + self.stb_o.eq(self.stb_i), + self.o.eq(self.i0) + ] + + +class InputUnit(BaseUnit): + def __init__(self, data_width, input_stb, input): + BaseUnit.__init__(self, data_width) + + # TODO + self.comb += [ + self.stb_o.eq(self.stb_i), + self.o.eq(42) + ] + + +class OutputUnit(BaseUnit): + def __init__(self, data_width, output_stb, output): + BaseUnit.__init__(self, data_width) + + self.sync += [ + output_stb.eq(self.stb_i), + output.eq(self.i0) + ] + + +class ProcessorImpl(Module): + def __init__(self, pd, program, data): + self.input_stb = Signal() + self.input = Signal(pd.data_width) + + self.output_stb = Signal() + self.output = Signal(pd.data_width) + + # # # + + program_mem = Memory(pd.instruction_bits(), pd.program_rom_size, init=program) + data_mem0 = Memory(pd.data_width, pd.data_ram_size, init=data) + data_mem1 = Memory(pd.data_width, pd.data_ram_size, init=data) + self.specials += program_mem, data_mem0, data_mem1 + + pc = Signal(pd.instruction_bits()) + pc_next = Signal.like(pc) + pc_en = Signal() + self.sync += pc.eq(pc_next) + self.comb += [ + If(pc_en, + pc_next.eq(pc + 1) + ).Else( + pc_next.eq(0) + ) + ] + program_mem_port = program_mem.get_port() + self.specials += program_mem_port + self.comb += program_mem_port.adr.eq(pc_next) + + # TODO + self.comb += pc_en.eq(1) + + s = 0 + opcode = Signal(pd.opcode_bits) + self.comb += opcode.eq(program_mem_port.dat_r[s:s+pd.opcode_bits]) + s += pd.opcode_bits + r0 = Signal(pd.reg_bits) + self.comb += r0.eq(program_mem_port.dat_r[s:s+pd.reg_bits]) + s += pd.reg_bits + r1 = Signal(pd.reg_bits) + self.comb += r1.eq(program_mem_port.dat_r[s:s+pd.reg_bits]) + s += pd.reg_bits + exit = Signal(pd.reg_bits) + self.comb += exit.eq(program_mem_port.dat_r[s:s+pd.reg_bits]) + + data_read_port0 = data_mem0.get_port() + data_read_port1 = data_mem1.get_port() + self.specials += data_read_port0, data_read_port1 + self.comb += [ + data_read_port0.adr.eq(r0), + data_read_port1.adr.eq(r1) + ] + + data_write_port = data_mem0.get_port(write_capable=True) + data_write_port_dup = data_mem1.get_port(write_capable=True) + self.specials += data_write_port, data_write_port_dup + self.comb += [ + data_write_port_dup.we.eq(data_write_port.we), + data_write_port_dup.adr.eq(data_write_port.adr), + data_write_port_dup.dat_w.eq(data_write_port.dat_w), + data_write_port.adr.eq(exit) + ] + + nop = NopUnit(pd.data_width) + adder = OpUnit(operator.add, pd.data_width, 1) + subtractor = OpUnit(operator.sub, pd.data_width, 1) + multiplier = OpUnit(operator.mul, pd.data_width, pd.multiplier_stages) + copier = CopyUnit(pd.data_width) + inu = InputUnit(pd.data_width, self.input_stb, self.input) + outu = OutputUnit(pd.data_width, self.output_stb, self.output) + units = [nop, adder, subtractor, multiplier, copier, inu, outu] + self.submodules += units + + for n, unit in enumerate(units): + self.sync += unit.stb_i.eq(opcode == n) + self.comb += [ + unit.i0.eq(data_read_port0.dat_r), + unit.i1.eq(data_read_port1.dat_r), + If(unit.stb_o, + data_write_port.we.eq(1), + data_write_port.dat_w.eq(unit.o) + ) + ] + + a = 0 b = 0 c = 0 @@ -312,7 +471,22 @@ def foo(x): return 4748*a + 259*b - 155*c -cp = compile(Processor(), foo) -cp.pretty_print() -cp.dimension_memories() -print(cp.encode()) +def simple_test(x): + a = 5 + 3 + return a*4 + + +if __name__ == "__main__": + proc = Processor() + cp = compile(proc, simple_test) + cp.pretty_print() + cp.dimension_memories() + print(cp.encode()) + proc_impl = proc.implement(cp.encode(), cp.data) + + def wait_result(): + while not (yield proc_impl.output_stb): + yield + result = yield proc_impl.output + print(result) + run_simulation(proc_impl, [wait_result()], vcd_name="test.vcd") From 7a5dcbe60e15d77a97980e02cc0652f7d7e8ce92 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Wed, 24 Jul 2019 18:51:33 +0800 Subject: [PATCH 07/17] wrpll/thls: support processor start/stop --- artiq/gateware/wrpll/thls.py | 64 +++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index 99ffbc17b..e28d812bd 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -5,6 +5,7 @@ import operator from functools import reduce from migen import * +from migen.genlib.fsm import * class Isn: @@ -53,6 +54,9 @@ class InputIsn(Isn): class OutputIsn(Isn): opcode = 6 +class EndIsn(Isn): + opcode = 7 + class ASTCompiler: def __init__(self): @@ -298,11 +302,14 @@ def compile(processor, function): scheduler = Scheduler(processor, len(astcompiler.data), astcompiler.program) scheduler.schedule() - max_reg = max(max(max(isn.inputs + [0]) for isn in scheduler.output), max(v[1] for k, v in scheduler.exits.items())) + program = copy(scheduler.output) + program.append(EndIsn()) + + max_reg = max(max(max(isn.inputs + [0]) for isn in program), max(v[1] for k, v in scheduler.exits.items())) return CompiledProgram( processor=processor, - program=scheduler.output, + program=program, exits={k: v[1] for k, v in scheduler.exits.items()}, data=astcompiler.data + [0]*(max_reg - len(astcompiler.data) + 1), glbs=astcompiler.globals) @@ -355,11 +362,11 @@ class CopyUnit(BaseUnit): class InputUnit(BaseUnit): def __init__(self, data_width, input_stb, input): BaseUnit.__init__(self, data_width) + self.buffer = Signal(data_width) - # TODO self.comb += [ self.stb_o.eq(self.stb_i), - self.o.eq(42) + self.o.eq(self.buffer) ] @@ -381,6 +388,8 @@ class ProcessorImpl(Module): self.output_stb = Signal() self.output = Signal(pd.data_width) + self.busy = Signal() + # # # program_mem = Memory(pd.instruction_bits(), pd.program_rom_size, init=program) @@ -403,9 +412,6 @@ class ProcessorImpl(Module): self.specials += program_mem_port self.comb += program_mem_port.adr.eq(pc_next) - # TODO - self.comb += pc_en.eq(1) - s = 0 opcode = Signal(pd.opcode_bits) self.comb += opcode.eq(program_mem_port.dat_r[s:s+pd.opcode_bits]) @@ -448,7 +454,7 @@ class ProcessorImpl(Module): self.submodules += units for n, unit in enumerate(units): - self.sync += unit.stb_i.eq(opcode == n) + self.sync += unit.stb_i.eq(pc_en & (opcode == n)) self.comb += [ unit.i0.eq(data_read_port0.dat_r), unit.i1.eq(data_read_port1.dat_r), @@ -458,6 +464,22 @@ class ProcessorImpl(Module): ) ] + fsm = FSM() + self.submodules += fsm + fsm.act("IDLE", + pc_en.eq(0), + NextValue(inu.buffer, self.input), + If(self.input_stb, NextState("PROCESSING")) + ) + fsm.act("PROCESSING", + self.busy.eq(1), + pc_en.eq(1), + If(opcode == EndIsn.opcode, + pc_en.eq(0), + NextState("IDLE") + ) + ) + a = 0 b = 0 @@ -472,8 +494,7 @@ def foo(x): def simple_test(x): - a = 5 + 3 - return a*4 + return x*2+2 if __name__ == "__main__": @@ -484,9 +505,22 @@ if __name__ == "__main__": print(cp.encode()) proc_impl = proc.implement(cp.encode(), cp.data) - def wait_result(): - while not (yield proc_impl.output_stb): + def send_values(values): + for value in values: + yield proc_impl.input.eq(value) + yield proc_impl.input_stb.eq(1) yield - result = yield proc_impl.output - print(result) - run_simulation(proc_impl, [wait_result()], vcd_name="test.vcd") + yield proc_impl.input.eq(0) + yield proc_impl.input_stb.eq(0) + yield + while (yield proc_impl.busy): + yield + @passive + def receive_values(callback): + while True: + while not (yield proc_impl.output_stb): + yield + callback((yield proc_impl.output)) + yield + + run_simulation(proc_impl, [send_values([42, 40]), receive_values(print)]) From f861459ace8cc945a447d5b0895c37d3ab8a539a Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Fri, 2 Aug 2019 13:23:16 +0800 Subject: [PATCH 08/17] wrpll: add filter algorithms (WIP) --- artiq/gateware/wrpll/filters.py | 54 +++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 artiq/gateware/wrpll/filters.py diff --git a/artiq/gateware/wrpll/filters.py b/artiq/gateware/wrpll/filters.py new file mode 100644 index 000000000..64dbe7c85 --- /dev/null +++ b/artiq/gateware/wrpll/filters.py @@ -0,0 +1,54 @@ +main_xn1 = 0 +main_xn2 = 0 +main_yn0 = 0 +main_yn1 = 0 +main_yn2 = 0 + +main_y0 = 23 +main_yr = 43 + +def filter_main(xn0): + global main_xn1, main_xn2, main_yn0, main_yn1, main_yn2 + + main_yn2 = main_yn1 + main_yn1 = main_yn0 + main_yn0 = ( + ((133450380908*((35184372088832*xn0 >> 44) + (17592186044417*main_xn1 >> 44))) >> 44) + + (29455872930889*main_yn1 >> 44) + - (12673794781453*main_yn2 >> 44)) + + main_xn2 = main_xn1 + main_xn1 = xn0 + + main_yn0 = min(main_yn0, main_y0 + main_yr) + main_yn0 = max(main_yn0, main_y0 - main_yr) + + return main_yn0 + + +helper_xn1 = 0 +helper_xn2 = 0 +helper_yn0 = 0 +helper_yn1 = 0 +helper_yn2 = 0 + +helper_y0 = 23 +helper_yr = 43 + +def filter_helper(xn0): + global helper_xn1, helper_xn2, helper_yn0, helper_yn1, helper_yn2 + + helper_yn2 = helper_yn1 + helper_yn1 = helper_yn0 + helper_yn0 = ( + ((133450380908*((35184372088832*xn0 >> 44) + (17592186044417*helper_xn1 >> 44))) >> 44) + + (29455872930889*helper_yn1 >> 44) + - (12673794781453*helper_yn2 >> 44)) + + helper_xn2 = helper_xn1 + helper_xn1 = xn0 + + helper_yn0 = min(helper_yn0, helper_y0 + helper_yr) + helper_yn0 = max(helper_yn0, helper_y0 - helper_yr) + + return helper_yn0 From 2776c5b16b555ce61a2ec1335020237910129b44 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 15:07:13 +0800 Subject: [PATCH 09/17] wrpll/thls: support mulshift --- artiq/gateware/wrpll/thls.py | 53 ++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index e28d812bd..ba2dde7f6 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -42,7 +42,7 @@ class AddIsn(Isn): class SubIsn(Isn): opcode = 2 -class MulIsn(Isn): +class MulShiftIsn(Isn): opcode = 3 class CopyIsn(Isn): @@ -86,18 +86,27 @@ class ASTCompiler: def emit(self, node): if isinstance(node, ast.BinOp): - left = self.emit(node.left) - right = self.emit(node.right) - if isinstance(node.op, ast.Add): - cls = AddIsn - elif isinstance(node.op, ast.Sub): - cls = SubIsn - elif isinstance(node.op, ast.Mult): - cls = MulIsn + if isinstance(node.op, ast.RShift): + if not isinstance(node.left, ast.BinOp) or not isinstance(node.left.op, ast.Mult): + raise NotImplementedError + if not isinstance(node.right, ast.Num): + raise NotImplementedError + left = self.emit(node.left.left) + right = self.emit(node.left.right) + cons = lambda **kwargs: MulShiftIsn(immediate=node.right.n, **kwargs) else: - raise NotImplementedError + left = self.emit(node.left) + right = self.emit(node.right) + if isinstance(node.op, ast.Add): + cons = AddIsn + elif isinstance(node.op, ast.Sub): + cons = SubIsn + elif isinstance(node.op, ast.Mult): + cons = lambda **kwargs: MulShiftIsn(immediate=0, **kwargs) + else: + raise NotImplementedError output = self.get_ssa_reg() - self.program.append(cls(inputs=[left, right], outputs=[output])) + self.program.append(cons(inputs=[left, right], outputs=[output])) return output elif isinstance(node, ast.Num): if node.n in self.constants: @@ -127,6 +136,7 @@ class Processor: def __init__(self, data_width=32, multiplier_stages=2): self.data_width = data_width self.multiplier_stages = multiplier_stages + self.multiplier_shifts = [] self.program_rom_size = None self.data_ram_size = None self.opcode_bits = 3 @@ -136,14 +146,14 @@ class Processor: return { AddIsn: 2, SubIsn: 2, - MulIsn: 1 + self.multiplier_stages, + MulShiftIsn: 1 + self.multiplier_stages, CopyIsn: 1, InputIsn: 1 }[isn.__class__] def encode_instruction(self, isn, exit): opcode = isn.opcode - if isn.immediate is not None: + if isn.immediate is not None and not isinstance(isn, MulShiftIsn): r0 = isn.immediate if len(isn.inputs) >= 1: r1 = isn.inputs[0] @@ -265,10 +275,13 @@ class CompiledProgram: l += " -> r{}".format(self.exits[cycle]) print(l) - def dimension_memories(self): + def dimension_processor(self): self.processor.program_rom_size = len(self.program) self.processor.data_ram_size = len(self.data) self.processor.reg_bits = (self.processor.data_ram_size - 1).bit_length() + for isn in self.program: + if isinstance(isn, MulShiftIsn) and isn.immediate not in self.processor.multiplier_shifts: + self.processor.multiplier_shifts.append(isn.immediate) def encode(self): r = [] @@ -446,7 +459,13 @@ class ProcessorImpl(Module): nop = NopUnit(pd.data_width) adder = OpUnit(operator.add, pd.data_width, 1) subtractor = OpUnit(operator.sub, pd.data_width, 1) - multiplier = OpUnit(operator.mul, pd.data_width, pd.multiplier_stages) + if pd.multiplier_shifts: + if len(pd.multiplier_shifts) != 1: + raise NotImplementedError + multiplier = OpUnit(lambda a, b: a * b >> pd.multiplier_shifts[0], + pd.data_width, pd.multiplier_stages) + else: + multiplier = NopUnit(pd.data_width) copier = CopyUnit(pd.data_width) inu = InputUnit(pd.data_width, self.input_stb, self.input) outu = OutputUnit(pd.data_width, self.output_stb, self.output) @@ -494,14 +513,14 @@ def foo(x): def simple_test(x): - return x*2+2 + return (x*2 >> 1) + 2 if __name__ == "__main__": proc = Processor() cp = compile(proc, simple_test) cp.pretty_print() - cp.dimension_memories() + cp.dimension_processor() print(cp.encode()) proc_impl = proc.implement(cp.encode(), cp.data) From 44969b03ad303a1efbdc7262867f54df5ebf6f28 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 15:55:13 +0800 Subject: [PATCH 10/17] wrpll/thls: rework instruction decoding --- artiq/gateware/wrpll/thls.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index ba2dde7f6..0cc244806 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -45,17 +45,19 @@ class SubIsn(Isn): class MulShiftIsn(Isn): opcode = 3 +# opcode = 4: MulShift with alternate shift + class CopyIsn(Isn): - opcode = 4 + opcode = 7 class InputIsn(Isn): - opcode = 5 + opcode = 8 class OutputIsn(Isn): - opcode = 6 + opcode = 9 class EndIsn(Isn): - opcode = 7 + opcode = 10 class ASTCompiler: @@ -139,7 +141,7 @@ class Processor: self.multiplier_shifts = [] self.program_rom_size = None self.data_ram_size = None - self.opcode_bits = 3 + self.opcode_bits = 4 self.reg_bits = None def get_instruction_latency(self, isn): @@ -472,8 +474,7 @@ class ProcessorImpl(Module): units = [nop, adder, subtractor, multiplier, copier, inu, outu] self.submodules += units - for n, unit in enumerate(units): - self.sync += unit.stb_i.eq(pc_en & (opcode == n)) + for unit in units: self.comb += [ unit.i0.eq(data_read_port0.dat_r), unit.i1.eq(data_read_port1.dat_r), @@ -483,6 +484,19 @@ class ProcessorImpl(Module): ) ] + decode_table = [ + (NopIsn.opcode, nop), + (AddIsn.opcode, adder), + (SubIsn.opcode, subtractor), + (MulShiftIsn.opcode, multiplier), + (MulShiftIsn.opcode + 1, multiplier), + (CopyIsn.opcode, copier), + (InputIsn.opcode, inu), + (OutputIsn.opcode, outu) + ] + for allocated_opcode, unit in decode_table: + self.sync += unit.stb_i.eq(pc_en & (opcode == allocated_opcode)) + fsm = FSM() self.submodules += fsm fsm.act("IDLE", From efc43142a69a81deffbd135719cd5933fda6f1d6 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 16:42:59 +0800 Subject: [PATCH 11/17] wrpll/thls: implement min/max --- artiq/gateware/wrpll/thls.py | 46 ++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index 0cc244806..f1b9a5947 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -47,6 +47,12 @@ class MulShiftIsn(Isn): # opcode = 4: MulShift with alternate shift +class MinIsn(Isn): + opcode = 5 + +class MaxIsn(Isn): + opcode = 6 + class CopyIsn(Isn): opcode = 7 @@ -110,6 +116,22 @@ class ASTCompiler: output = self.get_ssa_reg() self.program.append(cons(inputs=[left, right], outputs=[output])) return output + elif isinstance(node, ast.Call): + if not isinstance(node.func, ast.Name): + raise NotImplementedError + funcname = node.func.id + if node.keywords: + raise NotImplementedError + inputs = [self.emit(x) for x in node.args] + if funcname == "min": + cons = MinIsn + elif funcname == "max": + cons = MaxIsn + else: + raise NotImplementedError + output = self.get_ssa_reg() + self.program.append(cons(inputs=inputs, outputs=[output])) + return output elif isinstance(node, ast.Num): if node.n in self.constants: return self.constants[node.n] @@ -149,6 +171,8 @@ class Processor: AddIsn: 2, SubIsn: 2, MulShiftIsn: 1 + self.multiplier_stages, + MinIsn: 2, + MaxIsn: 2, CopyIsn: 1, InputIsn: 1 }[isn.__class__] @@ -364,6 +388,20 @@ class OpUnit(BaseUnit): ] +class SelectUnit(BaseUnit): + def __init__(self, op, data_width): + BaseUnit.__init__(self, data_width) + + self.sync += [ + self.stb_o.eq(self.stb_i), + If(op(self.i0, self.i1), + self.o.eq(self.i0) + ).Else( + self.o.eq(self.i1) + ) + ] + + class CopyUnit(BaseUnit): def __init__(self, data_width): BaseUnit.__init__(self, data_width) @@ -468,10 +506,12 @@ class ProcessorImpl(Module): pd.data_width, pd.multiplier_stages) else: multiplier = NopUnit(pd.data_width) + minu = SelectUnit(operator.lt, pd.data_width) + maxu = SelectUnit(operator.gt, pd.data_width) copier = CopyUnit(pd.data_width) inu = InputUnit(pd.data_width, self.input_stb, self.input) outu = OutputUnit(pd.data_width, self.output_stb, self.output) - units = [nop, adder, subtractor, multiplier, copier, inu, outu] + units = [nop, adder, subtractor, multiplier, minu, maxu, copier, inu, outu] self.submodules += units for unit in units: @@ -490,6 +530,8 @@ class ProcessorImpl(Module): (SubIsn.opcode, subtractor), (MulShiftIsn.opcode, multiplier), (MulShiftIsn.opcode + 1, multiplier), + (MinIsn.opcode, minu), + (MaxIsn.opcode, maxu), (CopyIsn.opcode, copier), (InputIsn.opcode, inu), (OutputIsn.opcode, outu) @@ -527,7 +569,7 @@ def foo(x): def simple_test(x): - return (x*2 >> 1) + 2 + return min((x*2 >> 1) + 2, 10) if __name__ == "__main__": From 19620948bf1d8aca3709d3eac966994d2479f817 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 17:04:17 +0800 Subject: [PATCH 12/17] wrpll/thls: implement signed numbers --- artiq/gateware/wrpll/thls.py | 37 +++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index f1b9a5947..b0e18ba72 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -132,13 +132,28 @@ class ASTCompiler: output = self.get_ssa_reg() self.program.append(cons(inputs=inputs, outputs=[output])) return output - elif isinstance(node, ast.Num): - if node.n in self.constants: - return self.constants[node.n] + elif isinstance(node, (ast.Num, ast.UnaryOp)): + if isinstance(node, ast.UnaryOp): + if not isinstance(node.operand, ast.Num): + raise NotImplementedError + if isinstance(node.op, ast.UAdd): + transform = lambda x: x + elif isinstance(node.op, ast.USub): + transform = operator.neg + elif isinstance(node.op, ast.Invert): + transform = operator.invert + else: + raise NotImplementedError + node = node.operand + else: + transform = lambda x: x + n = transform(node.n) + if n in self.constants: + return self.constants[n] else: r = len(self.data) - self.data.append(node.n) - self.constants[node.n] = r + self.data.append(n) + self.constants[n] = r return r elif isinstance(node, ast.Name): return self.names[node.id] @@ -357,10 +372,10 @@ def compile(processor, function): class BaseUnit(Module): def __init__(self, data_width): self.stb_i = Signal() - self.i0 = Signal(data_width) - self.i1 = Signal(data_width) + self.i0 = Signal((data_width, True)) + self.i1 = Signal((data_width, True)) self.stb_o = Signal() - self.o = Signal(data_width) + self.o = Signal((data_width, True)) class NopUnit(BaseUnit): @@ -436,10 +451,10 @@ class OutputUnit(BaseUnit): class ProcessorImpl(Module): def __init__(self, pd, program, data): self.input_stb = Signal() - self.input = Signal(pd.data_width) + self.input = Signal((pd.data_width, True)) self.output_stb = Signal() - self.output = Signal(pd.data_width) + self.output = Signal((pd.data_width, True)) self.busy = Signal() @@ -569,7 +584,7 @@ def foo(x): def simple_test(x): - return min((x*2 >> 1) + 2, 10) + return min((x*-2 >> 1) + 2 - 1000, 10) if __name__ == "__main__": From 5c3974c265014fb03a094a1f9b89fd50c4384a53 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 17:12:48 +0800 Subject: [PATCH 13/17] wrpll/thls: fix opcode decoding --- artiq/gateware/wrpll/thls.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index b0e18ba72..3cb545aa4 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -530,6 +530,7 @@ class ProcessorImpl(Module): self.submodules += units for unit in units: + self.sync += unit.stb_i.eq(0) self.comb += [ unit.i0.eq(data_read_port0.dat_r), unit.i1.eq(data_read_port1.dat_r), @@ -552,7 +553,7 @@ class ProcessorImpl(Module): (OutputIsn.opcode, outu) ] for allocated_opcode, unit in decode_table: - self.sync += unit.stb_i.eq(pc_en & (opcode == allocated_opcode)) + self.sync += If(pc_en & (opcode == allocated_opcode), unit.stb_i.eq(1)) fsm = FSM() self.submodules += fsm From 9331fafab03cd35e33e294fd30b9f339078dad80 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 17:24:40 +0800 Subject: [PATCH 14/17] wrpll/filters: new code from Weida --- artiq/gateware/wrpll/filters.py | 108 +++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/artiq/gateware/wrpll/filters.py b/artiq/gateware/wrpll/filters.py index 64dbe7c85..271124114 100644 --- a/artiq/gateware/wrpll/filters.py +++ b/artiq/gateware/wrpll/filters.py @@ -1,30 +1,4 @@ -main_xn1 = 0 -main_xn2 = 0 -main_yn0 = 0 -main_yn1 = 0 -main_yn2 = 0 - -main_y0 = 23 -main_yr = 43 - -def filter_main(xn0): - global main_xn1, main_xn2, main_yn0, main_yn1, main_yn2 - - main_yn2 = main_yn1 - main_yn1 = main_yn0 - main_yn0 = ( - ((133450380908*((35184372088832*xn0 >> 44) + (17592186044417*main_xn1 >> 44))) >> 44) - + (29455872930889*main_yn1 >> 44) - - (12673794781453*main_yn2 >> 44)) - - main_xn2 = main_xn1 - main_xn1 = xn0 - - main_yn0 = min(main_yn0, main_y0 + main_yr) - main_yn0 = max(main_yn0, main_y0 - main_yr) - - return main_yn0 - +import random as rand helper_xn1 = 0 helper_xn2 = 0 @@ -32,23 +6,81 @@ helper_yn0 = 0 helper_yn1 = 0 helper_yn2 = 0 -helper_y0 = 23 -helper_yr = 43 +previous_helper_tag = 0 -def filter_helper(xn0): - global helper_xn1, helper_xn2, helper_yn0, helper_yn1, helper_yn2 +main_xn1 = 0 +main_xn2 = 0 +main_yn0 = 0 +main_yn1 = 0 +main_yn2 = 0 + + +def filter_helper(helper_tag): + global helper_xn1, helper_xn2, helper_yn0, \ + helper_yn1, helper_yn2, previous_helper_tag + + helper_xn0 = helper_tag - previous_helper_tag - 1 << 15 + # This 1 << 15 is not an operation for filter. It is a constant. + # However, different loop may use 1 << 14 or 1 << 15. + + helper_yr = 4294967296 helper_yn2 = helper_yn1 helper_yn1 = helper_yn0 helper_yn0 = ( - ((133450380908*((35184372088832*xn0 >> 44) + (17592186044417*helper_xn1 >> 44))) >> 44) - + (29455872930889*helper_yn1 >> 44) - - (12673794781453*helper_yn2 >> 44)) + ((284885689*((217319150*helper_xn0 >> 44) + + (-17591968725107*helper_xn1 >> 44))) >> (44)) + + (-35184372088832*helper_yn1 >> 44) - + (17592186044416*helper_yn2 >> 44)) + # There is a 44 with (). All the other 44 will be a same constant value. + # But the () one can be different constant than others helper_xn2 = helper_xn1 - helper_xn1 = xn0 + helper_xn1 = helper_xn0 + + previous_helper_tag = helper_tag + + helper_yn0 = min(helper_yn0, helper_yr) + helper_yn0 = max(helper_yn0, - helper_yr) - helper_yn0 = min(helper_yn0, helper_y0 + helper_yr) - helper_yn0 = max(helper_yn0, helper_y0 - helper_yr) - return helper_yn0 + + +def main_filter(main_xn0): + + global main_xn1, main_xn2, main_yn0, main_yn1, main_yn2 + + main_yr = 4294967296 + + main_yn2 = main_yn1 + main_yn1 = main_yn0 + main_yn0 = ( + ((133450380908*(((35184372088832*main_xn0) >> 44) + + ((17592186044417*main_xn1) >> 44))) >> (44)) + + ((29455872930889*main_yn1) >> 44) - + ((12673794781453*main_yn2) >> 44)) + # There is a 44 with (). All the other 44 will be a same constant value. + # But the () one can be different constant than others + + main_xn2 = main_xn1 + main_xn1 = main_xn0 + + main_yn0 = min(main_yn0, main_yr) + main_yn0 = max(main_yn0, - main_yr) + + return main_yn0 + + +def main(): + i = 0 + helper_data = [] + main_data = [] + while 1: + helper_data.append(filter_helper(rand.randint(-128, 128)+i*32768)) + print(i, helper_data[i]) + main_data.append(main_filter(rand.randint(-128, 128))) + print(i, main_data[i]) + i = i+1 + +if __name__ == '__main__': + main() From 24082b687e1ed917cfbbc434c0fc373e79d5e949 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 17:58:22 +0800 Subject: [PATCH 15/17] wrpll/filters: clean up and make compatible with thls --- artiq/gateware/wrpll/filters.py | 38 ++++++--------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/artiq/gateware/wrpll/filters.py b/artiq/gateware/wrpll/filters.py index 271124114..58b664754 100644 --- a/artiq/gateware/wrpll/filters.py +++ b/artiq/gateware/wrpll/filters.py @@ -1,5 +1,3 @@ -import random as rand - helper_xn1 = 0 helper_xn2 = 0 helper_yn0 = 0 @@ -15,13 +13,11 @@ main_yn1 = 0 main_yn2 = 0 -def filter_helper(helper_tag): +def helper(helper_tag): global helper_xn1, helper_xn2, helper_yn0, \ helper_yn1, helper_yn2, previous_helper_tag - helper_xn0 = helper_tag - previous_helper_tag - 1 << 15 - # This 1 << 15 is not an operation for filter. It is a constant. - # However, different loop may use 1 << 14 or 1 << 15. + helper_xn0 = helper_tag - previous_helper_tag - 32768 helper_yr = 4294967296 @@ -29,11 +25,9 @@ def filter_helper(helper_tag): helper_yn1 = helper_yn0 helper_yn0 = ( ((284885689*((217319150*helper_xn0 >> 44) + - (-17591968725107*helper_xn1 >> 44))) >> (44)) + + (-17591968725107*helper_xn1 >> 44))) >> 44) + (-35184372088832*helper_yn1 >> 44) - (17592186044416*helper_yn2 >> 44)) - # There is a 44 with (). All the other 44 will be a same constant value. - # But the () one can be different constant than others helper_xn2 = helper_xn1 helper_xn1 = helper_xn0 @@ -41,13 +35,12 @@ def filter_helper(helper_tag): previous_helper_tag = helper_tag helper_yn0 = min(helper_yn0, helper_yr) - helper_yn0 = max(helper_yn0, - helper_yr) + helper_yn0 = max(helper_yn0, 0 - helper_yr) return helper_yn0 -def main_filter(main_xn0): - +def main(main_xn0): global main_xn1, main_xn2, main_yn0, main_yn1, main_yn2 main_yr = 4294967296 @@ -56,31 +49,14 @@ def main_filter(main_xn0): main_yn1 = main_yn0 main_yn0 = ( ((133450380908*(((35184372088832*main_xn0) >> 44) + - ((17592186044417*main_xn1) >> 44))) >> (44)) + + ((17592186044417*main_xn1) >> 44))) >> 44) + ((29455872930889*main_yn1) >> 44) - ((12673794781453*main_yn2) >> 44)) - # There is a 44 with (). All the other 44 will be a same constant value. - # But the () one can be different constant than others main_xn2 = main_xn1 main_xn1 = main_xn0 main_yn0 = min(main_yn0, main_yr) - main_yn0 = max(main_yn0, - main_yr) + main_yn0 = max(main_yn0, 0 - main_yr) return main_yn0 - - -def main(): - i = 0 - helper_data = [] - main_data = [] - while 1: - helper_data.append(filter_helper(rand.randint(-128, 128)+i*32768)) - print(i, helper_data[i]) - main_data.append(main_filter(rand.randint(-128, 128))) - print(i, main_data[i]) - i = i+1 - -if __name__ == '__main__': - main() From 1fd2322662b8148e4551d21fb050e72471fbe020 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Thu, 15 Aug 2019 23:16:17 +0800 Subject: [PATCH 16/17] wrpll/thls: implement global writeback --- artiq/gateware/wrpll/thls.py | 37 +++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/artiq/gateware/wrpll/thls.py b/artiq/gateware/wrpll/thls.py index 3cb545aa4..3f5aa6425 100644 --- a/artiq/gateware/wrpll/thls.py +++ b/artiq/gateware/wrpll/thls.py @@ -3,6 +3,7 @@ import ast from copy import copy import operator from functools import reduce +from collections import OrderedDict from migen import * from migen.genlib.fsm import * @@ -73,7 +74,7 @@ class ASTCompiler: self.next_ssa_reg = -1 self.constants = dict() self.names = dict() - self.globals = dict() + self.globals = OrderedDict() def get_ssa_reg(self): r = self.next_ssa_reg @@ -81,11 +82,11 @@ class ASTCompiler: return r def add_global(self, name): - r = len(self.data) - self.data.append(0) - self.names[name] = r - self.globals[name] = r - return r + if name not in self.globals: + r = len(self.data) + self.data.append(0) + self.names[name] = r + self.globals[name] = r def input(self, name): target = self.get_ssa_reg() @@ -266,11 +267,20 @@ class Scheduler: return False if isn.outputs: + # check that exit slot is free latency = self.processor.get_instruction_latency(isn) exit = cycle + latency if exit in self.exits: return False + # avoid RAW hazard with global writeback + for output in isn.outputs: + if output >= 0: + for risn in self.remaining: + for inp in risn.inputs: + if inp == output: + return False + # Instruction can be scheduled self.remaining.remove(isn) @@ -282,7 +292,10 @@ class Scheduler: if isn.outputs: assert len(isn.outputs) == 1 - output = self.allocate_register() + if isn.outputs[0] < 0: + output = self.allocate_register() + else: + output = isn.outputs[0] self.exits[exit] = (isn.outputs[0], output) self.output.append(isn.__class__(immediate=isn.immediate, inputs=mapped_inputs)) @@ -352,6 +365,10 @@ def compile(processor, function): astcompiler.emit(node) if isinstance(node, ast.Return): break + for glbl, location in astcompiler.globals.items(): + new_location = astcompiler.names[glbl] + if new_location != location: + astcompiler.program.append(CopyIsn(inputs=[new_location], outputs=[location])) scheduler = Scheduler(processor, len(astcompiler.data), astcompiler.program) scheduler.schedule() @@ -585,7 +602,9 @@ def foo(x): def simple_test(x): - return min((x*-2 >> 1) + 2 - 1000, 10) + global a + a = a + (x*4 >> 1) + return a if __name__ == "__main__": @@ -614,4 +633,4 @@ if __name__ == "__main__": callback((yield proc_impl.output)) yield - run_simulation(proc_impl, [send_values([42, 40]), receive_values(print)]) + run_simulation(proc_impl, [send_values([42, 40, 10, 10]), receive_values(print)]) From 959679d8b70577bf3cdbb74e465c70bf5992f266 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 27 Aug 2019 18:02:05 +0800 Subject: [PATCH 17/17] wrpll: add I2CMasterMachine --- artiq/gateware/wrpll/si549.py | 166 ++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 6 deletions(-) diff --git a/artiq/gateware/wrpll/si549.py b/artiq/gateware/wrpll/si549.py index fd87c2cfd..652e3f0b5 100644 --- a/artiq/gateware/wrpll/si549.py +++ b/artiq/gateware/wrpll/si549.py @@ -5,6 +5,148 @@ from migen.genlib.cdc import MultiReg, PulseSynchronizer, BlindTransfer from misoc.interconnect.csr import * +class I2CClockGen(Module): + def __init__(self, width): + self.load = Signal(width) + self.clk2x = Signal() + + cnt = Signal.like(self.load) + self.comb += [ + self.clk2x.eq(cnt == 0), + ] + self.sync += [ + If(self.clk2x, + cnt.eq(self.load), + ).Else( + cnt.eq(cnt - 1), + ) + ] + + +class I2CMasterMachine(Module): + def __init__(self, clock_width): + self.scl = Signal(reset=1) + self.sda_o = Signal(reset=1) + self.sda_i = Signal() + + self.submodules.cg = CEInserter()(I2CClockGen(clock_width)) + self.idle = Signal() + self.start = Signal() + self.stop = Signal() + self.write = Signal() + self.read = Signal() + self.ack = Signal() + self.data = Signal(8) + + ### + + busy = Signal() + bits = Signal(4) + + fsm = CEInserter()(FSM("IDLE")) + self.submodules += fsm + + fsm.act("IDLE", + If(self.start, + NextState("START0"), + ).Elif(self.stop & self.start, + NextState("RESTART0"), + ).Elif(self.stop, + NextState("STOP0"), + ).Elif(self.write, + NextValue(bits, 8), + NextState("WRITE0"), + ).Elif(self.read, + NextValue(bits, 8), + NextState("READ0"), + ) + ) + + fsm.act("START0", + NextValue(self.scl, 1), + NextState("START1")) + fsm.act("START1", + NextValue(self.sda_o, 0), + NextState("IDLE")) + + fsm.act("RESTART0", + NextValue(self.scl, 0), + NextState("RESTART1")) + fsm.act("RESTART1", + NextValue(self.sda_o, 1), + NextState("START0")) + + fsm.act("STOP0", + NextValue(self.scl, 0), + NextState("STOP1")) + fsm.act("STOP1", + NextValue(self.scl, 1), + NextValue(self.sda_o, 0), + NextState("STOP2")) + fsm.act("STOP2", + NextValue(self.sda_o, 1), + NextState("IDLE")) + + fsm.act("WRITE0", + NextValue(self.scl, 0), + If(bits == 0, + NextValue(self.sda_o, 1), + NextState("READACK0"), + ).Else( + NextValue(self.sda_o, self.data[7]), + NextState("WRITE1"), + ) + ) + fsm.act("WRITE1", + NextValue(self.scl, 1), + NextValue(self.data[1:], self.data[:-1]), + NextValue(bits, bits - 1), + NextState("WRITE0"), + ) + fsm.act("READACK0", + NextValue(self.scl, 1), + NextState("READACK1"), + ) + fsm.act("READACK1", + NextValue(self.ack, ~self.sda_i), + NextState("IDLE") + ) + + fsm.act("READ0", + NextValue(self.scl, 0), + NextState("READ1"), + ) + fsm.act("READ1", + NextValue(self.data[0], self.sda_i), + NextValue(self.scl, 0), + If(bits == 0, + NextValue(self.sda_o, ~self.ack), + NextState("WRITEACK0"), + ).Else( + NextValue(self.sda_o, 1), + NextState("READ2"), + ) + ) + fsm.act("READ2", + NextValue(self.scl, 1), + NextValue(self.data[:-1], self.data[1:]), + NextValue(bits, bits - 1), + NextState("READ1"), + ) + fsm.act("WRITEACK0", + NextValue(self.scl, 1), + NextState("IDLE"), + ) + + run = Signal() + self.comb += [ + run.eq(self.start | self.stop | self.write | self.read), + self.idle.eq(~run & fsm.ongoing("IDLE")), + self.cg.ce.eq(~self.idle), + fsm.ce.eq(run | self.cg.clk2x), + ] + + class ADPLLProgrammer(Module): def __init__(self): self.i2c_divider = Signal(16) @@ -18,11 +160,21 @@ class ADPLLProgrammer(Module): self.scl = Signal() self.sda_i = Signal() self.sda_o = Signal() - self.sda_oe = Signal() self.scl.attr.add("no_retiming") self.sda_o.attr.add("no_retiming") - self.sda_oe.attr.add("no_retiming") + + # # # + + master = I2CMasterMachine(16) + self.submodules += master + + self.comb += [ + master.cg.load.eq(self.i2c_divider.storage), + self.scl.eq(master.scl), + master.sda_i.eq(self.sda_i), + self.sda_o.eq(master.sda_o) + ] class Si549(Module, AutoCSR): @@ -56,6 +208,10 @@ class Si549(Module, AutoCSR): programmer.adpll_stb.eq(self.adpll_stb) ] + self.gpio_enable.storage.attr.add("no_retiming") + self.gpio_out.storage.attr.add("no_retiming") + self.gpio_oe.storage.attr.add("no_retiming") + # SCL GPIO and mux ts_scl = TSTriple(1) self.specials += ts_scl.get_tristate(pads.scl) @@ -64,7 +220,6 @@ class Si549(Module, AutoCSR): self.comb += self.gpio_in.status[0].eq(status) self.specials += MultiReg(ts_scl.i, status) - self.gpio_enable.storage.attr.add("no_retiming") self.comb += [ If(self.gpio_enable.storage, ts_scl.o.eq(self.gpio_out.storage[0]), @@ -83,14 +238,13 @@ class Si549(Module, AutoCSR): self.comb += self.gpio_in.status[1].eq(status) self.specials += MultiReg(ts_sda.i, status) - self.gpio_enable.storage.attr.add("no_retiming") self.comb += [ If(self.gpio_enable.storage, ts_sda.o.eq(self.gpio_out.storage[1]), ts_sda.oe.eq(self.gpio_oe.storage[1]) ).Else( - ts_sda.o.eq(programmer.sda_o), - ts_sda.oe.eq(programmer.sda_oe) + ts_sda.o.eq(0), + ts_sda.oe.eq(~programmer.sda_o) ) ] self.specials += MultiReg(ts_sda.i, programmer.sda_i, "helper")