from functools import reduce
from operator import or_
from itertools import tee

from nmigen import *
from nmigen.lib.coding import PriorityEncoder

from .isa import *
from .stage import *
from .csr import *

from .units.adder import *
from .units.compare import *
from .units.debug import *
from .units.decoder import *
from .units.divider import *
from .units.exception import *
from .units.fetch import *
from .units.rvficon import *
from .units.loadstore import *
from .units.logic import *
from .units.multiplier import *
from .units.predict import *
from .units.shifter import *
from .units.trigger import *

from .units.debug.jtag import jtag_layout
from .wishbone import wishbone_layout, WishboneArbiter


__all__ = ["Minerva"]


# Pipeline register layouts.
#
# Each list holds (field name, shape) pairs and is handed to Stage() in
# Minerva.elaborate as the payload between two adjacent stages; the name
# prefix gives the pair of stages (e.g. _fd_layout sits between F and D).
# Field order and widths are consumed by Stage, so keep them stable.

# A -> F payload. "pc" is declared with the tuple shape (33, True) --
# presumably (width, signed) per nMigen's shape convention; TODO confirm.
_af_layout = [
    ("pc", (33, True)),
]


# F -> D payload: the fetched instruction and its fetch status.
_fd_layout = [
    ("pc", 32),
    ("instruction", 32),
    ("fetch_error", 1),
    ("fetch_badaddr", 30)
]


# D -> X payload: decoded control bits and selected operands.
_dx_layout = [
    ("pc", 32),
    ("instruction", 32),
    ("fetch_error", 1),
    ("fetch_badaddr", 30),
    ("illegal", 1),
    ("rd", 5),
    ("rs1", 5),
    ("rd_we", 1),
    ("rs1_re", 1),
    ("src1", 32),
    ("src2", 32),
    ("store_operand", 32),
    ("bypass_x", 1),
    ("bypass_m", 1),
    ("funct3", 3),
    ("load", 1),
    ("store", 1),
    ("adder_sub", 1),
    ("logic", 1),
    ("multiply", 1),
    ("divide", 1),
    ("shift", 1),
    ("direction", 1),
    ("sext", 1),
    ("jump", 1),
    ("compare", 1),
    ("branch", 1),
    ("branch_target", 32),
    ("branch_predict_taken", 1),
    ("fence_i", 1),
    ("csr", 1),
    ("csr_adr", 12),
    ("csr_we", 1),
    ("ecall", 1),
    ("ebreak", 1),
    ("mret", 1),
]


# X -> M payload: execute-stage results plus the control bits still needed.
_xm_layout = [
    ("pc", 32),
    ("instruction", 32),
    ("fetch_error", 1),
    ("fetch_badaddr", 30),
    ("illegal", 1),
    ("loadstore_misaligned", 1),
    ("ecall", 1),
    ("ebreak", 1),
    ("rd", 5),
    ("rd_we", 1),
    ("bypass_m", 1),
    ("funct3", 3),
    ("result", 32),
    ("shift", 1),
    ("load", 1),
    ("store", 1),
    ("store_data", 32),
    ("compare", 1),
    ("multiply", 1),
    ("divide", 1),
    ("condition_met", 1),
    ("branch_target", 32),
    ("branch_taken", 1),
    ("branch_predict_taken", 1),
    ("csr", 1),
    ("csr_adr", 12),
    ("csr_we", 1),
    ("csr_result", 32),
    ("mret", 1),
    ("exception", 1)
]
# M -> W payload: what the writeback stage needs to commit a result.
_mw_layout = [
    ("pc", 32),
    ("rd", 5),
    ("rd_we", 1),
    ("funct3", 3),
    ("result", 32),
    ("load", 1),
    ("load_data", 32),
    ("multiply", 1),
    ("exception", 1)
]


class Minerva(Elaboratable):
    """Top level of the core: six pipeline stages (A, F, D, X, M, W) wired to
    the functional units.

    Parameters
    ----------
    reset_address : int
        The A/F ``pc`` register resets to ``reset_address - 4``.
    with_icache : bool
        Fetch through ``CachedFetchUnit`` instead of ``BareFetchUnit``.
    icache_nways, icache_nlines, icache_nwords, icache_base, icache_limit
        Forwarded positionally, in this order, to ``CachedFetchUnit``. Only
        used when ``with_icache`` is set; their meaning is defined there.
    with_dcache : bool
        Load/store through ``CachedLoadStoreUnit`` instead of
        ``BareLoadStoreUnit``.
    dcache_nways, dcache_nlines, dcache_nwords, dcache_base, dcache_limit
        Forwarded positionally, in this order, to ``CachedLoadStoreUnit``.
        Only used when ``with_dcache`` is set.
    with_muldiv : bool
        Passed to ``InstructionDecoder`` and instantiates a multiplier and a
        divider (the ``Dummy*`` variants when ``with_rvfi`` is also set).
    with_debug : bool
        Adds the ``jtag`` record, a ``DebugUnit`` and a ``WishboneArbiter``
        in front of ``dbus``.
    with_trigger : bool
        Adds a ``TriggerUnit`` with ``nb_triggers`` triggers.
    nb_triggers : int
        Argument of ``TriggerUnit``; only used when ``with_trigger`` is set.
    with_rvfi : bool
        Adds the ``rvfi`` record and an ``RVFIController``.

    Attributes
    ----------
    external_interrupt : Signal(32)
    timer_interrupt : Signal()
    software_interrupt : Signal()
        Interrupt inputs, forwarded to the exception unit.
    ibus, dbus : Record(wishbone_layout)
        Instruction and data buses.
    jtag : Record(jtag_layout)
        Only present when ``with_debug`` is set.
    rvfi : Record(rvfi_layout)
        Only present when ``with_rvfi`` is set.
    """

    def __init__(self, reset_address=0x00000000,
                 with_icache=False,
                 icache_nways=1, icache_nlines=32, icache_nwords=4, icache_base=0, icache_limit=2**31,
                 with_dcache=False,
                 dcache_nways=1, dcache_nlines=32, dcache_nwords=4, dcache_base=0, dcache_limit=2**31,
                 with_muldiv=False,
                 with_debug=False,
                 with_trigger=False, nb_triggers=8,
                 with_rvfi=False):
        self.external_interrupt = Signal(32)
        self.timer_interrupt = Signal()
        self.software_interrupt = Signal()

        self.ibus = Record(wishbone_layout)
        self.dbus = Record(wishbone_layout)

        if with_debug:
            self.jtag = Record(jtag_layout)

        if with_rvfi:
            self.rvfi = Record(rvfi_layout)

        self.reset_address = reset_address
        self.with_icache = with_icache
        # Kept as tuples because they are splatted into the cached units.
        self.icache_args = icache_nways, icache_nlines, icache_nwords, icache_base, icache_limit
        self.with_dcache = with_dcache
        self.dcache_args = dcache_nways, dcache_nlines, dcache_nwords, dcache_base, dcache_limit
        self.with_muldiv = with_muldiv
        self.with_debug = with_debug
        self.with_trigger = with_trigger
        self.nb_triggers = nb_triggers
        self.with_rvfi = with_rvfi

    def elaborate(self, platform):
        """Build and return the ``Module`` describing the whole core.

        NOTE: statement order matters in this method. ``stall_on`` /
        ``kill_on`` calls and ``sync`` assignments to the same signal are
        order-sensitive (e.g. the debug resume write to ``a.source.pc`` is
        emitted before the A/F pipeline register update), so do not reorder
        sections casually.
        """
        cpu = Module()

        # pipeline stages

        a = cpu.submodules.a = Stage(None, _af_layout)
        f = cpu.submodules.f = Stage(_af_layout, _fd_layout)
        d = cpu.submodules.d = Stage(_fd_layout, _dx_layout)
        x = cpu.submodules.x = Stage(_dx_layout, _xm_layout)
        m = cpu.submodules.m = Stage(_xm_layout, _mw_layout)
        w = cpu.submodules.w = Stage(_mw_layout, None)
        stages = a, f, d, x, m, w

        # Connect each stage's source to the sink of the stage that follows it.
        sources, sinks = tee(stages)
        next(sinks)
        for s1, s2 in zip(sources, sinks):
            cpu.d.comb += s1.source.connect(s2.sink)

        a.source.pc.reset = self.reset_address - 4
        cpu.d.comb += a.valid.eq(Const(1))

        # units

        pc_sel = cpu.submodules.pc_sel = PCSelector()
        data_sel = cpu.submodules.data_sel = DataSelector()
        adder = cpu.submodules.adder = Adder()
        compare = cpu.submodules.compare = CompareUnit()
        decoder = cpu.submodules.decoder = InstructionDecoder(self.with_muldiv)
        exception = cpu.submodules.exception = ExceptionUnit()
        logic = cpu.submodules.logic = LogicUnit()
        predict = cpu.submodules.predict = BranchPredictor()
        shifter = cpu.submodules.shifter = Shifter()

        if self.with_icache:
            fetch = cpu.submodules.fetch = CachedFetchUnit(*self.icache_args)
        else:
            fetch = cpu.submodules.fetch = BareFetchUnit()

        if self.with_dcache:
            loadstore = cpu.submodules.loadstore = CachedLoadStoreUnit(*self.dcache_args)
        else:
            loadstore = cpu.submodules.loadstore = BareLoadStoreUnit()

        if self.with_muldiv:
            multiplier = Multiplier() if not self.with_rvfi else DummyMultiplier()
            divider = Divider() if not self.with_rvfi else DummyDivider()
            cpu.submodules.multiplier = multiplier
            cpu.submodules.divider = divider

        if self.with_debug:
            debug = cpu.submodules.debug = DebugUnit()

        if self.with_trigger:
            trigger = cpu.submodules.trigger = TriggerUnit(self.nb_triggers)

        if self.with_rvfi:
            rvficon = cpu.submodules.rvficon = RVFIController()

        # register files

        gprf = Memory(width=32, depth=32)
        gprf_rp1 = gprf.read_port()
        gprf_rp2 = gprf.read_port()
        gprf_wp = gprf.write_port()
        cpu.submodules += gprf_rp1, gprf_rp2, gprf_wp

        csrf = cpu.submodules.csrf = CSRFile()
        csrf_rp = csrf.read_port()
        csrf_wp = csrf.write_port()

        csrf.add_csrs(exception.iter_csrs())
        if self.with_debug:
            csrf.add_csrs(debug.iter_csrs())
        if self.with_trigger:
            csrf.add_csrs(trigger.iter_csrs())

        # pipeline logic

        cpu.d.comb += [
            pc_sel.f_pc.eq(f.sink.pc),
            pc_sel.d_pc.eq(d.sink.pc),
            pc_sel.d_branch_predict_taken.eq(predict.d_branch_taken),
            pc_sel.d_branch_target.eq(predict.d_branch_target),
            pc_sel.d_valid.eq(d.valid),
            pc_sel.x_pc.eq(x.sink.pc),
            pc_sel.x_fence_i.eq(x.sink.fence_i),
            pc_sel.x_valid.eq(x.valid),
            pc_sel.m_branch_predict_taken.eq(m.sink.branch_predict_taken),
            pc_sel.m_branch_taken.eq(m.sink.branch_taken),
            pc_sel.m_branch_target.eq(m.sink.branch_target),
            pc_sel.m_exception.eq(exception.m_raise),
            pc_sel.m_mret.eq(m.sink.mret),
            pc_sel.m_valid.eq(m.valid),
            pc_sel.mtvec_r_base.eq(exception.mtvec.r.base),
            pc_sel.mepc_r_base.eq(exception.mepc.r.base)
        ]

        cpu.d.comb += [
            fetch.a_pc.eq(pc_sel.a_pc),
            fetch.a_stall.eq(a.stall),
            fetch.a_valid.eq(a.valid),
            fetch.f_stall.eq(f.stall),
            fetch.f_valid.eq(f.valid),
            fetch.ibus.connect(self.ibus)
        ]

        m.stall_on(fetch.a_busy & a.valid)
        m.stall_on(fetch.f_busy & f.valid)

        if self.with_icache:
            flush_icache = x.sink.fence_i & x.valid
            if self.with_debug:
                flush_icache |= debug.resumereq

            cpu.d.comb += [
                fetch.f_pc.eq(f.sink.pc),
                fetch.a_flush.eq(flush_icache)
            ]

        cpu.d.comb += [
            decoder.instruction.eq(d.sink.instruction)
        ]

        # Register file read addresses: taken from the instruction being
        # fetched while D is not stalled, otherwise from the decoded
        # instruction. With the debug unit, port 1 is handed to the debugger
        # while the core is halted.
        if self.with_debug:
            with cpu.If(debug.halt & debug.halted):
                cpu.d.comb += gprf_rp1.addr.eq(debug.gprf_addr)
            with cpu.Elif(~d.stall):
                cpu.d.comb += gprf_rp1.addr.eq(fetch.f_instruction[15:20])
            with cpu.Else():
                cpu.d.comb += gprf_rp1.addr.eq(decoder.rs1)

            cpu.d.comb += debug.gprf_dat_r.eq(gprf_rp1.data)
        else:
            with cpu.If(~d.stall):
                cpu.d.comb += gprf_rp1.addr.eq(fetch.f_instruction[15:20])
            with cpu.Else():
                cpu.d.comb += gprf_rp1.addr.eq(decoder.rs1)

        with cpu.If(~d.stall):
            cpu.d.comb += gprf_rp2.addr.eq(fetch.f_instruction[20:25])
        with cpu.Else():
            cpu.d.comb += gprf_rp2.addr.eq(decoder.rs2)

        with cpu.If(~f.stall):
            cpu.d.sync += csrf_rp.addr.eq(fetch.f_instruction[20:32])
        cpu.d.comb += csrf_rp.en.eq(decoder.csr & d.valid)

        # CSR set/clear instructions are translated to logic operations.
        x_csr_set_clear = x.sink.funct3[1]
        x_csr_clear = x_csr_set_clear & x.sink.funct3[0]
        x_csr_fmt_i = x.sink.funct3[2]
        x_csr_src1 = Mux(x_csr_fmt_i, x.sink.rs1, x.sink.src1)
        x_csr_src1 = Mux(x_csr_clear, ~x_csr_src1, x_csr_src1)
        x_csr_logic_op = x.sink.funct3 | 0b100

        cpu.d.comb += [
            logic.op.eq(Mux(x.sink.csr, x_csr_logic_op, x.sink.funct3)),
            logic.src1.eq(Mux(x.sink.csr, x_csr_src1, x.sink.src1)),
            logic.src2.eq(x.sink.src2)
        ]

        cpu.d.comb += [
            adder.sub.eq(x.sink.adder_sub),
            adder.src1.eq(x.sink.src1),
            adder.src2.eq(x.sink.src2),
        ]

        if self.with_muldiv:
            cpu.d.comb += [
                multiplier.x_op.eq(x.sink.funct3),
                multiplier.x_src1.eq(x.sink.src1),
                multiplier.x_src2.eq(x.sink.src2),
                multiplier.x_stall.eq(x.stall),
                multiplier.m_stall.eq(m.stall)
            ]

            cpu.d.comb += [
                divider.x_op.eq(x.sink.funct3),
                divider.x_src1.eq(x.sink.src1),
                divider.x_src2.eq(x.sink.src2),
                divider.x_valid.eq(x.sink.valid),
                divider.x_stall.eq(x.stall)
            ]

            m.stall_on(divider.m_busy)

        cpu.d.comb += [
            shifter.x_direction.eq(x.sink.direction),
            shifter.x_sext.eq(x.sink.sext),
            shifter.x_shamt.eq(x.sink.src2),
            shifter.x_src1.eq(x.sink.src1),
            shifter.x_stall.eq(x.stall)
        ]

        cpu.d.comb += [
            # compare.op is shared by compare and branch instructions.
            compare.op.eq(Mux(x.sink.compare, x.sink.funct3 << 1, x.sink.funct3)),
            compare.zero.eq(x.sink.src1 == x.sink.src2),
            compare.negative.eq(adder.result[-1]),
            compare.overflow.eq(adder.overflow),
            compare.carry.eq(adder.carry)
        ]

        cpu.d.comb += [
            exception.external_interrupt.eq(self.external_interrupt),
            exception.timer_interrupt.eq(self.timer_interrupt),
            exception.software_interrupt.eq(self.software_interrupt),
            exception.m_fetch_misaligned.eq(m.sink.branch_taken & m.sink.branch_target[:2].bool()),
            exception.m_fetch_error.eq(m.sink.fetch_error),
            exception.m_fetch_badaddr.eq(m.sink.fetch_badaddr),
            exception.m_load_misaligned.eq(m.sink.load & m.sink.loadstore_misaligned),
            exception.m_load_error.eq(loadstore.m_load_error),
            exception.m_store_misaligned.eq(m.sink.store & m.sink.loadstore_misaligned),
            exception.m_store_error.eq(loadstore.m_store_error),
            exception.m_loadstore_badaddr.eq(loadstore.m_badaddr),
            exception.m_branch_target.eq(m.sink.branch_target),
            exception.m_illegal.eq(m.sink.illegal),
            exception.m_ecall.eq(m.sink.ecall),
            exception.m_pc.eq(m.sink.pc),
            exception.m_instruction.eq(m.sink.instruction),
            exception.m_result.eq(m.sink.result),
            exception.m_mret.eq(m.sink.mret),
            exception.m_stall.eq(m.sink.stall),
            exception.m_valid.eq(m.valid)
        ]

        m_ebreak = m.sink.ebreak
        if self.with_debug:
            # If dcsr.ebreakm is set, EBREAK instructions enter Debug Mode.
            # We do not want to raise an exception in this case because Debug Mode
            # should be invisible to software execution.
            m_ebreak &= ~debug.dcsr_ebreakm
        if self.with_trigger:
            # Register the X-stage trigger trap so it lines up with the
            # instruction once it reaches M.
            m_trigger_trap = Signal()
            with cpu.If(~x.stall):
                cpu.d.sync += m_trigger_trap.eq(trigger.x_trap)
            m_ebreak |= m_trigger_trap
        cpu.d.comb += exception.m_ebreak.eq(m_ebreak)

        m.kill_on(m.source.exception & m.source.valid)

        cpu.d.comb += [
            data_sel.x_offset.eq(adder.result[:2]),
            data_sel.x_funct3.eq(x.sink.funct3),
            data_sel.x_store_operand.eq(x.sink.store_operand),
            data_sel.w_offset.eq(w.sink.result[:2]),
            data_sel.w_funct3.eq(w.sink.funct3),
            data_sel.w_load_data.eq(w.sink.load_data)
        ]

        cpu.d.comb += [
            loadstore.x_addr.eq(adder.result),
            loadstore.x_mask.eq(data_sel.x_mask),
            loadstore.x_load.eq(x.sink.load),
            loadstore.x_store.eq(x.sink.store),
            loadstore.x_store_data.eq(data_sel.x_store_data),
            loadstore.x_stall.eq(x.stall),
            loadstore.x_valid.eq(x.valid),
            loadstore.m_stall.eq(m.stall),
            loadstore.m_valid.eq(m.valid)
        ]

        m.stall_on(loadstore.x_busy & x.valid)
        m.stall_on(loadstore.m_busy & m.valid)

        if self.with_dcache:
            if self.with_debug:
                cpu.d.comb += loadstore.m_flush.eq(debug.resumereq)

            cpu.d.comb += [
                loadstore.x_fence_i.eq(x.sink.fence_i),
                loadstore.m_load.eq(m.sink.load),
                loadstore.m_store.eq(m.sink.store),
            ]

        # A valid FENCE.I in X kills the younger instructions in A and F.
        for s in a, f:
            s.kill_on(x.sink.fence_i & x.valid)

        if self.with_debug:
            # The debug unit shares the data bus with the load/store unit.
            cpu.submodules.dbus_arbiter = dbus_arbiter = WishboneArbiter()
            debug_dbus_port = dbus_arbiter.port(priority=0)
            loadstore_dbus_port = dbus_arbiter.port(priority=1)
            cpu.d.comb += [
                loadstore.dbus.connect(loadstore_dbus_port),
                debug.dbus.connect(debug_dbus_port),
                dbus_arbiter.bus.connect(self.dbus),
            ]
        else:
            cpu.d.comb += loadstore.dbus.connect(self.dbus)

        # RAW hazard management

        x_raw_rs1 = Signal()
        m_raw_rs1 = Signal()
        w_raw_rs1 = Signal()
        x_raw_rs2 = Signal()
        m_raw_rs2 = Signal()
        w_raw_rs2 = Signal()

        x_raw_csr = Signal()
        m_raw_csr = Signal()

        x_lock = Signal()
        m_lock = Signal()

        cpu.d.comb += [
            x_raw_rs1.eq(x.sink.rd.any() & (x.sink.rd == decoder.rs1) & x.sink.rd_we),
            m_raw_rs1.eq(m.sink.rd.any() & (m.sink.rd == decoder.rs1) & m.sink.rd_we),
            w_raw_rs1.eq(w.sink.rd.any() & (w.sink.rd == decoder.rs1) & w.sink.rd_we),

            x_raw_rs2.eq(x.sink.rd.any() & (x.sink.rd == decoder.rs2) & x.sink.rd_we),
            m_raw_rs2.eq(m.sink.rd.any() & (m.sink.rd == decoder.rs2) & m.sink.rd_we),
            w_raw_rs2.eq(w.sink.rd.any() & (w.sink.rd == decoder.rs2) & w.sink.rd_we),

            x_raw_csr.eq((x.sink.csr_adr == decoder.immediate) & x.sink.csr_we),
            m_raw_csr.eq((m.sink.csr_adr == decoder.immediate) & m.sink.csr_we),

            x_lock.eq(~x.sink.bypass_x & (decoder.rs1_re & x_raw_rs1 | decoder.rs2_re & x_raw_rs2) | decoder.csr & x_raw_csr),
            m_lock.eq(~m.sink.bypass_m & (decoder.rs1_re & m_raw_rs1 | decoder.rs2_re & m_raw_rs2) | decoder.csr & m_raw_csr),
        ]

        if self.with_debug:
            d.stall_on((x_lock & x.valid | m_lock & m.valid) & d.valid & ~debug.dcsr_step)
        else:
            d.stall_on((x_lock & x.valid | m_lock & m.valid) & d.valid)

        # result selection

        x_result = Signal(32)
        m_result = Signal(32)
        w_result = Signal(32)
        x_csr_result = Signal(32)

        with cpu.If(x.sink.jump):
            cpu.d.comb += x_result.eq(x.sink.pc + 4)
        with cpu.Elif(x.sink.logic):
            cpu.d.comb += x_result.eq(logic.result)
        with cpu.Elif(x.sink.csr):
            cpu.d.comb += x_result.eq(x.sink.src2)
        with cpu.Else():
            cpu.d.comb += x_result.eq(adder.result)

        # The Python-level `if` below splices an extra Elif arm into the
        # If/Elif/Else chain only when the divider exists.
        with cpu.If(m.sink.compare):
            cpu.d.comb += m_result.eq(m.sink.condition_met)
        if self.with_muldiv:
            with cpu.Elif(m.sink.divide):
                cpu.d.comb += m_result.eq(divider.m_result)
        with cpu.Elif(m.sink.shift):
            cpu.d.comb += m_result.eq(shifter.m_result)
        with cpu.Else():
            cpu.d.comb += m_result.eq(m.sink.result)

        with cpu.If(w.sink.load):
            cpu.d.comb += w_result.eq(data_sel.w_load_result)
        if self.with_muldiv:
            with cpu.Elif(w.sink.multiply):
                cpu.d.comb += w_result.eq(multiplier.w_result)
        with cpu.Else():
            cpu.d.comb += w_result.eq(w.sink.result)

        with cpu.If(x_csr_set_clear):
            cpu.d.comb += x_csr_result.eq(logic.result)
        with cpu.Else():
            cpu.d.comb += x_csr_result.eq(x_csr_src1)

        cpu.d.comb += [
            csrf_wp.en.eq(m.sink.csr & m.sink.csr_we & m.valid & ~exception.m_raise & ~m.stall),
            csrf_wp.addr.eq(m.sink.csr_adr),
            csrf_wp.data.eq(m.sink.csr_result)
        ]

        # Writeback enable, shared by both configurations. An instruction
        # whose M/W `exception` bit is set (latched from exception.m_raise)
        # must not commit its destination register; previously only the
        # with_debug path applied this gate, so a core built without the
        # debug unit still wrote rd for an instruction that had raised.
        w_rd_commit = (w.sink.rd != 0) & w.sink.rd_we & w.valid & ~w.sink.exception

        if self.with_debug:
            with cpu.If(debug.halt & debug.halted):
                # While halted, the debugger owns the write port.
                cpu.d.comb += [
                    gprf_wp.addr.eq(debug.gprf_addr),
                    gprf_wp.en.eq(debug.gprf_we),
                    gprf_wp.data.eq(debug.gprf_dat_w)
                ]
            with cpu.Else():
                cpu.d.comb += [
                    gprf_wp.en.eq(w_rd_commit),
                    gprf_wp.addr.eq(w.sink.rd),
                    gprf_wp.data.eq(w_result)
                ]
        else:
            cpu.d.comb += [
                gprf_wp.en.eq(w_rd_commit),
                gprf_wp.addr.eq(w.sink.rd),
                gprf_wp.data.eq(w_result)
            ]

        # D stage operand selection

        d_src1 = Signal(32)
        d_src2 = Signal(32)

        # Priority: constant sources first, then the youngest in-flight
        # producer (X, then M, then W), and finally the register file.
        with cpu.If(decoder.lui):
            cpu.d.comb += d_src1.eq(0)
        with cpu.Elif(decoder.auipc):
            cpu.d.comb += d_src1.eq(d.sink.pc)
        with cpu.Elif(decoder.rs1_re & (decoder.rs1 == 0)):
            cpu.d.comb += d_src1.eq(0)
        with cpu.Elif(x_raw_rs1 & x.sink.valid):
            cpu.d.comb += d_src1.eq(x_result)
        with cpu.Elif(m_raw_rs1 & m.sink.valid):
            cpu.d.comb += d_src1.eq(m_result)
        with cpu.Elif(w_raw_rs1 & w.sink.valid):
            cpu.d.comb += d_src1.eq(w_result)
        with cpu.Else():
            cpu.d.comb += d_src1.eq(gprf_rp1.data)

        with cpu.If(decoder.csr):
            cpu.d.comb += d_src2.eq(csrf_rp.data)
        with cpu.Elif(~decoder.rs2_re):
            cpu.d.comb += d_src2.eq(decoder.immediate)
        with cpu.Elif(decoder.rs2 == 0):
            cpu.d.comb += d_src2.eq(0)
        with cpu.Elif(x_raw_rs2 & x.sink.valid):
            cpu.d.comb += d_src2.eq(x_result)
        with cpu.Elif(m_raw_rs2 & m.sink.valid):
            cpu.d.comb += d_src2.eq(m_result)
        with cpu.Elif(w_raw_rs2 & w.sink.valid):
            cpu.d.comb += d_src2.eq(w_result)
        with cpu.Else():
            cpu.d.comb += d_src2.eq(gprf_rp2.data)

        # branch prediction

        cpu.d.comb += [
            predict.d_branch.eq(decoder.branch),
            predict.d_jump.eq(decoder.jump),
            predict.d_offset.eq(decoder.immediate),
            predict.d_pc.eq(d.sink.pc),
            predict.d_rs1_re.eq(decoder.rs1_re)
        ]

        a.kill_on(predict.d_branch_taken & d.valid)
        # Predicted taken but not taken: A and F are killed.
        for s in a, f:
            s.kill_on(m.sink.branch_predict_taken & ~m.sink.branch_taken & m.valid)
        # Taken but not predicted, exception or MRET: A, F and D are killed.
        for s in a, f, d:
            s.kill_on(~m.sink.branch_predict_taken & m.sink.branch_taken & m.valid)
            s.kill_on((exception.m_raise | m.sink.mret) & m.valid)

        # debug unit

        if self.with_debug:
            cpu.d.comb += [
                debug.jtag.connect(self.jtag),
                debug.x_pc.eq(x.sink.pc),
                debug.x_ebreak.eq(x.sink.ebreak),
                debug.x_stall.eq(x.stall),
                debug.m_branch_taken.eq(m.sink.branch_taken),
                debug.m_branch_target.eq(m.sink.branch_target),
                debug.m_mret.eq(m.sink.mret),
                debug.m_exception.eq(exception.m_raise),
                debug.m_pc.eq(m.sink.pc),
                debug.m_valid.eq(m.valid),
                debug.mepc_r_base.eq(exception.mepc.r.base),
                debug.mtvec_r_base.eq(exception.mtvec.r.base)
            ]

            if self.with_trigger:
                cpu.d.comb += debug.trigger_haltreq.eq(trigger.haltreq)
            else:
                cpu.d.comb += debug.trigger_haltreq.eq(Const(0))

            csrf_debug_rp = csrf.read_port()
            csrf_debug_wp = csrf.write_port()
            cpu.d.comb += [
                csrf_debug_rp.addr.eq(debug.csrf_addr),
                csrf_debug_rp.en.eq(debug.csrf_re),
                debug.csrf_dat_r.eq(csrf_debug_rp.data),
                csrf_debug_wp.addr.eq(debug.csrf_addr),
                csrf_debug_wp.en.eq(debug.csrf_we),
                csrf_debug_wp.data.eq(debug.csrf_dat_w)
            ]

            x.stall_on(debug.halt)
            m.stall_on(debug.dcsr_step & m.valid & ~debug.halt)
            for s in a, f, d, x:
                s.kill_on(debug.killall)

            # Halted once X is stalled and neither M nor W holds a valid
            # instruction.
            halted = x.stall & ~reduce(or_, (s.valid for s in (m, w)))
            cpu.d.sync += debug.halted.eq(halted)

            with cpu.If(debug.resumereq):
                with cpu.If(~debug.dbus_busy):
                    cpu.d.comb += debug.resumeack.eq(1)
                    cpu.d.sync += a.source.pc.eq(debug.dpc_value - 4)

        if self.with_trigger:
            cpu.d.comb += [
                trigger.x_pc.eq(x.sink.pc),
                trigger.x_valid.eq(x.valid),
            ]

        if self.with_rvfi:
            cpu.d.comb += [
                rvficon.d_insn.eq(decoder.instruction),
                rvficon.d_rs1_addr.eq(Mux(decoder.rs1_re, decoder.rs1, 0)),
                rvficon.d_rs2_addr.eq(Mux(decoder.rs2_re, decoder.rs2, 0)),
                rvficon.d_rs1_rdata.eq(Mux(decoder.rs1_re, d_src1, 0)),
                rvficon.d_rs2_rdata.eq(Mux(decoder.rs2_re, d_src2, 0)),
                rvficon.d_stall.eq(d.stall),
                rvficon.x_mem_addr.eq(loadstore.x_addr[2:] << 2),
                rvficon.x_mem_wmask.eq(Mux(loadstore.x_store, loadstore.x_mask, 0)),
                rvficon.x_mem_rmask.eq(Mux(loadstore.x_load, loadstore.x_mask, 0)),
                rvficon.x_mem_wdata.eq(loadstore.x_store_data),
                rvficon.x_stall.eq(x.stall),
                rvficon.m_mem_rdata.eq(loadstore.m_load_data),
                rvficon.m_fetch_misaligned.eq(exception.m_fetch_misaligned),
                rvficon.m_illegal_insn.eq(m.sink.illegal),
                rvficon.m_load_misaligned.eq(exception.m_load_misaligned),
                rvficon.m_store_misaligned.eq(exception.m_store_misaligned),
                rvficon.m_exception.eq(exception.m_raise),
                rvficon.m_mret.eq(m.sink.mret),
                rvficon.m_branch_taken.eq(m.sink.branch_taken),
                rvficon.m_branch_target.eq(m.sink.branch_target),
                rvficon.m_pc_rdata.eq(m.sink.pc),
                rvficon.m_stall.eq(m.stall),
                rvficon.m_valid.eq(m.valid),
                rvficon.w_rd_addr.eq(Mux(gprf_wp.en, gprf_wp.addr, 0)),
                rvficon.w_rd_wdata.eq(Mux(gprf_wp.en, gprf_wp.data, 0)),
                rvficon.mtvec_r_base.eq(exception.mtvec.r.base),
                rvficon.mepc_r_value.eq(exception.mepc.r),
                rvficon.rvfi.connect(self.rvfi)
            ]

        # pipeline registers

        # A/F
        with cpu.If(~a.stall):
            cpu.d.sync += a.source.pc.eq(fetch.a_pc)

        # F/D
        with cpu.If(~f.stall):
            cpu.d.sync += [
                f.source.pc.eq(f.sink.pc),
                f.source.instruction.eq(fetch.f_instruction),
                f.source.fetch_error.eq(fetch.f_fetch_error),
                f.source.fetch_badaddr.eq(fetch.f_badaddr)
            ]

        # D/X
        with cpu.If(~d.stall):
            cpu.d.sync += [
                d.source.pc.eq(d.sink.pc),
                d.source.instruction.eq(d.sink.instruction),
                d.source.fetch_error.eq(d.sink.fetch_error),
                d.source.fetch_badaddr.eq(d.sink.fetch_badaddr),
                d.source.illegal.eq(decoder.illegal),
                d.source.rd.eq(decoder.rd),
                d.source.rs1.eq(decoder.rs1),
                d.source.rd_we.eq(decoder.rd_we),
                d.source.rs1_re.eq(decoder.rs1_re),
                d.source.bypass_x.eq(decoder.bypass_x),
                d.source.bypass_m.eq(decoder.bypass_m),
                d.source.funct3.eq(decoder.funct3),
                d.source.load.eq(decoder.load),
                d.source.store.eq(decoder.store),
                d.source.adder_sub.eq(decoder.adder & decoder.adder_sub | decoder.compare | decoder.branch),
                d.source.compare.eq(decoder.compare),
                d.source.logic.eq(decoder.logic),
                d.source.shift.eq(decoder.shift),
                d.source.direction.eq(decoder.direction),
                d.source.sext.eq(decoder.sext),
                d.source.jump.eq(decoder.jump),
                d.source.branch.eq(decoder.branch),
                d.source.fence_i.eq(decoder.fence_i),
                d.source.csr.eq(decoder.csr),
                d.source.csr_adr.eq(decoder.immediate),
                d.source.csr_we.eq(decoder.csr_we),
                d.source.ecall.eq(decoder.ecall),
                d.source.ebreak.eq(decoder.ebreak),
                d.source.mret.eq(decoder.mret),
                d.source.src1.eq(d_src1),
                # Stores use the immediate as the adder's second operand; the
                # value to store travels separately in store_operand.
                d.source.src2.eq(Mux(decoder.store, decoder.immediate, d_src2)),
                d.source.store_operand.eq(d_src2),
                d.source.branch_predict_taken.eq(predict.d_branch_taken),
                d.source.branch_target.eq(predict.d_branch_target)
            ]
            if self.with_muldiv:
                cpu.d.sync += [
                    d.source.multiply.eq(decoder.multiply),
                    d.source.divide.eq(decoder.divide)
                ]

        # X/M
        with cpu.If(~x.stall):
            cpu.d.sync += [
                x.source.pc.eq(x.sink.pc),
                x.source.instruction.eq(x.sink.instruction),
                x.source.fetch_error.eq(x.sink.fetch_error),
                x.source.fetch_badaddr.eq(x.sink.fetch_badaddr),
                x.source.illegal.eq(x.sink.illegal),
                x.source.loadstore_misaligned.eq(data_sel.x_misaligned),
                x.source.ecall.eq(x.sink.ecall),
                x.source.ebreak.eq(x.sink.ebreak),
                x.source.rd.eq(x.sink.rd),
                x.source.rd_we.eq(x.sink.rd_we),
                x.source.bypass_m.eq(x.sink.bypass_m | x.sink.bypass_x),
                x.source.funct3.eq(x.sink.funct3),
                x.source.load.eq(x.sink.load),
                x.source.store.eq(x.sink.store),
                x.source.store_data.eq(loadstore.x_store_data),
                x.source.compare.eq(x.sink.compare),
                x.source.shift.eq(x.sink.shift),
                x.source.mret.eq(x.sink.mret),
                x.source.condition_met.eq(compare.condition_met),
                x.source.branch_taken.eq(x.sink.jump | x.sink.branch & compare.condition_met),
                # A jump that reads rs1 takes its target from the adder with
                # bit 0 cleared; everything else keeps the D-stage target.
                x.source.branch_target.eq(Mux(x.sink.jump & x.sink.rs1_re, adder.result[1:] << 1, x.sink.branch_target)),
                x.source.branch_predict_taken.eq(x.sink.branch_predict_taken),
                x.source.csr.eq(x.sink.csr),
                x.source.csr_adr.eq(x.sink.csr_adr),
                x.source.csr_we.eq(x.sink.csr_we),
                x.source.csr_result.eq(x_csr_result),
                x.source.result.eq(x_result)
            ]
            if self.with_muldiv:
                cpu.d.sync += [
                    x.source.multiply.eq(x.sink.multiply),
                    x.source.divide.eq(x.sink.divide)
                ]

        # M/W
        with cpu.If(~m.stall):
            cpu.d.sync += [
                m.source.pc.eq(m.sink.pc),
                m.source.rd.eq(m.sink.rd),
                m.source.load.eq(m.sink.load),
                m.source.funct3.eq(m.sink.funct3),
                m.source.load_data.eq(loadstore.m_load_data),
                m.source.rd_we.eq(m.sink.rd_we),
                m.source.result.eq(m_result),
                m.source.exception.eq(exception.m_raise)
            ]
            if self.with_muldiv:
                cpu.d.sync += [
                    m.source.multiply.eq(m.sink.multiply)
                ]

        return cpu