295 lines
10 KiB
Python
295 lines
10 KiB
Python
|
from nmigen import *
|
||
|
from nmigen.utils import log2_int
|
||
|
from nmigen.lib.fifo import SyncFIFO
|
||
|
|
||
|
from ..cache import *
|
||
|
from ..isa import Funct3
|
||
|
from ..wishbone import *
|
||
|
|
||
|
|
||
|
# Public names of this module: the byte-lane selector, the shared signal
# interface, and the two load/store unit implementations defined below.
__all__ = ["DataSelector", "LoadStoreUnitInterface", "BareLoadStoreUnit", "CachedLoadStoreUnit"]
|
||
|
|
||
|
|
||
|
class DataSelector(Elaboratable):
    """Byte-lane steering for stores (``x_*``) and loads (``w_*``).

    From the 2-bit byte offset and the 3-bit ``funct3`` width code this block
    derives, combinatorially:

    * ``x_misaligned`` -- set for halfword accesses when offset bit 0 is set,
      and for word accesses when the offset is non-zero.
    * ``x_mask`` -- 4-bit byte-enable mask for the access.
    * ``x_store_data`` -- the store operand shifted onto the selected lanes.
    * ``w_load_result`` -- the byte/halfword/word picked out of
      ``w_load_data``, sign-extended for ``B``/``H`` and zero-extended for
      ``BU``/``HU``.

    For any ``funct3`` value not listed in a switch, the corresponding output
    is not assigned and keeps its reset value.
    """

    def __init__(self):
        # Store side.
        self.x_offset = Signal(2)
        self.x_funct3 = Signal(3)
        self.x_store_operand = Signal(32)
        # Load side.
        self.w_offset = Signal(2)
        self.w_funct3 = Signal(3)
        self.w_load_data = Signal(32)

        # Results.
        self.x_misaligned = Signal()
        self.x_mask = Signal(4)
        self.x_store_data = Signal(32)
        self.w_load_result = Signal((32, True))

    def elaborate(self, platform):
        """Build the combinatorial selection logic and return the module."""
        m = Module()

        # --- Alignment check -------------------------------------------------
        with m.Switch(self.x_funct3):
            with m.Case(Funct3.H, Funct3.HU):
                m.d.comb += self.x_misaligned.eq(self.x_offset[0])
            with m.Case(Funct3.W):
                m.d.comb += self.x_misaligned.eq(self.x_offset.bool())

        # --- Byte-enable mask ------------------------------------------------
        with m.Switch(self.x_funct3):
            with m.Case(Funct3.B, Funct3.BU):
                m.d.comb += self.x_mask.eq(0b1 << self.x_offset)
            with m.Case(Funct3.H, Funct3.HU):
                m.d.comb += self.x_mask.eq(0b11 << self.x_offset)
            with m.Case(Funct3.W):
                m.d.comb += self.x_mask.eq(0b1111)

        # --- Store data alignment --------------------------------------------
        # Shift amounts in bits: a byte lands on lane `offset`, a halfword on
        # the lower or upper half as chosen by offset bit 1.
        byte_shift = self.x_offset * 8
        half_shift = self.x_offset[1] * 16

        with m.Switch(self.x_funct3):
            with m.Case(Funct3.B):
                m.d.comb += self.x_store_data.eq(self.x_store_operand[:8] << byte_shift)
            with m.Case(Funct3.H):
                m.d.comb += self.x_store_data.eq(self.x_store_operand[:16] << half_shift)
            with m.Case(Funct3.W):
                m.d.comb += self.x_store_data.eq(self.x_store_operand)

        # --- Load data extraction --------------------------------------------
        # Both are declared signed so that assigning them to the signed
        # 32-bit result sign-extends.
        w_byte = Signal((8, True))
        w_half = Signal((16, True))

        m.d.comb += w_byte.eq(self.w_load_data.word_select(self.w_offset, 8))
        m.d.comb += w_half.eq(self.w_load_data.word_select(self.w_offset[1], 16))

        with m.Switch(self.w_funct3):
            with m.Case(Funct3.B):
                m.d.comb += self.w_load_result.eq(w_byte)
            with m.Case(Funct3.BU):
                # Cat(..., 0) yields an unsigned value, hence zero-extension.
                m.d.comb += self.w_load_result.eq(Cat(w_byte, 0))
            with m.Case(Funct3.H):
                m.d.comb += self.w_load_result.eq(w_half)
            with m.Case(Funct3.HU):
                m.d.comb += self.w_load_result.eq(Cat(w_half, 0))
            with m.Case(Funct3.W):
                m.d.comb += self.w_load_result.eq(self.w_load_data)

        return m
|
||
|
|
||
|
|
||
|
class LoadStoreUnitInterface:
    """Signals shared by the load/store unit implementations in this file.

    The constructor only declares the signals; the behaviour is supplied by
    the subclasses' ``elaborate`` methods.
    """

    def __init__(self):
        # Data bus, laid out as ``wishbone_layout``.
        self.dbus = Record(wishbone_layout)

        # Signals the units in this file read.
        self.x_addr = Signal(32)
        self.x_mask = Signal(4)
        self.x_load = Signal()
        self.x_store = Signal()
        self.x_store_data = Signal(32)
        self.x_stall = Signal()
        self.x_valid = Signal()
        self.m_stall = Signal()
        self.m_valid = Signal()

        # Signals the units in this file drive.
        self.x_busy = Signal()
        self.m_busy = Signal()
        self.m_load_data = Signal(32)
        self.m_load_error = Signal()
        self.m_store_error = Signal()
        # 30 bits wide: it captures ``dbus.adr`` (address without the two
        # low bits) when a bus error is seen.
        self.m_badaddr = Signal(30)
|
||
|
|
||
|
|
||
|
class BareLoadStoreUnit(LoadStoreUnitInterface, Elaboratable):
    """Load/store unit that issues every access directly on ``dbus``.

    A bus cycle is started when the X stage presents a valid, unstalled load
    or store and no cycle is in flight.  It ends on ``ack``, on ``err``, or
    when ``m_valid`` is deasserted; ``dbus.dat_r`` is captured into
    ``m_load_data`` at that point.
    """

    def elaborate(self, platform):
        """Build the bus state machine and busy/error logic; return the module."""
        m = Module()

        # Conditions used by the bus cycle control below.
        x_request = (self.x_load | self.x_store) & self.x_valid & ~self.x_stall
        cycle_over = self.dbus.ack | self.dbus.err | ~self.m_valid

        with m.If(self.dbus.cyc):
            with m.If(cycle_over):
                m.d.sync += self.dbus.cyc.eq(0)
                m.d.sync += self.dbus.stb.eq(0)
                m.d.sync += self.m_load_data.eq(self.dbus.dat_r)
        with m.Elif(x_request):
            m.d.sync += self.dbus.cyc.eq(1)
            m.d.sync += self.dbus.stb.eq(1)
            m.d.sync += self.dbus.adr.eq(self.x_addr[2:])
            m.d.sync += self.dbus.sel.eq(self.x_mask)
            m.d.sync += self.dbus.we.eq(self.x_store)
            m.d.sync += self.dbus.dat_w.eq(self.x_store_data)

        # Latch the error kind and faulting address on a bus error; clear the
        # error flags once the M stage is no longer stalled.
        with m.If(self.dbus.cyc & self.dbus.err):
            m.d.sync += self.m_load_error.eq(~self.dbus.we)
            m.d.sync += self.m_store_error.eq(self.dbus.we)
            m.d.sync += self.m_badaddr.eq(self.dbus.adr)
        with m.Elif(~self.m_stall):
            m.d.sync += self.m_load_error.eq(0)
            m.d.sync += self.m_store_error.eq(0)

        m.d.comb += self.x_busy.eq(self.dbus.cyc)

        # M is busy while a cycle is in flight, unless an error is flagged.
        m_fault = self.m_load_error | self.m_store_error
        m.d.comb += self.m_busy.eq(Mux(m_fault, 0, self.dbus.cyc))

        return m
|
||
|
|
||
|
|
||
|
class CachedLoadStoreUnit(LoadStoreUnitInterface, Elaboratable):
    """Load/store unit with an L1 data cache and a store write buffer.

    Accesses whose address falls inside the cache region
    (``dcache.base`` .. ``dcache.limit``) are "cached": loads are served by
    the ``L1Cache`` submodule and stores are queued in a ``SyncFIFO`` write
    buffer.  All other accesses go straight to the bus, as in
    ``BareLoadStoreUnit``.  The three bus masters (write buffer, cache refill,
    uncached port) share ``dbus`` through a ``WishboneArbiter``.

    Parameters
    ----------
    *dcache_args
        Positional arguments forwarded unchanged to ``L1Cache``.
    """

    def __init__(self, *dcache_args):
        super().__init__()

        # Forwarded verbatim to the L1Cache constructor in elaborate().
        self.dcache_args = dcache_args

        self.x_fence_i = Signal()
        self.m_load = Signal()
        self.m_store = Signal()
        self.m_flush = Signal()

    def elaborate(self, platform):
        """Build the cache, write buffer, arbiter and busy/error logic."""
        m = Module()

        dcache = m.submodules.dcache = L1Cache(*self.dcache_args)

        x_dcache_select = Signal()

        def addr_below(limit):
            # Build a 30-bit Case pattern matching every word address below
            # `limit`: leading zeros followed by don't-care bits.
            assert limit in range(1, 2**30 + 1)
            range_bits = log2_int(limit)
            const_bits = 30 - range_bits
            return "{}{}".format("0" * const_bits, "-" * range_bits)

        # Test whether the target address is inside the L1 cache region. We use bit masks in order
        # to avoid carry chains from arithmetic comparisons. This restricts the region boundaries
        # to powers of 2.
        with m.Switch(self.x_addr[2:]):
            # Cases are tried in order: addresses below the base are excluded
            # first, then everything below the limit is selected.
            if dcache.base >= 4:
                with m.Case(addr_below(dcache.base >> 2)):
                    m.d.comb += x_dcache_select.eq(0)
            with m.Case(addr_below(dcache.limit >> 2)):
                m.d.comb += x_dcache_select.eq(1)
            with m.Default():
                m.d.comb += x_dcache_select.eq(0)

        # Carry the select bit and the address from X into M.
        m_dcache_select = Signal()
        m_addr = Signal.like(self.x_addr)

        with m.If(~self.x_stall):
            m.d.sync += [
                m_dcache_select.eq(x_dcache_select),
                m_addr.eq(self.x_addr),
            ]

        m.d.comb += [
            dcache.s1_addr.eq(self.x_addr[2:]),
            dcache.s1_stall.eq(self.x_stall),
            dcache.s1_valid.eq(self.x_valid),
            dcache.s2_addr.eq(m_addr[2:]),
            dcache.s2_re.eq(self.m_load & m_dcache_select),
            dcache.s2_evict.eq(self.m_store & m_dcache_select),
            dcache.s2_flush.eq(self.m_flush),
            dcache.s2_valid.eq(self.m_valid),
        ]

        # Write buffer: cached stores are pushed here from the X stage and
        # drained onto the bus by `wrbuf_port` below.
        wrbuf_w_data = Record([("addr", 30), ("mask", 4), ("data", 32)])
        wrbuf_r_data = Record.like(wrbuf_w_data)
        wrbuf = m.submodules.wrbuf = SyncFIFO(width=len(wrbuf_w_data), depth=dcache.nwords)
        m.d.comb += [
            wrbuf.w_data.eq(wrbuf_w_data),
            wrbuf_w_data.addr.eq(self.x_addr[2:]),
            wrbuf_w_data.mask.eq(self.x_mask),
            wrbuf_w_data.data.eq(self.x_store_data),
            wrbuf.w_en.eq(self.x_store & self.x_valid & x_dcache_select & ~self.x_stall),
            wrbuf_r_data.eq(wrbuf.r_data),
        ]

        dbus_arbiter = m.submodules.dbus_arbiter = WishboneArbiter()
        m.d.comb += dbus_arbiter.bus.connect(self.dbus)

        # Port 0: write buffer drain. One entry is popped (r_en) in the cycle
        # its bus write is acknowledged or errors out.
        wrbuf_port = dbus_arbiter.port(priority=0)
        m.d.comb += [
            wrbuf_port.cyc.eq(wrbuf.r_rdy),
            wrbuf_port.we.eq(Const(1)),
        ]
        with m.If(wrbuf_port.stb):
            with m.If(wrbuf_port.ack | wrbuf_port.err):
                m.d.sync += wrbuf_port.stb.eq(0)
                m.d.comb += wrbuf.r_en.eq(1)
        with m.Elif(wrbuf.r_rdy):
            m.d.sync += [
                wrbuf_port.stb.eq(1),
                wrbuf_port.adr.eq(wrbuf_r_data.addr),
                wrbuf_port.sel.eq(wrbuf_r_data.mask),
                wrbuf_port.dat_w.eq(wrbuf_r_data.data)
            ]

        # Port 1: cache bus reads, driven by the L1Cache bus_* signals.
        dcache_port = dbus_arbiter.port(priority=1)
        m.d.comb += [
            dcache_port.cyc.eq(dcache.bus_re),
            dcache_port.stb.eq(dcache.bus_re),
            dcache_port.adr.eq(dcache.bus_addr),
            dcache_port.sel.eq(0b1111),
            dcache_port.cti.eq(Mux(dcache.bus_last, Cycle.END, Cycle.INCREMENT)),
            dcache_port.bte.eq(Const(log2_int(dcache.nwords) - 1)),
            dcache.bus_valid.eq(dcache_port.ack),
            dcache.bus_error.eq(dcache_port.err),
            dcache.bus_rdata.eq(dcache_port.dat_r)
        ]

        # Port 2: uncached accesses, same protocol as BareLoadStoreUnit but
        # only taken when the address is outside the cache region.
        bare_port = dbus_arbiter.port(priority=2)
        bare_rdata = Signal.like(bare_port.dat_r)
        with m.If(bare_port.cyc):
            with m.If(bare_port.ack | bare_port.err | ~self.m_valid):
                m.d.sync += [
                    bare_port.cyc.eq(0),
                    bare_port.stb.eq(0),
                    bare_rdata.eq(bare_port.dat_r)
                ]
        with m.Elif((self.x_load | self.x_store) & ~x_dcache_select & self.x_valid & ~self.x_stall):
            m.d.sync += [
                bare_port.cyc.eq(1),
                bare_port.stb.eq(1),
                bare_port.adr.eq(self.x_addr[2:]),
                bare_port.sel.eq(self.x_mask),
                bare_port.we.eq(self.x_store),
                bare_port.dat_w.eq(self.x_store_data)
            ]

        # Latch the error kind and faulting address on a bus error; clear the
        # error flags once the M stage is no longer stalled.
        with m.If(self.dbus.cyc & self.dbus.err):
            m.d.sync += [
                self.m_load_error.eq(~self.dbus.we),
                self.m_store_error.eq(self.dbus.we),
                self.m_badaddr.eq(self.dbus.adr)
            ]
        with m.Elif(~self.m_stall):
            m.d.sync += [
                self.m_load_error.eq(0),
                self.m_store_error.eq(0)
            ]

        # X stage: a fence.i waits for the write buffer to drain; a cached
        # store waits for room in the write buffer; an uncached access waits
        # for the uncached port to be idle.
        with m.If(self.x_fence_i):
            m.d.comb += self.x_busy.eq(wrbuf.r_rdy)
        with m.Elif(x_dcache_select):
            m.d.comb += self.x_busy.eq(self.x_store & ~wrbuf.w_rdy)
        with m.Else():
            m.d.comb += self.x_busy.eq(bare_port.cyc)

        # M stage: a single priority chain.  The flush test must be part of
        # this chain: as a separate `If` it was always overridden by the
        # exhaustive error/cached/uncached chain that followed (the last
        # active assignment wins), so `m_busy` never waited for the flush
        # acknowledge.  `m_load_data` is not assigned while flushing and so
        # reads as 0 in that case.
        with m.If(self.m_flush):
            m.d.comb += self.m_busy.eq(~dcache.s2_flush_ack)
        with m.Elif(self.m_load_error | self.m_store_error):
            m.d.comb += self.m_busy.eq(0)
        with m.Elif(m_dcache_select):
            m.d.comb += [
                self.m_busy.eq(self.m_load & dcache.s2_miss),
                self.m_load_data.eq(dcache.s2_rdata)
            ]
        with m.Else():
            m.d.comb += [
                self.m_busy.eq(bare_port.cyc),
                self.m_load_data.eq(bare_rdata)
            ]

        return m
|