forked from M-Labs/artiq-zynq
cxp: add PHY and pipeline
testing: add loopback tx for rx testing; testing: add trigger, trigger ack for testing; cxp: add upconn & downconn phy; cxp: add upconn & downconn pipeline; cxp: add rtlink; cxp: add test packet & error counter CSR; cxp: fix ch1 rx mem cannot be read; cxp: add frame buffer to use KiB instead of KB
This commit is contained in:
parent
89e9a438e7
commit
0d13231eef
342
src/gateware/cxp.py
Normal file
342
src/gateware/cxp.py
Normal file
@ -0,0 +1,342 @@
|
|||||||
|
from migen import *
|
||||||
|
from migen.genlib.cdc import MultiReg, PulseSynchronizer, BusSynchronizer
|
||||||
|
from misoc.interconnect.csr import *
|
||||||
|
|
||||||
|
from artiq.gateware.rtio import rtlink
|
||||||
|
|
||||||
|
from cxp_downconn import CXP_DownConn_PHYS
|
||||||
|
from cxp_upconn import CXP_UpConn_PHYS
|
||||||
|
from cxp_pipeline import *
|
||||||
|
from cxp_frame_pipeline import *
|
||||||
|
|
||||||
|
from functools import reduce
|
||||||
|
from operator import add
|
||||||
|
|
||||||
|
class CXP_PHYS(Module, AutoCSR):
    """Container for the CoaXPress up-connection and down-connection PHYs.

    Builds one ``CXP_UpConn_PHYS`` and one ``CXP_DownConn_PHYS`` and registers
    them as the ``upconn`` and ``downconn`` submodules.

    ``upconn_pads`` and ``downconn_pads`` must have the same length.
    ``refclk`` is only forwarded to the down-connection PHY; the remaining
    arguments are forwarded to both PHYs.
    """
    def __init__(self, refclk, upconn_pads, downconn_pads, sys_clk_freq, debug_sma, pmod_pads):
        # both directions must provide the same number of pad groups
        assert len(upconn_pads) == len(downconn_pads)

        # NOTE: the local names match the submodule names on purpose, migen
        # derives hierarchical signal names from the assignment target.
        upconn = CXP_UpConn_PHYS(upconn_pads, sys_clk_freq, debug_sma, pmod_pads)
        self.submodules.upconn = upconn

        downconn = CXP_DownConn_PHYS(refclk, downconn_pads, sys_clk_freq, debug_sma, pmod_pads)
        self.submodules.downconn = downconn
|
||||||
|
|
||||||
|
@FullMemoryWE()
class CXP_Interface(Module, AutoCSR):
    """One CoaXPress channel: an up-connection and a down-connection interface.

    Wraps ``UpConn_Interface`` (submodule ``upconn``) and ``DownConn_Interface``
    (submodule ``downconn``) and exposes accessors for the bootstrap memories
    of both directions.
    """
    def __init__(self, upconn_phy, downconn_phy, debug_sma, pmod_pads):
        self.submodules.upconn = UpConn_Interface(upconn_phy, debug_sma, pmod_pads)
        self.submodules.downconn = DownConn_Interface(downconn_phy, debug_sma, pmod_pads)

    def get_tx_port(self):
        """Return a write-capable port of the up-connection bootstrap memory."""
        tx_mem = self.upconn.bootstrap.mem
        return tx_mem.get_port(write_capable=True)

    def get_tx_mem_size(self):
        """Return depth * width // 8 of the up-connection bootstrap memory."""
        # FIXME: if the tx mem size is NOT the same as the rx mem size, then for
        # some reason once the rx mem is written the tx mem cannot be accessed
        # anymore, and each time the tx mem is read the CPU returns the rx mem
        # instead (fixed by reordering the mem allocation order).
        # FIXME: there seems to be an address alignment issue: if the tx mem
        # size is 0x800, the mem following the tx mem cannot be read correctly.
        # However, if the tx mem is 0x2000 (same size as the rx mem, i.e. the
        # value computed from self.downconn.bootstrap.mem) the following rx mem
        # can be read correctly.
        tx_mem = self.upconn.bootstrap.mem
        return tx_mem.depth*tx_mem.width // 8  # 0x800

    def get_mem_size(self):
        """Return word_dw * buffer_count * buffer_depth // 8."""
        total = word_dw * buffer_count * buffer_depth
        return total // 8

    def get_rx_port(self):
        """Return a read-only port of the down-connection bootstrap memory."""
        rx_mem = self.downconn.bootstrap.mem
        return rx_mem.get_port(write_capable=False)

    def get_rx_mem_size(self):
        """Return depth * width // 8 of the down-connection bootstrap memory."""
        rx_mem = self.downconn.bootstrap.mem
        return rx_mem.depth*rx_mem.width // 8

    def get_rx_downconn(self):
        """Return the down-connection interface submodule."""
        return self.downconn
|
||||||
|
|
||||||
|
class CXP_Master(CXP_Interface):
    """CXP channel that additionally drives the up-connection trigger from RTIO.

    The rtlink output word is ``nbit_trigdelay + nbit_linktrig`` bits wide:
    the low ``nbit_linktrig`` bit(s) select the linktrigger mode and the bits
    above them carry the trigger delay.
    """
    def __init__(self, upconn_phy, downconn_phy, debug_sma, pmod_pads):
        super().__init__(upconn_phy, downconn_phy, debug_sma, pmod_pads)

        nbit_trigdelay = 8
        nbit_linktrig = 1

        self.rtlink = rtlink.Interface(
            rtlink.OInterface(nbit_trigdelay + nbit_linktrig),
            rtlink.IInterface(word_dw, timestamped=False)
        )

        rt_out = self.rtlink.o
        tx_trig = self.upconn.trig
        self.sync.rio += [
            # delay and mode are only updated when a new RTIO event arrives
            If(rt_out.stb,
                tx_trig.delay.eq(rt_out.data[nbit_linktrig:]),
                tx_trig.linktrig_mode.eq(rt_out.data[:nbit_linktrig]),
            ),
            # the strobe follows the rtlink strobe, registered in the rio domain
            tx_trig.stb.eq(rt_out.stb),
        ]

        # DEBUG: mirror the rtlink output strobe on the SMA p_tx pad
        self.specials += Instance("OBUF", i_I=rt_out.stb, o_O=debug_sma.p_tx)
        # self.specials += Instance("OBUF", i_I=self.rtlink.o.stb, o_O=debug_sma.n_rx),
|
||||||
|
|
||||||
|
class CXP_Extension(CXP_Interface):
    """CXP channel without any additions on top of ``CXP_Interface``."""
    def __init__(self, upconn_phy, downconn_phy, debug_sma, pmod_pads):
        super().__init__(upconn_phy, downconn_phy, debug_sma, pmod_pads)
|
||||||
|
|
||||||
|
|
||||||
|
class DownConn_Interface(Module, AutoCSR):
    """Receive side of one CXP channel.

    Chains ``phy`` -> ``Duplicated_Char_Decoder`` -> ``Trigger_Ack_Checker``
    -> ``RX_Bootstrap`` (the three pipeline stages are renamed into the
    ``cxp_gtx_rx`` clock domain) and exposes CSRs for GTX status, GTX DRP
    access, latched error flags, test packet counters and the bootstrap
    packet buffer read pointer.

    The source endpoint of the last pipeline stage is exported as
    ``self.source``. ``debug_sma`` and ``pmod_pads`` are accepted but not used
    in this constructor.
    """
    def __init__(self, phy, debug_sma, pmod_pads):
        self.rx_ready = CSRStatus()

        # # #

        gtx = phy.gtx

        # GTX status
        self.sync += self.rx_ready.status.eq(gtx.rx_ready)

        # DEBUG: init status
        self.txinit_phaligndone = CSRStatus()
        self.rxinit_phaligndone = CSRStatus()
        self.comb += [
            self.txinit_phaligndone.status.eq(gtx.tx_init.Xxphaligndone),
            self.rxinit_phaligndone.status.eq(gtx.rx_init.Xxphaligndone),
        ]

        # Connect all GTX connections' DRP
        # gtx_dread / gtx_din_stb: writing the CSR starts a DRP read / write
        self.gtx_daddr = CSRStorage(9)
        self.gtx_dread = CSR()
        self.gtx_din_stb = CSR()
        self.gtx_din = CSRStorage(16)

        self.gtx_dout = CSRStatus(16)
        self.gtx_dready = CSR()

        # the DRP port is clocked from the sys clock
        self.comb += gtx.dclk.eq(ClockSignal("sys"))
        self.sync += [
            gtx.daddr.eq(self.gtx_daddr.storage),
            # enable on either access type, write enable only on a write
            gtx.den.eq(self.gtx_dread.re | self.gtx_din_stb.re),
            gtx.dwen.eq(self.gtx_din_stb.re),
            gtx.din.eq(self.gtx_din.storage),
            # dready is latched (together with dout) until gtx_dready is written
            If(gtx.dready,
                self.gtx_dready.w.eq(1),
                self.gtx_dout.status.eq(gtx.dout),
            ).Elif(self.gtx_dready.re,
                self.gtx_dready.w.eq(0),
            ),
        ]


        # Receiver Pipeline WIP
        #
        #        32            32+8(dchar)
        # PHY ---/---> dchar -----/-----> trigger ack ------> packet ------> CDC FIFO ------> debug buffer
        #              decoder            checker             decoder
        #
        # NOTE: the CDC FIFO and debug buffer stages are currently commented out
        # below; rx_pipeline ends at the bootstrap module.
        cdr = ClockDomainsRenamer("cxp_gtx_rx")

        # decode all incoming data as duplicate char and inject the result into the bus for downstream modules
        self.submodules.dchar_decoder = dchar_decoder = cdr(Duplicated_Char_Decoder())

        # Priority level 1 packet - Trigger ack packet
        self.submodules.trig_ack_checker = trig_ack_checker = cdr(Trigger_Ack_Checker())

        # carry the ack pulse from the cxp_gtx_rx domain into the sys domain
        self.submodules.trig_ack_ps = trig_ack_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
        self.sync.cxp_gtx_rx += trig_ack_ps.i.eq(trig_ack_checker.ack)

        self.trig_ack = Signal()
        self.trig_clr = Signal()
        # The trigger ack is latched until trig_clr is asserted
        self.sync += [
            If(trig_ack_ps.o,
                self.trig_ack.eq(1),
            ).Elif(self.trig_clr,
                self.trig_ack.eq(0),
            ),
        ]

        # Priority level 2 packet - data, test packet
        self.submodules.bootstrap = bootstrap = cdr(RX_Bootstrap())

        self.bootstrap_decoder_err = CSR()
        self.bootstrap_buffer_err = CSR()

        # error pulses cross from cxp_gtx_rx to sys through pulse synchronizers
        decode_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
        buffer_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
        self.submodules += decode_err_ps, buffer_err_ps
        self.sync.cxp_gtx_rx += [
            decode_err_ps.i.eq(bootstrap.decode_err),
            buffer_err_ps.i.eq(bootstrap.buffer_err),
        ]
        # errors are latched; writing the corresponding CSR clears the flag
        self.sync += [
            If(decode_err_ps.o,
                self.bootstrap_decoder_err.w.eq(1),
            ).Elif(self.bootstrap_decoder_err.re,
                self.bootstrap_decoder_err.w.eq(0),
            ),
            If(buffer_err_ps.o,
                self.bootstrap_buffer_err.w.eq(1),
            ).Elif(self.bootstrap_buffer_err.re,
                self.bootstrap_buffer_err.w.eq(0),
            ),
        ]


        # test packet error & packet counters
        self.bootstrap_test_error_counter = CSRStatus(len(bootstrap.test_err_cnt))
        self.bootstrap_test_packet_counter = CSRStatus(len(bootstrap.test_pak_cnt))
        self.bootstrap_test_counts_reset = CSR()


        # writing bootstrap_test_counts_reset sends a reset pulse to the cxp_gtx_rx domain
        test_reset_ps = PulseSynchronizer("sys", "cxp_gtx_rx")
        self.submodules += test_reset_ps
        self.sync += test_reset_ps.i.eq(self.bootstrap_test_counts_reset.re),

        self.sync.cxp_gtx_rx += bootstrap.test_cnt_reset.eq(test_reset_ps.o),
        # NOTE(review): the counters are multi-bit values crossed with MultiReg;
        # a read may observe a mix of old and new bits while a counter changes.
        # Confirm this is acceptable (BusSynchronizer is imported but unused).
        self.specials += [
            MultiReg(bootstrap.test_err_cnt, self.bootstrap_test_error_counter.status),
            MultiReg(bootstrap.test_pak_cnt, self.bootstrap_test_packet_counter.status),
        ]

        # Circular buffer interface
        self.packet_type = CSRStatus(8)
        self.pending_packet = CSR()
        self.read_ptr = CSRStatus(log2_int(buffer_count))

        self.specials += [
            MultiReg(bootstrap.packet_type, self.packet_type.status),
            MultiReg(self.read_ptr.status, bootstrap.read_ptr_rx, odomain="cxp_gtx_rx"),
        ]
        self.sync += [
            # a packet is pending whenever the read and write pointers differ
            self.pending_packet.w.eq(self.read_ptr.status != bootstrap.write_ptr_sys),
            # the read pointer is held at 0 while the GTX receiver is not ready;
            # writing pending_packet while a packet is pending advances it by one
            If(~gtx.rx_ready,
                self.read_ptr.status.eq(0),
            ).Elif(self.pending_packet.re & self.pending_packet.w,
                self.read_ptr.status.eq(self.read_ptr.status + 1),
            )
        ]

        # DEBUG:
        # # add buffer to improve timing & reduce tight setup/hold time
        # self.submodules.buffer_cdc_fifo = buffer_cdc_fifo = cdr(Buffer(word_layout_dchar))
        # cdc_fifo = stream.AsyncFIFO(word_layout_dchar, 512)
        # self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(cdc_fifo)
        # self.submodules.debug_out = debug_out = RX_Debug_Buffer(word_layout_dchar)

        # connect each stage's source to the next stage's sink
        rx_pipeline = [phy, dchar_decoder, trig_ack_checker, bootstrap]
        for s, d in zip(rx_pipeline, rx_pipeline[1:]):
            self.comb += s.source.connect(d.sink)
        # the output of the last stage is exported for downstream consumers
        self.source = rx_pipeline[-1].source


        # DEBUG: CSR
        # reading trigger_ack returns the latched ack, writing it clears the latch
        self.trigger_ack = CSR()
        self.sync += [
            self.trig_clr.eq(self.trigger_ack.re),
            self.trigger_ack.w.eq(self.trig_ack),
        ]
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class UpConn_Interface(Module, AutoCSR):
    """Transmit side of one CXP channel.

    Chains ``TX_Bootstrap`` -> ``Packet_Wrapper`` -> ``Idle_Word_Inserter``
    -> ``Trigger_ACK_Inserter`` -> ``stream.StrideConverter`` -> ``TX_Trigger``
    -> ``phy`` by connecting each stage's source to the next stage's sink.

    ``debug_sma`` and ``pmod_pads`` are accepted but not used in this
    constructor.
    """
    def __init__(self, phy, debug_sma, pmod_pads):
        # Transmission Pipeline
        #
        #                32                                                   32           8
        # ctrl/test ---/---> packet  -----> idle word -----> trigger ack ---/--> conv ---/---> trigger  -----> PHY
        # packet             wrapper        inserter         inserter                          inserter
        #
        # Equivalent transmission priority:
        # trigger > trigger ack > idle > test/data packet
        # To maintain the trigger performance, idle word should not be inserted into trigger or trigger ack.
        #
        # In low speed CoaXPress, the higher priority packet can be inserted in two types of boundary
        # Insertion @ char boundary: Trigger packets
        # Insertion @ word boundary: Trigger ack & IDLE packets
        # The 32 bit part of the pipeline handles the word boundary insertion while the 8 bit part handles the char boundary insertion



        # Packet FIFOs with transmission priority
        # 0: Trigger packet
        self.submodules.trig = trig = TX_Trigger()

        # # DEBUG: INPUT
        # NOTE: these CSRs are declared but currently not connected, the wiring
        # below is commented out.
        self.trig_stb = CSR()
        self.trig_delay = CSRStorage(8)
        self.linktrigger = CSRStorage()

        # self.sync += [
        #     trig.stb.eq(self.trig_stb.re),
        #     trig.delay.eq(self.trig_delay.storage),
        #     trig.linktrig_mode.eq(self.linktrigger.storage),
        # ]


        # 1: IO acknowledgment for trigger packet
        self.submodules.trig_ack = trig_ack = Trigger_ACK_Inserter()

        # DEBUG: INPUT
        # writing the ack CSR strobes the trigger ack inserter one cycle later
        self.ack = CSR()
        self.sync += trig_ack.stb.eq(self.ack.re),


        # 2: All other packets (data & test packet)
        # Control is not timing dependent, all the data packets are handled in firmware
        self.submodules.bootstrap = bootstrap = TX_Bootstrap()

        self.submodules.pak_wrp = pak_wrp = Packet_Wrapper()
        self.submodules.idle = idle = Idle_Word_Inserter()

        # converts the stream from word_layout to char_layout
        self.submodules.converter = converter = stream.StrideConverter(word_layout, char_layout)

        # connect each stage's source to the next stage's sink
        tx_pipeline = [bootstrap, pak_wrp, idle, trig_ack, converter, trig, phy]
        for s, d in zip(tx_pipeline, tx_pipeline[1:]):
            self.comb += s.source.connect(d.sink)
|
||||||
|
|
||||||
|
class CXP_Frame_Buffer(Module, AutoCSR):
    """Frame packet router feeding ``n_buffer`` CRC-checked frame buffers.

    For each buffer a pipeline ``CXPCRC32_Checker`` -> ``Buffer`` ->
    ``stream.AsyncFIFO`` is built; the first two stages run in the
    ``cxp_gtx_rx`` domain and the FIFO crosses from ``cxp_gtx_rx`` (write) to
    ``sys`` (read). A ``Frame_Packet_Router`` distributes the down-connection
    streams to the buffers.

    Parameters:
        downconns: down-connection interfaces; each must expose a
            ``rx_ready`` CSRStatus. ``downconns[0]`` is the master channel,
            the remaining entries are counted as extensions.
        pmod_pads: forwarded to ``Frame_Packet_Router``.
        packet_size: stream packet size, also forwarded to the router. The
            FIFO depth is ``packet_size // word_dw`` and must be a power of
            two (presumably ``packet_size`` is in bits, the default 16384
            matching the 2 KiB mentioned below - TODO confirm).
        n_buffer: number of frame buffers. Every buffer except buffer 0 gets a
            ``buffer_<i>_routingid`` CSR with reset value ``i``.
    """
    # optimal stream packet size is 2 KiB - Section 9.5.2 (CXP-001-2021)
    def __init__(self, downconns, pmod_pads, packet_size=16384, n_buffer=2):
        n_downconn = len(downconns)

        framebuffers = []
        arr_csr = []
        cdr = ClockDomainsRenamer("cxp_gtx_rx")
        for i in range(n_buffer):
            # TODO: change this to rtio
            if i > 0:
                name = "buffer_" + str(i) + "_routingid"
                csr = CSRStorage(char_width, name=name, reset=i)
                arr_csr.append(csr)
                setattr(self, name, csr)

            crc_checker = cdr(CXPCRC32_Checker())

            # TODO: handle full buffer gracefully

            # TODO: investigate why there is a heartbeat message in the middle of the frame with k27.7 code too???
            # NOTE: sometimes there are 0xFBFBFBFB K=0b1111
            # perhaps the buffer is full, overflowing and doing strange stuff

            # it should be mem block not "cycle buffer"
            # self.submodules.dropper = dropper = cdr(DChar_Dropper())
            buffer_cdc_fifo = cdr(Buffer(word_layout_dchar))  # to improve timing
            cdc_fifo = stream.AsyncFIFO(word_layout_dchar, 2**log2_int(packet_size//word_dw))
            self.submodules += crc_checker, buffer_cdc_fifo
            self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(cdc_fifo)

            # connect each stage's source to the next stage's sink
            pipeline = [crc_checker, buffer_cdc_fifo, cdc_fifo]
            for s, d in zip(pipeline, pipeline[1:]):
                self.comb += s.source.connect(d.sink)
            # the router writes into the first stage of every buffer pipeline
            framebuffers.append(pipeline[0])

            # DEBUG:
            if i == 0:
                self.submodules.debug_out = debug_out = RX_Debug_Buffer(word_layout_dchar, 2**log2_int(packet_size//word_dw))
                self.comb += pipeline[-1].source.connect(debug_out.sink)
            else:
                # remove any backpressure
                self.comb += pipeline[-1].source.ack.eq(1)


        self.submodules.router = router = cdr(Frame_Packet_Router(downconns, framebuffers, packet_size, pmod_pads))

        # arr_csr[0] belongs to buffer 1 since buffer 0 has no routing id CSR
        for i, csr in enumerate(arr_csr):
            self.specials += MultiReg(csr.storage, router.routing_table[i], odomain="cxp_gtx_rx")

        # only the simple topology MASTER:ch0, extension:ch1,2,3 is supported right now
        ext_ready = [d.rx_ready.status for d in downconns[1:]]
        # Signal(max=...) needs an upper bound of at least 2, so a master-only
        # setup (a single down-connection) still gets a valid 1-bit counter.
        active_extensions = Signal(max=n_downconn if n_downconn > 1 else 2)
        # reduce() without an initial value raises TypeError on an empty list,
        # i.e. when there is no extension channel at all.
        self.sync += active_extensions.eq(reduce(add, ext_ready) if ext_ready else 0)
        self.specials += MultiReg(active_extensions, router.n_ext_active, odomain="cxp_gtx_rx")
|
Loading…
Reference in New Issue
Block a user