cxp: use tx/rx instead of up/downconn

cxp GW: add single chs roi support

CXP GW: move rx/tx pipeline to misoc
This commit is contained in:
morgan 2025-02-04 16:05:22 +08:00
parent 43042e7efa
commit e09840e1d0

View File

@ -1,38 +1,24 @@
from migen import *
from migen.genlib.cdc import MultiReg, PulseSynchronizer
from misoc.interconnect.csr import *
from misoc.interconnect.stream import StrideConverter
from misoc.cores.coaxpress.core import HostTXCore, HostRXCore
from misoc.cores.coaxpress.phy.high_speed_gtx import HostRXPHYs
from misoc.cores.coaxpress.phy.low_speed_serdes import HostTXPHYs
from artiq.gateware.rtio import rtlink
from cxp_downconn import CXP_RXPHYs
from cxp_upconn import CXP_TXPHYs
from cxp_pipeline import (
Command_Packet_Reader,
Command_Test_Packet_Writer,
Duplicated_Char_Decoder,
Heartbeat_Packet_Reader,
Idle_Word_Inserter,
Packet_Arbiter,
Packet_Wrapper,
Test_Sequence_Checker,
Trigger_ACK_Inserter,
Trigger_ACK_Reader,
Trigger_Inserter,
Trigger_Reader,
)
from cxp_frame_pipeline import *
import cxp_router
from types import SimpleNamespace
class CXP_PHYS(Module, AutoCSR):
def __init__(self, refclk, upconn_pads, downconn_pads, sys_clk_freq, master=0):
assert len(upconn_pads) == len(downconn_pads)
class CXP_Host_PHYS(Module, AutoCSR):
def __init__(self, refclk, tx_pads, rx_pads, sys_clk_freq, master=0):
assert len(tx_pads) == len(rx_pads)
self.submodules.tx = CXP_TXPHYs(upconn_pads, sys_clk_freq)
self.submodules.rx = CXP_RXPHYs(refclk, downconn_pads, sys_clk_freq, master)
self.submodules.tx = HostTXPHYs(tx_pads, sys_clk_freq)
self.submodules.rx = HostRXPHYs(refclk, rx_pads, sys_clk_freq, master)
self.phys = []
for tx, rx in zip(self.tx.phys, self.rx.phys):
@ -47,8 +33,8 @@ class CXP_Core(Module, AutoCSR):
# Section 12.1.6 (CXP-001-2021)
self.buffer_depth, self.nslots = command_buffer_depth, nrxslot
self.submodules.tx = TX_Pipeline(phy.tx, command_buffer_depth, False)
self.submodules.rx = RX_Pipeline(phy.rx, command_buffer_depth, nrxslot, False)
self.submodules.tx = HostTXCore(phy.tx, command_buffer_depth, False)
self.submodules.rx = HostRXCore(phy.rx, command_buffer_depth, nrxslot, False)
def get_tx_port(self):
return self.tx.writer.mem.get_port(write_capable=True)
@ -59,265 +45,6 @@ class CXP_Core(Module, AutoCSR):
def get_mem_size(self):
return word_width * self.buffer_depth * self.nslots // 8
class RX_Pipeline(Module, AutoCSR):
# Host-side receive pipeline.
#
# Takes the word stream from `phy.source`, runs every packet-handling stage in
# the "cxp_gtx_rx" clock domain, and exposes the result to the "sys" domain
# through CSRs. Stream packets (with EOP marked) leave through `self.source`;
# trigger, trigger-ack, test, command and heartbeat packets are consumed here.
def __init__(self, phy, command_buffer_depth, nslot, with_trigger):
# phy: RX PHY; this block reads `phy.gtx.rx_ready` and `phy.source`.
# command_buffer_depth, nslot: forwarded to Command_Packet_Reader; nslot also
#   sizes the read pointer CSR via log2_int(nslot)
#   (presumably nslot must be a power of two - TODO confirm against log2_int).
# with_trigger: when true, a Trigger_Reader stage is inserted and the
#   trig/trig_delay/trig_linktrigger_n signals are created.

# --- CSRs (sys domain) ---
self.ready = CSRStatus()
self.trigger_ack = CSR()
self.pending_packet = CSR()
self.read_ptr = CSRStatus(log2_int(nslot))
self.reader_buffer_err = CSR()
self.reader_decode_err = CSR()
self.test_error_counter = CSRStatus(16)
self.test_packet_counter = CSRStatus(16)
self.test_counts_reset = CSR()
self.heartbeat = CSR()
self.host_id = CSRStatus(32)
self.device_time = CSRStatus(64)
# for multilane router
self.active = Signal()
if with_trigger:
# Registered copies of the Trigger_Reader outputs (cxp_gtx_rx domain).
self.trig = Signal()
self.trig_delay = Signal(char_width)
self.trig_linktrigger_n = Signal(char_width)
# # #
gtx = phy.gtx
# Both `ready` and `active` are the sys-registered value of gtx.rx_ready.
self.sync += [
self.ready.status.eq(gtx.rx_ready),
self.active.eq(gtx.rx_ready),
]
# Host rx pipeline
#
# 32 32+8(dchar)
# PHY ───/───> dchar ─────/─────> trigger ─────> trigger ack ─────> packet parser ─────> EOP Marker ─────> stream data packet
# decoder reader reader │ │ │ with CRC
# (optional) │ │ └──────> test sequence checker
# │ │
# │ └─────────> heartbeat packet reader
# │
# └────────────> command packet reader
#
# Every pipeline stage below is wrapped with `cdr` so it runs in cxp_gtx_rx.
cdr = ClockDomainsRenamer("cxp_gtx_rx")
# decode all incoming data as duplicate char and inject the result into the bus for downstream modules
self.submodules.dchar_decoder = dchar_decoder = cdr(Duplicated_Char_Decoder())
# Priority level 0 packet - Trigger packet
if with_trigger:
self.submodules.trig_reader = trig_reader = cdr(Trigger_Reader())
self.sync.cxp_gtx_rx += [
self.trig.eq(trig_reader.trig),
self.trig_delay.eq(trig_reader.delay),
self.trig_linktrigger_n.eq(trig_reader.linktrigger_n),
]
# Priority level 1 packet - Trigger ack packet
self.submodules.trig_ack_reader= trig_ack_reader = cdr(Trigger_ACK_Reader())
# The ack is registered once in cxp_gtx_rx, then carried to sys as a pulse.
self.submodules.trig_ack_ps = trig_ack_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
self.sync.cxp_gtx_rx += trig_ack_ps.i.eq(trig_ack_reader.ack)
# Sticky flag: set by the synchronized pulse, cleared when trigger_ack.re strobes.
# A pulse arriving in the same cycle as `re` wins (If before Elif).
self.sync += [
If(trig_ack_ps.o,
self.trigger_ack.w.eq(1),
).Elif(self.trigger_ack.re,
self.trigger_ack.w.eq(0),
),
]
# Priority level 2 packet - stream, test, heartbeat and command packets
self.submodules.arbiter = arbiter = cdr(Packet_Arbiter())
# Decode error: same sticky-flag scheme as trigger_ack above.
self.submodules.decode_err_ps = decode_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
self.sync.cxp_gtx_rx += decode_err_ps.i.eq(arbiter.decode_err)
self.sync += [
If(decode_err_ps.o,
self.reader_decode_err.w.eq(1),
).Elif(self.reader_decode_err.re,
self.reader_decode_err.w.eq(0),
),
]
# Chain the stages source -> sink in list order; trig_reader is only
# present when with_trigger is set.
if with_trigger:
rx_pipeline = [phy, dchar_decoder, trig_reader, trig_ack_reader, arbiter]
else:
rx_pipeline = [phy, dchar_decoder, trig_ack_reader, arbiter]
for s, d in zip(rx_pipeline, rx_pipeline[1:]):
self.comb += s.source.connect(d.sink)
# Stream packet
# Drop the K29.7 and mark the EOP for downstream
self.submodules.eop_marker = eop_marker = cdr(EOP_Marker())
self.comb += arbiter.source_stream.connect(eop_marker.sink)
# set pipeline source to output stream packet
self.source = eop_marker.source
# Test packet
self.submodules.test_seq_checker = test_seq_checker = cdr(Test_Sequence_Checker())
self.comb += arbiter.source_test.connect(test_seq_checker.sink)
# Counter reset travels the other way: sys -> cxp_gtx_rx.
self.submodules.test_reset_ps = test_reset_ps = PulseSynchronizer("sys", "cxp_gtx_rx")
self.comb += test_reset_ps.i.eq(self.test_counts_reset.re),
# Counters live in cxp_gtx_rx; they have the same width as their CSRs.
test_err_cnt_rx = Signal.like(self.test_error_counter.status)
test_pak_cnt_rx = Signal.like(self.test_packet_counter.status)
# The increment conditions are registered for one cycle before being counted.
test_err_r, test_pak_r = Signal(), Signal()
self.sync.cxp_gtx_rx += [
test_err_r.eq(test_seq_checker.error),
test_pak_r.eq(arbiter.recv_test_pak),
# Reset has priority over increment for both counters.
If(test_reset_ps.o,
test_err_cnt_rx.eq(test_err_cnt_rx.reset),
).Elif(test_err_r,
test_err_cnt_rx.eq(test_err_cnt_rx + 1),
),
If(test_reset_ps.o,
test_pak_cnt_rx.eq(test_pak_cnt_rx.reset),
).Elif(test_pak_r,
test_pak_cnt_rx.eq(test_pak_cnt_rx + 1),
),
]
# NOTE(review): these multi-bit counters cross into sys through plain MultiReg;
# a read during an increment could presumably observe a mixed value - confirm
# this is acceptable for these status counters.
self.specials += [
MultiReg(test_err_cnt_rx, self.test_error_counter.status),
MultiReg(test_pak_cnt_rx, self.test_packet_counter.status),
]
# Command packet
self.submodules.command_reader = command_reader = cdr(Command_Packet_Reader(command_buffer_depth, nslot))
self.comb += arbiter.source_command.connect(command_reader.sink)
# nslot buffers control interface
# read_ptr is owned by sys and synchronized into cxp_gtx_rx;
# write_ptr is owned by the reader and synchronized into sys.
write_ptr_sys = Signal.like(command_reader.write_ptr)
self.specials += [
MultiReg(self.read_ptr.status, command_reader.read_ptr, odomain="cxp_gtx_rx"),
MultiReg(command_reader.write_ptr, write_ptr_sys)
]
self.sync += [
# A packet is pending whenever the two pointers differ.
self.pending_packet.w.eq(self.read_ptr.status != write_ptr_sys),
# read_ptr is held at 0 while the link is not ready; otherwise it advances
# by one when pending_packet.re strobes and a packet is pending.
If(~gtx.rx_ready,
self.read_ptr.status.eq(0),
).Elif(self.pending_packet.re & self.pending_packet.w,
self.read_ptr.status.eq(self.read_ptr.status + 1),
)
]
# Buffer error: same sticky-flag scheme as trigger_ack above.
self.submodules.buffer_err_ps = buffer_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
self.sync.cxp_gtx_rx += buffer_err_ps.i.eq(command_reader.buffer_err),
self.sync += [
If(buffer_err_ps.o,
self.reader_buffer_err.w.eq(1),
).Elif(self.reader_buffer_err.re,
self.reader_buffer_err.w.eq(0),
),
]
# Heartbeat packet
self.submodules.heartbeat_reader = heartbeat_reader = cdr(Heartbeat_Packet_Reader())
self.comb += arbiter.source_heartbeat.connect(heartbeat_reader.sink)
# host_id (32 bit) and heartbeat (64 bit, exposed as device_time) cross into
# sys through MultiReg.
self.specials += [
MultiReg(heartbeat_reader.host_id, self.host_id.status),
MultiReg(heartbeat_reader.heartbeat, self.device_time.status),
]
# Heartbeat-received flag: same sticky-flag scheme as trigger_ack above.
self.submodules.heartbeat_ps = heartbeat_ps = PulseSynchronizer("cxp_gtx_rx", "sys")
self.sync.cxp_gtx_rx += heartbeat_ps.i.eq(arbiter.recv_heartbeat)
self.sync += [
If(heartbeat_ps.o,
self.heartbeat.w.eq(1),
).Elif(self.heartbeat.re,
self.heartbeat.w.eq(0),
),
]
class TX_Pipeline(Module, AutoCSR):
# Host-side transmit pipeline.
#
# Builds a source -> sink chain from the command/test packet writer to
# `phy.sink`, inserting idle words, optional trigger acks and triggers on the
# way. No clock-domain renaming is applied in this block.
def __init__(self, phy, command_buffer_depth, with_trigger_ack):
# phy: TX PHY; only `phy.sink` is used here (last element of the chain).
# command_buffer_depth: forwarded to Command_Test_Packet_Writer and used to
#   size the writer_word_len CSR via log2_int.
# with_trigger_ack: when true, a Trigger_ACK_Inserter stage and the
#   trig_ack_stb signal are created.

# Trigger request signals, wired to the Trigger_Inserter below.
self.trig_stb = Signal()
self.trig_delay = Signal(char_width)
self.trig_linktrigger_mode = Signal()
if with_trigger_ack:
self.trig_ack_stb = Signal()
# # #
# Host tx pipeline
#
# 32 32 8
# command/test ───/───> packet ─────> idle word ─────> trigger ack ───/───> conv ───/───> trigger ─────> PHY
# packet writer wrapper inserter inserter inserter
# (optional)
#
# Equivalent transmission priority:
# trigger > trigger ack > idle word > command/test packet
#
# The pipeline is split into 32 and 8 bit sections to handle the word and char boundary priority insertion requirement:
# Insertion @ char boundary: trigger packets
# Insertion @ word boundary: idle packets and trigger ack packet
# - Section 9.2.4 (CXP-001-2021)
#
# The idle inserter is placed between the trigger ack inserter and command/test packet writer to maintain the trigger performance,
# as idle word should not be inserted into trigger and trigger ack packet - Section 9.2.5.1 (CXP-001-2021)
#
# Priority level 0 packet - Trigger packet
self.submodules.trig = trig = Trigger_Inserter()
self.comb += [
trig.stb.eq(self.trig_stb),
trig.delay.eq(self.trig_delay),
trig.linktrig_mode.eq(self.trig_linktrigger_mode)
]
# Priority level 1 packet - Trigger ack
if with_trigger_ack:
self.submodules.trig_ack = trig_ack = Trigger_ACK_Inserter()
# NOTE(review): this drives self.trig_ack_stb FROM trig_ack.stb, the opposite
# direction of the trig.* connections above (which drive the inserter from
# self.*). If trig_ack.stb is the inserter's request input, the operands look
# swapped - confirm against Trigger_ACK_Inserter.
self.comb += self.trig_ack_stb.eq(trig_ack.stb)
# Priority level 2 packet - command and test packet
# Control is not timing dependent, all the data packets are handled in firmware
self.submodules.writer = writer = Command_Test_Packet_Writer(command_buffer_depth)
# writer memory control interface
self.writer_word_len = CSRStorage(log2_int(command_buffer_depth))
self.writer_stb = CSR()
self.writer_stb_testseq = CSR()
self.writer_busy = CSRStatus()
# CSR <-> writer signals are registered (self.sync), so each direction has
# one cycle of latency.
self.sync += [
writer.word_len.eq(self.writer_word_len.storage),
writer.stb.eq(self.writer_stb.re),
writer.stb_testseq.eq(self.writer_stb_testseq.re),
self.writer_busy.status.eq(writer.busy),
]
# Misc
self.submodules.pak_wrp = pak_wrp = Packet_Wrapper()
self.submodules.idle = idle = Idle_Word_Inserter()
# Converts from word_layout to char_layout (the 32 -> 8 step in the diagram).
self.submodules.converter = converter = StrideConverter(word_layout, char_layout)
# Chain the stages source -> sink in list order; trig_ack sits on the word
# side of the converter, trig on the char side.
if with_trigger_ack:
tx_pipeline = [writer, pak_wrp, idle, trig_ack, converter, trig, phy]
else:
tx_pipeline = [writer, pak_wrp, idle, converter, trig, phy]
for s, d in zip(tx_pipeline, tx_pipeline[1:]):
self.comb += s.source.connect(d.sink)
class CXP_Frame_Pipeline(Module, AutoCSR):
# optimal stream packet size is 2 KiB - Section 9.5.2 (CXP-001-2021)
def __init__(self, pipelines, pmod_pads, roi_engine_count=1, res_width=16, count_width=31, master=0, packet_size=16384):
@ -371,7 +98,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
# # #
cdr = ClockDomainsRenamer("cxp_gtx_rx")
cdr = ClockDomainsRenamer("cxp_gt_rx")
debug_out = False
@ -379,13 +106,13 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
self.submodules.pixel_pipeline = pixel_pipeline = cdr(Pixel_Pipeline(res_width, count_width, packet_size))
# CRC error counter
self.submodules.crc_reset_ps = crc_reset_ps = PulseSynchronizer("sys", "cxp_gtx_rx")
self.submodules.crc_reset_ps = crc_reset_ps = PulseSynchronizer("sys", "cxp_gt_rx")
self.comb += crc_reset_ps.i.eq(self.crc_error_reset.re)
crc_error_cnt_rx = Signal.like(self.crc_error_cnt.status)
crc_error_r = Signal()
self.sync.cxp_gtx_rx += [
self.sync.cxp_gt_rx += [
# to improve timing
crc_error_r.eq(pixel_pipeline.crc_checker.error),
@ -405,14 +132,14 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
roi_boundary = Signal.like(target)
self.sync.rio += If(self.config.o.stb & (self.config.o.address == 4*n+offset),
roi_boundary.eq(self.config.o.data))
self.specials += MultiReg(roi_boundary, target, "cxp_gtx_rx")
self.specials += MultiReg(roi_boundary, target, "cxp_gt_rx")
roi_out = pixel_pipeline.roi.out
update = Signal()
self.submodules.ps = ps = PulseSynchronizer("cxp_gtx_rx", "sys")
self.sync.cxp_gtx_rx += ps.i.eq(roi_out.update)
self.submodules.ps = ps = PulseSynchronizer("cxp_gt_rx", "sys")
self.sync.cxp_gt_rx += ps.i.eq(roi_out.update)
self.sync += update.eq(ps.o)
sentinel = 2**count_width
@ -438,7 +165,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
x_size_rx, x_size_sys = Signal(3*char_width), Signal(3*char_width)
y_size_rx, y_size_sys = Signal(3*char_width), Signal(3*char_width)
y_pix_rx, y_pix_sys = Signal(res_width), Signal(res_width)
self.sync.cxp_gtx_rx += [
self.sync.cxp_gt_rx += [
If(pixel_pipeline.header_decoder.new_line,
new_line_cnt_rx.eq(new_line_cnt_rx + 1),
),
@ -485,7 +212,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
buffer_cdc_fifo = cdr(Buffer(word_layout_dchar)) # to improve timing
cdc_fifo = stream.AsyncFIFO(word_layout_dchar, 2**log2_int(packet_size//word_width))
self.submodules += buffer, crc_checker, buffer_cdc_fifo
self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(cdc_fifo)
self.submodules += ClockDomainsRenamer({"write": "cxp_gt_rx", "read": "sys"})(cdc_fifo)
self.submodules.debug_out = debug_out = RX_Debug_Buffer(word_layout_dchar, 2**log2_int(packet_size//word_width))
pipeline = [buffer, crc_checker, buffer_cdc_fifo, cdc_fifo, debug_out]
@ -495,45 +222,64 @@ class CXP_Frame_Pipeline(Module, AutoCSR):
# Routing WIP
# +---------+ +-------------+
# downconn pipeline ----->| | | |------> crc checker ------> raw stream data
# | arbiter |---->| broadcaster |
# downconn pipeline ----->| | | |------> crc checker ------> raw stream data
# +---------+ +-------------+
# +---------+ +-------------+
# downconn pipeline -----> buffer ----->| | | |------> crc checker ------> raw stream data
# | arbiter |----> buffer ---->| broadcaster |
# downconn pipeline -----> buffer ----->| | | |------> crc checker ------> raw stream data
# +---------+ +-------------+
#
self.submodules.arbiter = arbiter = cdr(Stream_Arbiter(n_channels))
self.submodules.broadcaster = broadcaster = cdr(Stream_Broadcaster())
# Connect pipeline
for i, p in enumerate(pipelines):
# Assume downconns pipeline already marks the eop
self.comb += p.rx.source.connect(arbiter.sinks[i])
if n_channels > 1:
self.submodules.arbiter = arbiter = cdr(Stream_Arbiter(n_channels))
self.comb += arbiter.source.connect(broadcaster.sink)
# Connect pipeline
for i, p in enumerate(pipelines):
buffer = cdr(Buffer(word_layout_dchar))
self.submodules += buffer
# Assume downconns pipeline already marks the eop
self.comb += [
p.rx.source.connect(buffer.sink),
buffer.source.connect(arbiter.sinks[i]),
]
self.submodules.buf = buf = cdr(Buffer(word_layout_dchar))
self.comb += [
arbiter.source.connect(buf.sink),
buf.source.connect(broadcaster.sink),
]
# Control interface
# only the simple topology MASTER:ch0, extension:ch1,2,3 is supported right now
active_channels_sys = Signal(n_channels)
for i, p in enumerate(pipelines):
# TODO: change this to non csr signal?
self.sync += active_channels_sys[i].eq(p.rx.active)
self.specials += MultiReg(active_channels_sys, arbiter.active_channels, odomain="cxp_gt_rx"),
# DEBUG:
self.sync += self.arbiter_active_ch.status.eq(active_channels_sys)
else:
buffer = cdr(Buffer(word_layout_dchar))
self.submodules += buffer
self.comb += [
pipelines[0].rx.source.connect(buffer.sink),
buffer.source.connect(broadcaster.sink),
]
if not debug_out:
self.comb += broadcaster.sources[0].connect(pixel_pipeline.sink),
else:
self.comb += broadcaster.sources[0].connect(pipeline[0].sink),
# Control interface
# only the simple topology MASTER:ch0, extension:ch1,2,3 is supported right now
active_channels_sys = Signal(n_channels)
for i, p in enumerate(pipelines):
# TODO: change this to non csr signal?
self.sync += active_channels_sys[i].eq(p.rx.active)
self.specials += MultiReg(active_channels_sys, arbiter.active_channels, odomain="cxp_gtx_rx"),
# DEBUG:
self.sync += self.arbiter_active_ch.status.eq(active_channels_sys)
for i, p in enumerate(pipelines):
# self.comb += p.rx.source.ack.eq(1)
rx_stb = Signal()
self.sync.cxp_gtx_rx += rx_stb.eq(p.rx.source.stb)
self.sync.cxp_gt_rx += rx_stb.eq(p.rx.source.stb)
self.specials += [
# Instance("OBUF", i_I=rx_stb, o_O=pmod_pads[i]),
# Instance("OBUF", i_I=arbiter.sinks[i].stb, o_O=pmod_pads[i]),
@ -578,7 +324,7 @@ class NEO_CXP_Frame_pipeline(Module):
# # #
cdr = ClockDomainsRenamer("cxp_gtx_rx")
cdr = ClockDomainsRenamer("cxp_gt_rx")
debug_out = False