From e09840e1d0fefcc8222dadf677fb07db4009e018 Mon Sep 17 00:00:00 2001 From: morgan Date: Tue, 4 Feb 2025 16:05:22 +0800 Subject: [PATCH] cxp: use tx/rx instead of up/downconn cxp GW: add single chs roi support CXP GW: move rx/tx pipeline to misoc --- src/gateware/cxp.py | 374 +++++++------------------------------------- 1 file changed, 60 insertions(+), 314 deletions(-) diff --git a/src/gateware/cxp.py b/src/gateware/cxp.py index d400431..7371c2a 100644 --- a/src/gateware/cxp.py +++ b/src/gateware/cxp.py @@ -1,38 +1,24 @@ from migen import * from migen.genlib.cdc import MultiReg, PulseSynchronizer from misoc.interconnect.csr import * -from misoc.interconnect.stream import StrideConverter +from misoc.cores.coaxpress.core import HostTXCore, HostRXCore +from misoc.cores.coaxpress.phy.high_speed_gtx import HostRXPHYs +from misoc.cores.coaxpress.phy.low_speed_serdes import HostTXPHYs from artiq.gateware.rtio import rtlink -from cxp_downconn import CXP_RXPHYs -from cxp_upconn import CXP_TXPHYs -from cxp_pipeline import ( - Command_Packet_Reader, - Command_Test_Packet_Writer, - Duplicated_Char_Decoder, - Heartbeat_Packet_Reader, - Idle_Word_Inserter, - Packet_Arbiter, - Packet_Wrapper, - Test_Sequence_Checker, - Trigger_ACK_Inserter, - Trigger_ACK_Reader, - Trigger_Inserter, - Trigger_Reader, -) from cxp_frame_pipeline import * import cxp_router from types import SimpleNamespace -class CXP_PHYS(Module, AutoCSR): - def __init__(self, refclk, upconn_pads, downconn_pads, sys_clk_freq, master=0): - assert len(upconn_pads) == len(downconn_pads) +class CXP_Host_PHYS(Module, AutoCSR): + def __init__(self, refclk, tx_pads, rx_pads, sys_clk_freq, master=0): + assert len(tx_pads) == len(rx_pads) - self.submodules.tx = CXP_TXPHYs(upconn_pads, sys_clk_freq) - self.submodules.rx = CXP_RXPHYs(refclk, downconn_pads, sys_clk_freq, master) + self.submodules.tx = HostTXPHYs(tx_pads, sys_clk_freq) + self.submodules.rx = HostRXPHYs(refclk, rx_pads, sys_clk_freq, master) self.phys = [] for tx, rx in zip(self.tx.phys, self.rx.phys): @@ -47,8 +33,8 @@ class CXP_Core(Module, AutoCSR): # Section 12.1.6 (CXP-001-2021) self.buffer_depth, self.nslots = command_buffer_depth, nrxslot - self.submodules.tx = TX_Pipeline(phy.tx, command_buffer_depth, False) - self.submodules.rx = RX_Pipeline(phy.rx, command_buffer_depth, nrxslot, False) + self.submodules.tx = HostTXCore(phy.tx, command_buffer_depth, False) + self.submodules.rx = HostRXCore(phy.rx, command_buffer_depth, nrxslot, False) def get_tx_port(self): return self.tx.writer.mem.get_port(write_capable=True) @@ -59,265 +45,6 @@ class CXP_Core(Module, AutoCSR): def get_mem_size(self): return word_width * self.buffer_depth * self.nslots // 8 -class RX_Pipeline(Module, AutoCSR): - def __init__(self, phy, command_buffer_depth, nslot, with_trigger): - self.ready = CSRStatus() - - self.trigger_ack = CSR() - - self.pending_packet = CSR() - self.read_ptr = CSRStatus(log2_int(nslot)) - self.reader_buffer_err = CSR() - - self.reader_decode_err = CSR() - self.test_error_counter = CSRStatus(16) - self.test_packet_counter = CSRStatus(16) - self.test_counts_reset = CSR() - - self.heartbeat = CSR() - self.host_id = CSRStatus(32) - self.device_time = CSRStatus(64) - - # for multilane router - self.active = Signal() - - if with_trigger: - self.trig = Signal() - self.trig_delay = Signal(char_width) - self.trig_linktrigger_n = Signal(char_width) - - # # # - - gtx = phy.gtx - self.sync += [ - self.ready.status.eq(gtx.rx_ready), - self.active.eq(gtx.rx_ready), - ] - - # Host rx pipeline - # - # 32 32+8(dchar) - # PHY ───/───> dchar ─────/─────> trigger ─────> trigger ack ─────> packet parser ─────> EOP Marker ─────> stream data packet - # decoder reader reader │ │ │ with CRC - # (optional) │ │ └──────> test sequence checker - # │ │ - # │ └─────────> heartbeat packet reader - # │ - # └────────────> command packet reader - # - cdr = ClockDomainsRenamer("cxp_gtx_rx") - - # decode all incoming data as duplicate char and inject the result into the bus for downstream modules - self.submodules.dchar_decoder = dchar_decoder = cdr(Duplicated_Char_Decoder()) - - # Priority level 0 packet - Trigger packet - if with_trigger: - self.submodules.trig_reader = trig_reader = cdr(Trigger_Reader()) - self.sync.cxp_gtx_rx += [ - self.trig.eq(trig_reader.trig), - self.trig_delay.eq(trig_reader.delay), - self.trig_linktrigger_n.eq(trig_reader.linktrigger_n), - ] - - # Priority level 1 packet - Trigger ack packet - self.submodules.trig_ack_reader= trig_ack_reader = cdr(Trigger_ACK_Reader()) - - self.submodules.trig_ack_ps = trig_ack_ps = PulseSynchronizer("cxp_gtx_rx", "sys") - self.sync.cxp_gtx_rx += trig_ack_ps.i.eq(trig_ack_reader.ack) - self.sync += [ - If(trig_ack_ps.o, - self.trigger_ack.w.eq(1), - ).Elif(self.trigger_ack.re, - self.trigger_ack.w.eq(0), - ), - ] - - # Priority level 2 packet - stream, test, heartbeat and command packets - self.submodules.arbiter = arbiter = cdr(Packet_Arbiter()) - - self.submodules.decode_err_ps = decode_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys") - self.sync.cxp_gtx_rx += decode_err_ps.i.eq(arbiter.decode_err) - self.sync += [ - If(decode_err_ps.o, - self.reader_decode_err.w.eq(1), - ).Elif(self.reader_decode_err.re, - self.reader_decode_err.w.eq(0), - ), - ] - - if with_trigger: - rx_pipeline = [phy, dchar_decoder, trig_reader, trig_ack_reader, arbiter] - else: - rx_pipeline = [phy, dchar_decoder, trig_ack_reader, arbiter] - for s, d in zip(rx_pipeline, rx_pipeline[1:]): - self.comb += s.source.connect(d.sink) - - # Stream packet - # Drop the K29.7 and mark the EOP for downstream - self.submodules.eop_marker = eop_marker = cdr(EOP_Marker()) - self.comb += arbiter.source_stream.connect(eop_marker.sink) - - # set pipeline source to output stream packet - self.source = eop_marker.source - - - # Test packet - self.submodules.test_seq_checker = test_seq_checker = cdr(Test_Sequence_Checker()) - self.comb += arbiter.source_test.connect(test_seq_checker.sink) - - self.submodules.test_reset_ps = test_reset_ps = PulseSynchronizer("sys", "cxp_gtx_rx") - self.comb += test_reset_ps.i.eq(self.test_counts_reset.re), - - test_err_cnt_rx = Signal.like(self.test_error_counter.status) - test_pak_cnt_rx = Signal.like(self.test_packet_counter.status) - test_err_r, test_pak_r = Signal(), Signal() - self.sync.cxp_gtx_rx += [ - test_err_r.eq(test_seq_checker.error), - test_pak_r.eq(arbiter.recv_test_pak), - - If(test_reset_ps.o, - test_err_cnt_rx.eq(test_err_cnt_rx.reset), - ).Elif(test_err_r, - test_err_cnt_rx.eq(test_err_cnt_rx + 1), - ), - If(test_reset_ps.o, - test_pak_cnt_rx.eq(test_pak_cnt_rx.reset), - ).Elif(test_pak_r, - test_pak_cnt_rx.eq(test_pak_cnt_rx + 1), - ), - ] - self.specials += [ - MultiReg(test_err_cnt_rx, self.test_error_counter.status), - MultiReg(test_pak_cnt_rx, self.test_packet_counter.status), - ] - - # Command packet - self.submodules.command_reader = command_reader = cdr(Command_Packet_Reader(command_buffer_depth, nslot)) - self.comb += arbiter.source_command.connect(command_reader.sink) - - # nslot buffers control interface - write_ptr_sys = Signal.like(command_reader.write_ptr) - - self.specials += [ - MultiReg(self.read_ptr.status, command_reader.read_ptr, odomain="cxp_gtx_rx"), - MultiReg(command_reader.write_ptr, write_ptr_sys) - ] - self.sync += [ - self.pending_packet.w.eq(self.read_ptr.status != write_ptr_sys), - If(~gtx.rx_ready, - self.read_ptr.status.eq(0), - ).Elif(self.pending_packet.re & self.pending_packet.w, - self.read_ptr.status.eq(self.read_ptr.status + 1), - ) - ] - - self.submodules.buffer_err_ps = buffer_err_ps = PulseSynchronizer("cxp_gtx_rx", "sys") - self.sync.cxp_gtx_rx += buffer_err_ps.i.eq(command_reader.buffer_err), - self.sync += [ - If(buffer_err_ps.o, - self.reader_buffer_err.w.eq(1), - ).Elif(self.reader_buffer_err.re, - self.reader_buffer_err.w.eq(0), - ), - ] - - # Heartbeat packet - self.submodules.heartbeat_reader = heartbeat_reader = cdr(Heartbeat_Packet_Reader()) - self.comb += arbiter.source_heartbeat.connect(heartbeat_reader.sink) - - self.specials += [ - MultiReg(heartbeat_reader.host_id, self.host_id.status), - MultiReg(heartbeat_reader.heartbeat, self.device_time.status), - ] - - self.submodules.heartbeat_ps = heartbeat_ps = PulseSynchronizer("cxp_gtx_rx", "sys") - self.sync.cxp_gtx_rx += heartbeat_ps.i.eq(arbiter.recv_heartbeat) - self.sync += [ - If(heartbeat_ps.o, - self.heartbeat.w.eq(1), - ).Elif(self.heartbeat.re, - self.heartbeat.w.eq(0), - ), - ] - - - - - -class TX_Pipeline(Module, AutoCSR): - def __init__(self, phy, command_buffer_depth, with_trigger_ack): - self.trig_stb = Signal() - self.trig_delay = Signal(char_width) - self.trig_linktrigger_mode = Signal() - - if with_trigger_ack: - self.trig_ack_stb = Signal() - - # # # - - # Host tx pipeline - # - # 32 32 8 - # command/test ───/───> packet ─────> idle word ─────> trigger ack ───/───> conv ───/───> trigger ─────> PHY - # packet writer wrapper inserter inserter inserter - # (optional) - # - # Equivalent transmission priority: - # trigger > tigger ack > idle word > command/test packet - # - # The pipeline is splited into 32 and 8 bits section to handle the word and char boundary priority insertion requirement: - # Insertion @ char boundary: trigger packets - # Insertion @ word boundary: idle packets and trigger ack packet - # - Section 9.2.4 (CXP-001-2021) - # - # The idle inserter is placed between the trigger ack inserter and command/test packet writer to maintain the trigger performance, - # as idle word should not be inserted into trigger and trigger ack packet - Section 9.2.5.1 (CXP-001-2021) - # - - - # Priority level 0 packet - Trigger packet - self.submodules.trig = trig = Trigger_Inserter() - self.comb += [ - trig.stb.eq(self.trig_stb), - trig.delay.eq(self.trig_delay), - trig.linktrig_mode.eq(self.trig_linktrigger_mode) - ] - - # Priority level 1 packet - Trigger ack - if with_trigger_ack: - self.submodules.trig_ack = trig_ack = Trigger_ACK_Inserter() - self.comb += self.trig_ack_stb.eq(trig_ack.stb) - - # Priority level 2 packet - command and test packet - # Control is not timing dependent, all the data packets are handled in firmware - self.submodules.writer = writer = Command_Test_Packet_Writer(command_buffer_depth) - - # writer memory control interface - self.writer_word_len = CSRStorage(log2_int(command_buffer_depth)) - self.writer_stb = CSR() - self.writer_stb_testseq = CSR() - self.writer_busy = CSRStatus() - - self.sync += [ - writer.word_len.eq(self.writer_word_len.storage), - writer.stb.eq(self.writer_stb.re), - writer.stb_testseq.eq(self.writer_stb_testseq.re), - self.writer_busy.status.eq(writer.busy), - ] - - # Misc - self.submodules.pak_wrp = pak_wrp = Packet_Wrapper() - self.submodules.idle = idle = Idle_Word_Inserter() - self.submodules.converter = converter = StrideConverter(word_layout, char_layout) - - if with_trigger_ack: - tx_pipeline = [writer, pak_wrp, idle, trig_ack, converter, trig, phy] - else: - tx_pipeline = [writer, pak_wrp, idle, converter, trig, phy] - - for s, d in zip(tx_pipeline, tx_pipeline[1:]): - self.comb += s.source.connect(d.sink) - class CXP_Frame_Pipeline(Module, AutoCSR): # optimal stream packet size is 2 KiB - Section 9.5.2 (CXP-001-2021) def __init__(self, pipelines, pmod_pads, roi_engine_count=1, res_width=16, count_width=31, master=0, packet_size=16384): @@ -371,7 +98,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR): # # # - cdr = ClockDomainsRenamer("cxp_gtx_rx") + cdr = ClockDomainsRenamer("cxp_gt_rx") debug_out = False @@ -379,13 +106,13 @@ class CXP_Frame_Pipeline(Module, AutoCSR): self.submodules.pixel_pipeline = pixel_pipeline = cdr(Pixel_Pipeline(res_width, count_width, packet_size)) # CRC error counter - self.submodules.crc_reset_ps = crc_reset_ps = PulseSynchronizer("sys", "cxp_gtx_rx") + self.submodules.crc_reset_ps = crc_reset_ps = PulseSynchronizer("sys", "cxp_gt_rx") self.comb += crc_reset_ps.i.eq(self.crc_error_reset.re) crc_error_cnt_rx = Signal.like(self.crc_error_cnt.status) crc_error_r = Signal() - self.sync.cxp_gtx_rx += [ + self.sync.cxp_gt_rx += [ # to improve timinig crc_error_r.eq(pixel_pipeline.crc_checker.error), @@ -405,14 +132,14 @@ class CXP_Frame_Pipeline(Module, AutoCSR): roi_boundary = Signal.like(target) self.sync.rio += If(self.config.o.stb & (self.config.o.address == 4*n+offset), roi_boundary.eq(self.config.o.data)) - self.specials += MultiReg(roi_boundary, target, "cxp_gtx_rx") + self.specials += MultiReg(roi_boundary, target, "cxp_gt_rx") roi_out = pixel_pipeline.roi.out update = Signal() - self.submodules.ps = ps = PulseSynchronizer("cxp_gtx_rx", "sys") - self.sync.cxp_gtx_rx += ps.i.eq(roi_out.update) + self.submodules.ps = ps = PulseSynchronizer("cxp_gt_rx", "sys") + self.sync.cxp_gt_rx += ps.i.eq(roi_out.update) self.sync += update.eq(ps.o) sentinel = 2**count_width @@ -438,7 +165,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR): x_size_rx, x_size_sys = Signal(3*char_width), Signal(3*char_width) y_size_rx, y_size_sys = Signal(3*char_width), Signal(3*char_width) y_pix_rx, y_pix_sys = Signal(res_width), Signal(res_width) - self.sync.cxp_gtx_rx += [ + self.sync.cxp_gt_rx += [ If(pixel_pipeline.header_decoder.new_line, new_line_cnt_rx.eq(new_line_cnt_rx + 1), ), @@ -485,7 +212,7 @@ class CXP_Frame_Pipeline(Module, AutoCSR): buffer_cdc_fifo = cdr(Buffer(word_layout_dchar)) # to improve timing cdc_fifo = stream.AsyncFIFO(word_layout_dchar, 2**log2_int(packet_size//word_width)) self.submodules += buffer, crc_checker, buffer_cdc_fifo - self.submodules += ClockDomainsRenamer({"write": "cxp_gtx_rx", "read": "sys"})(cdc_fifo) + self.submodules += ClockDomainsRenamer({"write": "cxp_gt_rx", "read": "sys"})(cdc_fifo) self.submodules.debug_out = debug_out = RX_Debug_Buffer(word_layout_dchar, 2**log2_int(packet_size//word_width)) pipeline = [buffer, crc_checker, buffer_cdc_fifo, cdc_fifo, debug_out] @@ -495,45 +222,64 @@ class CXP_Frame_Pipeline(Module, AutoCSR): # Routing WIP - # +---------+ +-------------+ - # downconn pipline ----->| | | |------> crc checker ------> raw stream data - # | arbiter |---->| broadcaster | - # downconn pipline ----->| | | |------> crc checker ------> raw stream data - # +---------+ +-------------+ + # +---------+ +-------------+ + # downconn pipline -----> buffer ----->| | | |------> crc checker ------> raw stream data + # | arbiter |----> buffer ---->| broadcaster | + # downconn pipline -----> buffer ----->| | | |------> crc checker ------> raw stream data + # +---------+ +-------------+ # - self.submodules.arbiter = arbiter = cdr(Stream_Arbiter(n_channels)) self.submodules.broadcaster = broadcaster = cdr(Stream_Broadcaster()) - # Connect pipeline - for i, p in enumerate(pipelines): - # Assume downconns pipeline already marks the eop - self.comb += p.rx.source.connect(arbiter.sinks[i]) + if n_channels > 1: + self.submodules.arbiter = arbiter = cdr(Stream_Arbiter(n_channels)) - self.comb += arbiter.source.connect(broadcaster.sink) + # Connect pipeline + for i, p in enumerate(pipelines): + buffer = cdr(Buffer(word_layout_dchar)) + self.submodules += buffer + # Assume downconns pipeline already marks the eop + self.comb += [ + p.rx.source.connect(buffer.sink), + buffer.source.connect(arbiter.sinks[i]), + ] + + self.submodules.buf = buf = cdr(Buffer(word_layout_dchar)) + self.comb += [ + arbiter.source.connect(buf.sink), + buf.source.connect(broadcaster.sink), + ] + + # Control interface + # only the simple topology MASTER:ch0, extension:ch1,2,3 is supported right now + active_channels_sys = Signal(n_channels) + for i, p in enumerate(pipelines): + # TODO: change this to non csr signal? + self.sync += active_channels_sys[i].eq(p.rx.active) + self.specials += MultiReg(active_channels_sys, arbiter.active_channels, odomain="cxp_gt_rx"), + # DEBUG: + self.sync += self.arbiter_active_ch.status.eq(active_channels_sys) + + else: + buffer = cdr(Buffer(word_layout_dchar)) + self.submodules += buffer + self.comb += [ + pipelines[0].rx.source.connect(buffer.sink), + buffer.source.connect(broadcaster.sink), + ] if not debug_out: self.comb += broadcaster.sources[0].connect(pixel_pipeline.sink), else: self.comb += broadcaster.sources[0].connect(pipeline[0].sink), - # Control interface - # only the simple topology MASTER:ch0, extension:ch1,2,3 is supported right now - active_channels_sys = Signal(n_channels) - for i, p in enumerate(pipelines): - # TODO: change this to non csr signal? - self.sync += active_channels_sys[i].eq(p.rx.active) - self.specials += MultiReg(active_channels_sys, arbiter.active_channels, odomain="cxp_gtx_rx"), - - # DEBUG: - self.sync += self.arbiter_active_ch.status.eq(active_channels_sys) for i, p in enumerate(pipelines): # self.comb += p.rx.source.ack.eq(1) rx_stb = Signal() - self.sync.cxp_gtx_rx += rx_stb.eq(p.rx.source.stb) + self.sync.cxp_gt_rx += rx_stb.eq(p.rx.source.stb) self.specials += [ # Instance("OBUF", i_I=rx_stb, o_O=pmod_pads[i]), # Instance("OBUF", i_I=arbiter.sinks[i].stb, o_O=pmod_pads[i]), @@ -578,7 +324,7 @@ class NEO_CXP_Frame_pipeline(Module): # # # - cdr = ClockDomainsRenamer("cxp_gtx_rx") + cdr = ClockDomainsRenamer("cxp_gt_rx") debug_out = False