"""Stream pipeline building blocks for a CoaXPress (CXP) link.

Section and table numbers in the comments refer to CXP-001-2021.

All modules exchange data through MiSoC ``stream.Endpoint`` objects using one
of three layouts defined below:

* ``char_layout``       - one 8-bit character plus its K flag,
* ``word_layout``       - one 32-bit word (four characters) plus four K flags,
* ``word_layout_dchar`` - ``word_layout`` plus the majority-voted
  "duplicated character" (``dchar``/``dchar_k``) of that word.
"""

from functools import reduce
from itertools import combinations
from operator import or_, and_

from migen import *
from migen.genlib.cdc import MultiReg
from misoc.interconnect.csr import *
from misoc.interconnect import stream

char_width = 8
char_layout = [("data", char_width), ("k", char_width//8)]

word_dw = 32
word_layout = [("data", word_dw), ("k", word_dw//8)]

word_layout_dchar = [
    ("data", word_dw),
    ("k", word_dw//8),
    ("dchar", char_width),
    ("dchar_k", char_width//8),
]

# RX_Bootstrap stores packets in `buffer_count` buffers of `buffer_depth`
# words each; TX_Bootstrap uses a single buffer of `buffer_depth` words.
buffer_count = 4
buffer_depth = 512


def K(x, y):
    """Return the 8-bit value of character x.y, i.e. ``(y << 5) | x``."""
    return ((y << 5) | x)


# Control characters used by the modules in this file.
KCode = {
    "pak_start"       : C(K(27, 7), char_width),
    "io_ack"          : C(K(28, 6), char_width),
    "trig_indic_28_2" : C(K(28, 2), char_width),
    "stream_marker"   : C(K(28, 3), char_width),
    "trig_indic_28_4" : C(K(28, 4), char_width),
    "pak_end"         : C(K(29, 7), char_width),
    "idle_comma"      : C(K(28, 5), char_width),
    "idle_alignment"  : C(K(28, 1), char_width),
}


class Packet_Wrapper(Module):
    """Wrap each packet from ``sink`` between a start word and an end word.

    When ``sink`` presents data, a word of 4x ``pak_start`` is emitted first,
    then the packet is copied through (with ``eop`` masked), and once the
    word carrying ``sink.eop`` has been accepted a word of 4x ``pak_end`` is
    emitted with ``eop`` set.
    """
    def __init__(self):
        self.sink = stream.Endpoint(word_layout)
        self.source = stream.Endpoint(word_layout)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                # hold the first word until the header has been sent
                self.sink.ack.eq(0),
                NextState("INSERT_HEADER"),
            )
        )

        fsm.act("INSERT_HEADER",
            self.sink.ack.eq(0),
            self.source.stb.eq(1),
            self.source.data.eq(Replicate(KCode["pak_start"], 4)),
            self.source.k.eq(Replicate(1, 4)),
            If(self.source.ack, NextState("COPY")),
        )

        fsm.act("COPY",
            self.sink.connect(self.source),
            # eop is only raised on the footer word
            self.source.eop.eq(0),
            If(self.sink.stb & self.sink.eop & self.source.ack,
                NextState("INSERT_FOOTER"),
            ),
        )

        fsm.act("INSERT_FOOTER",
            self.sink.ack.eq(0),
            self.source.stb.eq(1),
            self.source.data.eq(Replicate(KCode["pak_end"], 4)),
            self.source.k.eq(Replicate(1, 4)),
            self.source.eop.eq(1),
            If(self.source.ack, NextState("IDLE")),
        )


class TX_Trigger(Module):
    """Insert a six-character trigger packet into a character stream.

    Inputs:
        stb:           request a trigger packet while the FSM is in COPY.
        delay:         8-bit value sent three times after the K characters.
        linktrig_mode: selects the order of the K28.2/K28.4 indications.

    While no trigger is being written, ``sink`` is connected to ``source``.
    """
    def __init__(self):
        self.stb = Signal()
        self.delay = Signal(char_width)
        self.linktrig_mode = Signal()

        # # #

        self.sink = stream.Endpoint(char_layout)
        self.source = stream.Endpoint(char_layout)

        # Table 15 & 16 (CXP-001-2021)
        # Send [K28.2, K28.4, K28.4] or [K28.4, K28.2, K28.2] and 3x delay as trigger packet

        trig_packet = [Signal(char_width), Signal(char_width), Signal(char_width), self.delay, self.delay, self.delay]
        trig_packet_k = [1, 1, 1, 0, 0, 0]
        self.comb += [
            If(self.linktrig_mode,
                trig_packet[0].eq(KCode["trig_indic_28_4"]),
                trig_packet[1].eq(KCode["trig_indic_28_2"]),
                trig_packet[2].eq(KCode["trig_indic_28_2"]),
            ).Else(
                trig_packet[0].eq(KCode["trig_indic_28_2"]),
                trig_packet[1].eq(KCode["trig_indic_28_4"]),
                trig_packet[2].eq(KCode["trig_indic_28_4"]),
            ),
        ]

        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        # index of the trigger packet character currently being sent
        cnt = Signal(max=6)
        fsm.act("COPY",
            NextValue(cnt, cnt.reset),
            self.sink.connect(self.source),
            If(self.stb, NextState("WRITE_TRIG"))
        )

        fsm.act("WRITE_TRIG",
            self.sink.ack.eq(0),
            self.source.stb.eq(1),
            self.source.data.eq(Array(trig_packet)[cnt]),
            self.source.k.eq(Array(trig_packet_k)[cnt]),
            If(self.source.ack,
                If(cnt == 5,
                    NextState("COPY"),
                ).Else(
                    NextValue(cnt, cnt + 1),
                )
            )
        )


class Idle_Word_Inserter(Module):
    """Emit IDLE words when there is nothing to send and periodically otherwise.

    The FSM starts in WRITE_IDLE, where the IDLE word is driven on ``source``.
    It switches to COPY once ``sink`` has data and the IDLE word was accepted,
    and returns to WRITE_IDLE when a transfer completes with ``sink`` empty,
    with ``sink.eop`` set, or with the word counter at zero.
    """
    def __init__(self):
        # Section 9.2.5 (CXP-001-2021)
        # Send K28.5, K28.1, K28.1, D21.5 as idle word
        self.submodules.fsm = fsm = FSM(reset_state="WRITE_IDLE")

        self.sink = stream.Endpoint(word_layout)
        self.source = stream.Endpoint(word_layout)

        # Section 9.2.5.1 (CXP-001-2021)
        # IDLE should be transmitted every 10000 words
        cnt = Signal(max=10000, reset=9999)

        fsm.act("WRITE_IDLE",
            self.source.stb.eq(1),
            self.source.data.eq(Cat(KCode["idle_comma"], KCode["idle_alignment"], KCode["idle_alignment"], C(0xB5, char_width))),
            self.source.k.eq(Cat(1, 1, 1, 0)),

            self.sink.ack.eq(1),
            If(self.sink.stb,
                # keep the pending word in the sink; it is sent from COPY
                self.sink.ack.eq(0),
                If(self.source.ack,
                    NextValue(cnt, cnt.reset),
                    NextState("COPY"),
                )
            ),
        )

        fsm.act("COPY",
            self.sink.connect(self.source),
            # count down each time a word from upstream is accepted downstream
            If(self.sink.stb & self.source.ack, NextValue(cnt, cnt - 1)),
            If((( (~self.sink.stb) | (self.sink.eop) | (cnt == 0) ) & self.source.ack), NextState("WRITE_IDLE"))
        )


class Trigger_ACK_Inserter(Module):
    """Insert a two-word trigger acknowledgement into a word stream.

    Asserting ``stb`` while the FSM is in COPY makes it stall ``sink`` and
    emit a word of 4x K28.6 followed by a word of 4x 0x01, then resume
    copying ``sink`` to ``source``.
    """
    def __init__(self):
        self.stb = Signal()

        # # #

        # Section 9.3.2 (CXP-001-2021)
        # Send 4x K28.6 and 4x 0x01 as trigger packet ack
        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        self.sink = stream.Endpoint(word_layout)
        self.source = stream.Endpoint(word_layout)
        fsm.act("COPY",
            self.sink.connect(self.source),
            If(self.stb, NextState("WRITE_ACK0"))
        )

        fsm.act("WRITE_ACK0",
            self.sink.ack.eq(0),
            self.source.stb.eq(1),
            self.source.data.eq(Replicate(KCode["io_ack"], 4)),
            self.source.k.eq(Replicate(1, 4)),
            If(self.source.ack, NextState("WRITE_ACK1")),
        )

        fsm.act("WRITE_ACK1",
            self.sink.ack.eq(0),
            self.source.stb.eq(1),
            self.source.data.eq(Replicate(C(0x01, char_width), 4)),
            self.source.k.eq(Replicate(0, 4)),
            If(self.source.ack, NextState("COPY")),
        )


@FullMemoryWE()
class TX_Bootstrap(Module, AutoCSR):
    """Transmit a packet from a local memory, or a generated test sequence.

    CSRs:
        tx_word_len: word address at which the memory transmission ends.
        tx:          write to start transmitting the memory content.
        tx_testseq:  write to start transmitting the test packet.
        tx_busy:     registered flag, high while the FSM is not in IDLE.

    The test packet is one word of 4x 0x04 followed by words holding four
    consecutive byte values of a running counter; ``eop`` is set on the word
    sent while the counter equals 0xFFC.
    """
    def __init__(self):
        self.tx_word_len = CSRStorage(log2_int(buffer_depth))
        self.tx = CSR()
        self.tx_testseq = CSR()

        self.tx_busy = CSRStatus()

        # # #

        self.specials.mem = mem = Memory(word_dw, buffer_depth)
        self.specials.mem_port = mem_port = mem.get_port()
        self.source = stream.Endpoint(word_layout)

        # increment addr in the same cycle the moment addr_inc is high
        # as memory takes one cycle to shift to the correct addr
        addr_next = Signal(log2_int(buffer_depth))
        addr = Signal.like(addr_next)
        addr_rst = Signal()
        addr_inc = Signal()
        self.sync += addr.eq(addr_next)

        self.comb += [
            addr_next.eq(addr),
            If(addr_rst,
                addr_next.eq(addr_next.reset),
            ).Elif(addr_inc,
                addr_next.eq(addr + 1),
            ),
            mem_port.adr.eq(addr_next),
        ]

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")
        self.sync += self.tx_busy.status.eq(~fsm.ongoing("IDLE"))

        # running value of the test sequence
        cnt = Signal(max=0xFFF)
        fsm.act("IDLE",
            addr_rst.eq(1),
            If(self.tx.re, NextState("TRANSMIT")),
            # listed last, so a test sequence request takes priority over tx
            If(self.tx_testseq.re,
                NextValue(cnt, cnt.reset),
                NextState("WRITE_TEST_PACKET_TYPE"),
            )
        )

        fsm.act("TRANSMIT",
            self.source.stb.eq(1),
            self.source.data.eq(mem_port.dat_r),
            If(self.source.ack,
                addr_inc.eq(1),
            ),
            If(addr_next == self.tx_word_len.storage,
                self.source.eop.eq(1),
                NextState("IDLE")
            )
        )

        fsm.act("WRITE_TEST_PACKET_TYPE",
            self.source.stb.eq(1),
            self.source.data.eq(Replicate(C(0x04, char_width), 4)),
            self.source.k.eq(Replicate(0, 4)),
            If(self.source.ack, NextState("WRITE_TEST_COUNTER"))
        )

        fsm.act("WRITE_TEST_COUNTER",
            self.source.stb.eq(1),
            # each byte lane is 8 bits wide, so the +1/+2/+3 sums wrap at 0xFF
            self.source.data[:8].eq(cnt[:8]),
            self.source.data[8:16].eq(cnt[:8]+1),
            self.source.data[16:24].eq(cnt[:8]+2),
            self.source.data[24:].eq(cnt[:8]+3),
            self.source.k.eq(Cat(0, 0, 0, 0)),
            If(self.source.ack,
                If(cnt == 0xFFC,
                    self.source.eop.eq(1),
                    NextState("IDLE")
                ).Else(
                    NextValue(cnt, cnt + 4),
                )
            )
        )


class RX_Debug_Buffer(Module, AutoCSR):
    """FIFO whose head word is exposed through CSRs for debugging.

    Args:
        layout: stream layout of the FIFO; ``data`` and ``k`` fields are read.
        size:   FIFO depth passed to ``stream.SyncFIFO``.

    CSRs:
        inc:        write to acknowledge (pop) the FIFO output.
        dout_valid: registered ``stb`` of the FIFO output.
        dout_pak / kout_pak / eop: registered ``data`` / ``k`` / ``eop``.
        crc_error:  declared but not driven by this module.
    """
    def __init__(self, layout, size):
        self.submodules.buf_out = buf_out = stream.SyncFIFO(layout, size, True)
        self.sink = buf_out.sink

        self.inc = CSR()
        self.dout_valid = CSRStatus()

        self.dout_pak = CSRStatus(word_dw)
        self.kout_pak = CSRStatus(word_dw//8)
        self.crc_error = CSRStatus()
        self.eop = CSRStatus()

        self.sync += [
            buf_out.source.ack.eq(self.inc.re),
            self.dout_valid.status.eq(buf_out.source.stb),
            # output
            self.eop.status.eq(buf_out.source.eop),
            self.dout_pak.status.eq(buf_out.source.data),
            self.kout_pak.status.eq(buf_out.source.k),
            # NOTE: crc_error is left at its reset value; the layouts in this
            # file have no "error" field to drive it from.
            # self.crc_error.status.eq(buf_out.source.error)
        ]


class Duplicated_Char_Decoder(Module):
    """Attach the majority-voted duplicated character to every word.

    The output word is delayed by two register stages relative to ``sink``;
    ``source.dchar``/``source.dchar_k`` carry, per bit, a 1 where at least
    three of the four characters of the word have a 1.
    """
    def __init__(self):
        self.sink = stream.Endpoint(word_layout)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        # For duplicated characters, an error correction method (e.g. majority voting) is required to meet the CXP spec:
        # The RX decoder should be immune to single bit errors when handling duplicated characters - Section 9.2.2.1 (CXP-001-2021)
        #
        #
        #                   32
        #        +---> buffer -----/-----+
        #    32  |                       |              32+8(dchar)
        # sink ---/---+                  ---> source -----/-----> downstream
        #        |          8(dchar)     |                        decoders
        #        +---> majority -----/-----+
        #              voting
        #
        #
        # Due to the tight setup/hold time requirement for 12.5Gbps CXP, the voting logic cannot be implemented as combinational logic.
        # Hence, a pipelined approach is needed to avoid any s/h violation, where the majority voting results are pre-calculated
        # and injected into the bus immediately after the PHY.
        # Any downstream module can then access the voting result at any time.

        # cycle 1 - buffer data & calculate intermediate result
        buffer = stream.Endpoint(word_layout)
        stage1_ready = ~buffer.stb | buffer.ack
        self.sync += [
            If(stage1_ready,
                self.sink.connect(buffer, omit={"ack"}),
            )
        ]
        self.comb += self.sink.ack.eq(stage1_ready)

        # calculate ABC, ABD, ACD, BCD
        char = [[self.sink.data[i*8:(i+1)*8], self.sink.k[i]] for i in range(4)]
        voters = [Record([("data", 8), ("k", 1)]) for _ in range(4)]

        for i, triple in enumerate(combinations(char, 3)):
            self.sync += [
                If(stage1_ready,
                    voters[i].data.eq(reduce(and_, [code[0] for code in triple])),
                    voters[i].k.eq(reduce(and_, [code[1] for code in triple])),
                )
            ]

        # cycle 2 - inject the voting result
        stage2_ready = ~self.source.stb | self.source.ack
        self.sync += [
            If(stage2_ready,
                buffer.connect(self.source, omit={"ack", "dchar", "dchar_k"}),
                # dchar is one character wide and dchar_k one bit wide, so the
                # voted value is assigned directly (no replication needed).
                self.source.dchar.eq(reduce(or_, [v.data for v in voters])),
                self.source.dchar_k.eq(reduce(or_, [v.k for v in voters])),
            )
        ]
        self.comb += buffer.ack.eq(stage2_ready)


@FullMemoryWE()
class RX_Bootstrap(Module):
    """Decode incoming packets and dispatch them by packet type.

    * data stream packets are forwarded to ``source``,
    * test packets are checked against a local counter,
    * control ack (with/without tag) and event packets are written into a
      memory organised as ``buffer_count`` buffers of ``buffer_depth`` words,
    * heartbeat packets are currently dropped,
    * any other type pulses ``decode_err``.

    Signals:
        packet_type:    type of the last packet routed to the buffer.
        decode_err:     pulsed on an unknown packet type.
        buffer_err:     pulsed when a packet overflows a buffer or when the
                        next buffer has not been read yet.
        test_err_cnt / test_pak_cnt: test error / test packet counters,
                        cleared by ``test_cnt_reset``.
        write_ptr_sys:  ``write_ptr`` passed through a ``MultiReg``.
        read_ptr_rx:    read pointer input, compared against the next
                        write pointer.
    """
    def __init__(self):
        self.packet_type = Signal(8)

        self.decode_err = Signal()
        self.buffer_err = Signal()

        self.test_err_cnt = Signal(16)
        self.test_pak_cnt = Signal(16)
        self.test_cnt_reset = Signal()
        # # #

        packet_types = {
            "data_stream": 0x01,
            "control_ack_no_tag": 0x03,
            "test_packet": 0x04,
            "control_ack_with_tag": 0x06,
            "event": 0x07,
            "heartbeat": 0x09,
        }

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If((self.sink.stb & (self.sink.dchar == KCode["pak_start"]) & (self.sink.dchar_k == 1)),
                NextState("DECODE"),
            )
        )

        cnt = Signal(max=0x100)
        addr_nbits = log2_int(buffer_depth)
        addr = Signal(addr_nbits)
        test_pak = Signal()

        def load_buffer():
            # Fresh statements for every Case branch that stores the packet.
            return [
                NextValue(self.packet_type, self.sink.dchar),
                NextValue(addr, addr.reset),
                NextState("LOAD_BUFFER"),
            ]

        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    packet_types["data_stream"]: NextState("STREAMING"),
                    packet_types["test_packet"]: [
                        test_pak.eq(1),
                        NextValue(cnt, cnt.reset),
                        NextState("VERIFY_TEST_PATTERN"),
                    ],
                    packet_types["control_ack_no_tag"]: load_buffer(),
                    packet_types["control_ack_with_tag"]: load_buffer(),
                    packet_types["event"]: load_buffer(),
                    packet_types["heartbeat"]: [
                        # TODO: handle heartbeat
                        NextState("IDLE"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # wait till next valid packet
                        NextState("IDLE"),
                    ],
                }),
            )
        )

        # For stream data packet
        fsm.act("STREAMING",
            self.sink.connect(self.source),
            # assume downstream is not blocked
            If((self.sink.stb & (self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)),
                NextState("IDLE")
            )
        )

        # Section 9.9.1 (CXP-001-2021)
        # the received test data packet (0x00, 0x01 ... 0xFF)
        # need to be compared against the local test sequence generator
        # TODO: improve this to avoid tight setup/hold time
        test_err = Signal()
        fsm.act("VERIFY_TEST_PATTERN",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                If(((self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)),
                    NextState("IDLE"),
                ).Else(
                    # at most one error is flagged per word
                    If(self.sink.data[:8] != cnt,
                        test_err.eq(1),
                    ).Elif(self.sink.data[8:16] != cnt + 1,
                        test_err.eq(1),
                    ).Elif(self.sink.data[16:24] != cnt + 2,
                        test_err.eq(1),
                    ).Elif(self.sink.data[24:] != cnt + 3,
                        test_err.eq(1),
                    ),

                    If(cnt == 0xFC,
                        NextValue(cnt, cnt.reset),
                    ).Else(
                        NextValue(cnt, cnt + 4)
                    )
                )
            )
        )

        self.sync += [
            If(self.test_cnt_reset,
                self.test_err_cnt.eq(self.test_err_cnt.reset),
            ).Elif(test_err,
                self.test_err_cnt.eq(self.test_err_cnt + 1),
            ),
            If(self.test_cnt_reset,
                self.test_pak_cnt.eq(self.test_pak_cnt.reset),
            ).Elif(test_pak,
                self.test_pak_cnt.eq(self.test_pak_cnt + 1),
            )
        ]

        # A circular buffer for firmware to read packet from
        self.specials.mem = mem = Memory(word_dw, buffer_count*buffer_depth)
        self.specials.mem_port = mem_port = mem.get_port(write_capable=True)

        # buffered mem_port to improve timing
        buf_mem_we = Signal.like(mem_port.we)
        buf_mem_dat_w = Signal.like(mem_port.dat_w)
        buf_mem_adr = Signal.like(mem_port.adr)
        self.sync += [
            mem_port.we.eq(buf_mem_we),
            mem_port.dat_w.eq(buf_mem_dat_w),
            mem_port.adr.eq(buf_mem_adr)
        ]

        write_ptr = Signal(log2_int(buffer_count))
        self.write_ptr_sys = Signal.like(write_ptr)
        self.specials += MultiReg(write_ptr, self.write_ptr_sys)

        # Next write pointer, truncated to the pointer width so that it wraps
        # from the last buffer back to buffer 0. Comparing `write_ptr + 1`
        # directly would use a one-bit-wider sum in Migen and never match the
        # read pointer on wrap-around.
        write_ptr_next = Signal.like(write_ptr)
        self.comb += write_ptr_next.eq(write_ptr + 1)

        # memory address = {write_ptr, addr}
        self.comb += [
            buf_mem_adr[:addr_nbits].eq(addr),
            buf_mem_adr[addr_nbits:].eq(write_ptr),
        ]

        # For control ack, event packet
        fsm.act("LOAD_BUFFER",
            buf_mem_we.eq(0),
            self.sink.ack.eq(1),
            If(self.sink.stb,
                If(((self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)),
                    NextState("MOVE_BUFFER_PTR"),
                ).Else(
                    buf_mem_we.eq(1),
                    buf_mem_dat_w.eq(self.sink.data),
                    NextValue(addr, addr + 1),
                    If(addr == buffer_depth - 1,
                        # discard the packet
                        self.buffer_err.eq(1),
                        NextState("IDLE"),
                    )
                )
            )
        )

        self.read_ptr_rx = Signal.like(write_ptr)
        fsm.act("MOVE_BUFFER_PTR",
            self.sink.ack.eq(0),
            If(write_ptr_next == self.read_ptr_rx,
                # if next one hasn't been read, overwrite the current buffer when new packet comes in
                self.buffer_err.eq(1),
            ).Else(
                NextValue(write_ptr, write_ptr_next),
            ),
            NextState("IDLE"),
        )


class Trigger_Ack_Checker(Module, AutoCSR):
    """Strip trigger acknowledgements from the stream and report them.

    A word whose ``dchar`` is K28.6 and the word that follows it are consumed
    and not forwarded. ``ack`` is pulsed when that following word has
    ``dchar == 0x01`` with ``dchar_k == 0``. All other words are passed from
    ``sink`` to ``source``.
    """
    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.ack = Signal()

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        fsm.act("COPY",
            If((self.sink.stb & (self.sink.dchar == KCode["io_ack"]) & (self.sink.dchar_k == 1)),
                # discard K28,6
                self.sink.ack.eq(1),
                NextState("CHECK_ACK")
            ).Else(
                self.sink.connect(self.source),
            )
        )

        fsm.act("CHECK_ACK",
            If(self.sink.stb,
                NextState("COPY"),
                # discard the word after K28,6
                self.sink.ack.eq(1),
                If((self.sink.dchar == 0x01) & (self.sink.dchar_k == 0),
                    self.ack.eq(1),
                )
            )
        )


class Buffer(Module):
    """Single register stage between ``sink`` and ``source``.

    Args:
        layout: stream layout used for both endpoints.
        omits:  optional set of field names not to be copied to ``source``
                (``ack`` is always handled separately).
    """
    def __init__(self, layout, omits=None):
        self.sink = stream.Endpoint(layout)
        self.source = stream.Endpoint(layout)

        # # #

        omitted = {"ack"}.union(omits) if omits else {"ack"}
        # the register can be loaded when it is empty or being read
        ready = ~self.source.stb | self.source.ack

        self.sync += [
            If(ready,
                self.sink.connect(self.source, omit=omitted),
            ),
        ]
        self.comb += [
            self.sink.ack.eq(ready),
        ]


class DChar_Dropper(Module):
    """Register stage that converts ``word_layout_dchar`` to ``word_layout``.

    The ``dchar`` and ``dchar_k`` fields are not copied to ``source``.
    """
    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout)

        # # #

        # the register can be loaded when it is empty or being read
        ready = ~self.source.stb | self.source.ack

        self.sync += [
            If(ready,
                self.sink.connect(self.source, omit={"ack", "dchar", "dchar_k", "error"}),
            ),
        ]
        self.comb += [
            self.sink.ack.eq(ready),
        ]