"""CoaXPress (CXP) frame pipeline gateware.

Stream-processing blocks used on the receive path: packet delimiting,
connection arbitration, CRC checking, stream-ID routing, image-header
decoding and pixel gearboxes.
"""
# File: src/gateware/cxp_frame_pipeline.py

from migen import *
from migen.genlib.coding import PriorityEncoder
from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine

from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only

from types import SimpleNamespace
from math import lcm

pixel_width = 16
# Four pixel lanes per output word, plus one valid bit per lane.
pixel4x_layout = [
    ("data", pixel_width*4),
    ("valid", 4),
]


def switch_endianness(s):
    """Return `s` with its byte order reversed.

    `s` is cut into 8-bit slices starting at bit 0 and the slices are
    concatenated in reverse order. `len(s)` must be a multiple of 8.
    """
    assert len(s) % 8 == 0
    chars = [s[i*8:(i+1)*8] for i in range(len(s)//8)]
    return Cat(chars[::-1])


class EOP_Inserter(Module):
    """Register the stream, drop the `pak_end` K-character and flag `eop`.

    While the sink presents the `pak_end` K-character, `source.eop` is high
    and the output register is not loaded with it (`source.stb` is cleared
    instead once the output is free).
    """
    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        # The same match is needed by both the sync and the comb logic.
        is_pak_end = (self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)

        self.sync += [
            If((~self.source.stb | self.source.ack),
                If(~is_pak_end,
                    self.sink.connect(self.source, omit={"ack", "eop"}),
                ).Else(
                    self.source.stb.eq(0),
                )
            ),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            self.source.eop.eq(is_pak_end),
        ]


class End_Of_Line_Inserter(Module):
    """Mark image line boundaries on the pixel word stream.

    `sink.data` is registered into `source`. A down-counter that reloads from
    `l_size` counts the words taken from the sink; `source.eop` is repurposed
    as an end-of-line flag and is high while the counter equals 1.
    """
    def __init__(self):
        self.l_size = Signal(3*char_width)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint([("data", word_dw)])  # pixel data don't need k code

        # # #

        # TODO: there may be a reset bug where cxp_gtx_rx is not reset but the frame size is changed:
        # cnt will be reset to the last l_size instead of the new l_size, resulting in a wrong eop tag

        # NOTE: because self.sink.stb is only active after new_frame, cnt is changed after new_frame is high.
        # Also, after transmitting the last word, cnt = 1, so cnt will update to the correct self.l_size regardless

        cnt = Signal.like(self.l_size, reset=1)
        self.sync += [
            If((~self.source.stb | self.source.ack),
                self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
                If(self.sink.stb,
                    If(cnt == 1,
                        cnt.eq(self.l_size)
                    ).Else(
                        cnt.eq(cnt - 1),
                    )
                ),
            ),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            # repurpose eop as end of line
            self.source.eop.eq(cnt == 1),
        ]


class EOP_Marker(Module):
    """Register the stream and flag `eop` on the falling edge of `sink.stb`."""
    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        last_stb = Signal()
        self.sync += [
            If((~self.source.stb | self.source.ack),
                self.source.stb.eq(self.sink.stb),
                self.source.payload.eq(self.sink.payload),
            ),
            last_stb.eq(self.sink.stb),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            # high for the cycle in which stb is low but was high one cycle earlier
            self.source.eop.eq(~self.sink.stb & last_stb),
        ]


class Stream_Arbiter(Module):
    """Connect one sink at a time to the source, switching after every packet.

    `read_mask` selects the sink that is connected in the COPY state. When
    `source.eop` is seen, the FSM spends one cycle in SWITCH_SOURCE to advance
    `read_mask`, wrapping to its reset value once it equals `n_ext_active`.
    """
    def __init__(self, n_downconn):
        self.n_ext_active = Signal(max=n_downconn)

        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_downconn)]
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="COPY")
        read_mask = Signal.like(self.n_ext_active)
        connect_case = {i: s.connect(self.source) for i, s in enumerate(self.sinks)}
        fsm.act(
            "COPY",
            Case(read_mask, connect_case),
            If(self.source.eop,
                NextState("SWITCH_SOURCE"),
            ),
        )

        # Section 9.5.5 (CXP-001-2021)
        # When multiple connections are active, stream packets are transmitted in
        # ascending order of Connection ID (which we currently only support ch1->2->3->4).
        # And one connection shall be transmitting data at a time.
        fsm.act(
            "SWITCH_SOURCE",
            If(read_mask == self.n_ext_active,
                NextValue(read_mask, read_mask.reset),
            ).Else(
                NextValue(read_mask, read_mask + 1),
            ),
            NextState("COPY"),
        )


@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """CRC-32 register built around `LiteEthMACCRCEngine`.

    `data` is the input word, `value` is the bit-reversed CRC register and
    `error` is high whenever the register differs from `check`. The
    decorators add the `reset` and `ce` control signals.
    """
    # Section 9.2.2.2 (CXP-001-2021)
    width = 32
    polynom = 0x04C11DB7
    seed = 2**width - 1
    check = 0x00000000

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        self.submodules.engine = LiteEthMACCRCEngine(
            data_width, self.width, self.polynom
        )
        reg = Signal(self.width, reset=self.seed)
        self.sync += reg.eq(self.engine.next)
        self.comb += [
            self.engine.data.eq(self.data),
            self.engine.last.eq(reg),
            self.value.eq(reg[::-1]),
            self.error.eq(reg != self.check),
        ]


# For verifying crc in stream data packet
class CXPCRC32_Checker(Module):
    """Forward packet words while accumulating their CRC.

    Every strobed sink word is fed to the CRC. The word flagged `eop` is
    consumed without being forwarded; on the following cycle the CRC register
    is checked, `error_cnt` is incremented on a mismatch and the CRC is reset.
    """
    def __init__(self):
        self.error_cnt = Signal(16)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_dw)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")
        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            If(crc.error, NextValue(self.error_cnt, self.error_cnt + 1)),
            NextState("CHECKING"),
        )

        fsm.act("CHECKING",
            If(self.sink.stb & self.sink.eop,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
            If(self.sink.stb,
                crc.ce.eq(1),
            ),
        )


class Stream_Broadcaster(Module):
    """Route each packet to the source selected by its stream ID.

    The first four sink words of a packet are consumed as header: stream ID,
    packet tag, then the 16-bit packet size (upper byte first). The following
    words are connected to the matching source: source 0 matches
    `default_id`, source i+1 matches `routing_table[i]`. When several sources
    match, the lowest index wins; when none match, the words are acknowledged
    and dropped.
    """
    def __init__(self, n_buffer, default_id=0):
        assert n_buffer > 0

        # one entry per source except source 0, which uses default_id
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)

        # # #

        self.stream_id = Signal(char_width)
        self.pak_tag = Signal(char_width)
        self.stream_pak_size = Signal(char_width * 2)

        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")

        fsm.act(
            "WAIT_HEADER",
            NextValue(self.stream_id, self.stream_id.reset),
            NextValue(self.pak_tag, self.pak_tag.reset),
            NextValue(self.stream_pak_size, self.stream_pak_size.reset),
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_id, self.sink.dchar),
                NextState("GET_PAK_TAG"),
            ),
        )

        fsm.act(
            "GET_PAK_TAG",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.pak_tag, self.sink.dchar),
                NextState("GET_PAK_SIZE_0"),
            ),
        )

        fsm.act(
            "GET_PAK_SIZE_0",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_pak_size[8:], self.sink.dchar),
                NextState("GET_PAK_SIZE_1"),
            ),
        )

        fsm.act(
            "GET_PAK_SIZE_1",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_pak_size[:8], self.sink.dchar),
                NextState("STORE_BUFFER"),
            ),
        )

        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        self.comb += sel[0].eq(self.stream_id == default_id)
        for i, routing_id in enumerate(self.routing_table):
            self.comb += sel[i+1].eq(self.stream_id == routing_id)
        # DEBUG: disregard the stream id = 0 rule
        # self.comb += source_sel[0].eq(self.stream_id == self.routing_table[0])

        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]

        routing = {i: self.sink.connect(s) for i, s in enumerate(self.sources)}
        routing["default"] = self.sink.ack.eq(1)  # discard if invalid
        fsm.act(
            "STORE_BUFFER",
            If(no_match,
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            If(self.sink.stb,
                NextValue(self.stream_pak_size, self.stream_pak_size - 1),
                If(self.stream_pak_size == 0,
                    NextState("WAIT_HEADER"),
                )
            ),
        )


class Frame_Header_Decoder(Module):
    """Strip stream markers and image headers from the stream.

    A `stream_marker` K-character is followed by a type character:
    `new_frame` (0x01) pulses `new_frame` and captures the next 23 characters
    into the header buffer exposed through `metadata`; `line_break` (0x02)
    pulses `new_line`. Any other type pulses `decode_err` and discards data
    until the next marker. All remaining words are forwarded to `source`.
    """
    def __init__(self):
        self.format_error = Signal()
        self.decode_err = Signal()

        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        # TODO: decode Image header, line break
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        # a valid stream marker K-character is present on the sink
        marker_seen = (self.sink.stb
                       & (self.sink.dchar == KCode["stream_marker"])
                       & (self.sink.dchar_k == 1))

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If(marker_seen,
                NextState("DECODE"),
            )
        )

        fsm.act("COPY",
            # copy until a new line or a new frame
            If(marker_seen,
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            )
        )

        header_type = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        cnt = Signal(max=n_metadata_chars)
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    header_type["new_frame"]: [
                        self.new_frame.eq(1),
                        NextValue(cnt, cnt.reset),
                        NextState("GET_FRAME_DATA"),
                    ],
                    header_type["line_break"]: [
                        self.new_line.eq(1),
                        NextState("COPY"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # discard all data until valid frame header
                        NextState("IDLE"),
                    ],
                }),
            )
        )

        packet_buffer = Signal(layout_len(img_header_layout))
        # character number `cnt` is stored in byte `cnt` of the buffer
        store_char_cases = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }
        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(cnt, store_char_cases),
                If(cnt == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(cnt, cnt.reset),
                ).Else(
                    NextValue(cnt, cnt + 1),
                ),
            ),
        )

        # dissect packet
        self.metadata = SimpleNamespace()
        idx = 0
        for name, size in img_header_layout:
            # CXP use MSB even when sending duplicate chars
            setattr(self.metadata, name, switch_endianness(packet_buffer[idx:idx+size]))
            idx += size


class Custom_Pixel_Gearbox(Module):
    """Repack input words into groups of four pixels.

    Each output word carries four `size`-bit pixels taken from the shift
    register, each written to its own `pixel_width`-wide lane of
    `source.data`. `source.valid` is all ones, except on the cycle after
    `sink.eop`, where it is derived from the two low bits of `x_size`.
    Only `size == 8` is accepted.
    """
    def __init__(self, size):
        assert size in [8]

        self.x_size = Signal(3*char_width)

        i_dw, o_dw = word_dw, size*4
        self.sink = stream.Endpoint([("data", i_dw)])
        self.source = stream.Endpoint(pixel4x_layout)

        # # #

        # the shift register must hold at least two input and two output words
        io_lcm = lcm(i_dw, o_dw)
        if (io_lcm//i_dw) < 2:
            io_lcm = io_lcm * 2
        if (io_lcm//o_dw) < 2:
            io_lcm = io_lcm * 2

        self.shift_register = Signal(io_lcm, reset_less=True)

        # Input sink

        i_inc = Signal()
        i_count = Signal(max=io_lcm//i_dw)

        self.comb += [
            self.sink.ack.eq(1),  # assume downstream is not blocked
            i_inc.eq(self.sink.stb),
        ]

        self.sync += [
            If(i_inc,
                If(i_count == ((io_lcm//i_dw) - 1),
                    i_count.eq(i_count.reset),
                ).Else(
                    i_count.eq(i_count + 1),
                )
            ),
        ]

        i_cases = {}
        for i in range(io_lcm//i_dw):
            i_cases[i] = [
                self.shift_register[i_dw*i:i_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(i_count, i_cases))

        # Output source

        level = Signal(max=io_lcm)
        o_inc = Signal()
        o_count = Signal(max=io_lcm//o_dw)

        self.comb += [
            self.source.stb.eq(level >= o_dw),
            o_inc.eq(self.source.stb & self.source.ack)
        ]

        self.sync += [
            If(o_inc,
                If(o_count == ((io_lcm//o_dw) - 1),
                    o_count.eq(o_count.reset),
                ).Else(
                    o_count.eq(o_count + 1),
                )
            ),
            If(i_inc & ~o_inc, level.eq(level + i_dw)),
            If(~i_inc & o_inc, level.eq(level - o_dw)),
            If(i_inc & o_inc, level.eq(level + i_dw - o_dw)),
        ]

        o_cases = {}
        for i in range(io_lcm//o_dw):
            o_cases[i] = []
            for j in range(4):
                o_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        self.shift_register[(o_dw * i) + (size * j) : (o_dw * i) + (size * (j + 1))]
                    )
                )
        self.comb += Case(o_count, o_cases)

        # Handle line break

        # precalculate which pixels are valid
        self.valid = Signal(4)
        bit_cases = {
            0: self.valid.eq(0b1111),
            1: self.valid.eq(0b0001),
            2: self.valid.eq(0b0011),
            3: self.valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)

        # TODO: reset the o_count & i_count after eop
        line_break_r = Signal()
        self.sync += line_break_r.eq(self.sink.eop)

        # get which last pixels are valid
        # use end of line to reset o_count, i_count, level & stb the last pixel
        self.comb += [
            If(line_break_r,
                self.source.valid.eq(self.valid),
            ).Else(
                self.source.valid.eq(0b1111),
            )
        ]


class Frame_Deserializer(Module):
    """Chain an `End_Of_Line_Inserter` into a `Custom_Pixel_Gearbox`.

    `l_size` and `x_size` are registered before being passed to the
    submodules. `width`, `pixel_size` and `new_frame` are currently unused;
    the gearbox is fixed at 8-bit pixels.
    """
    def __init__(self, width, pixel_size):
        self.new_frame = Signal()
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)

        # # #

        self.submodules.eol_inserter = eol_inserter = End_Of_Line_Inserter()
        self.sync += eol_inserter.l_size.eq(self.l_size)

        self.submodules.gearbox = gearbox = Custom_Pixel_Gearbox(8)
        self.sync += gearbox.x_size.eq(self.x_size)

        self.comb += eol_inserter.source.connect(gearbox.sink)

        self.sink = eol_inserter.sink
        self.source = gearbox.source

        # # TODO: use this to control mux
        # Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }
        # self.sync += [
        #     pixel_format_r.eq(header_decoder.metadata.pixel_format),
        #     Case(pixel_format_r,
        #         {
        #             pix_fmt["mono8"]: pix_size.eq(8),
        #             pix_fmt["mono10"]: pix_size.eq(10),
        #             pix_fmt["mono12"]: pix_size.eq(12),
        #             pix_fmt["mono14"]: pix_size.eq(14),
        #             pix_fmt["mono16"]: pix_size.eq(16),
        #             "default": pix_size.eq(pix_size.reset),
        #         }
        #     )
        # ]


def inc_mod(s, m):
    """Return statements that increment `s`, wrapping to 0 after `m - 1`."""
    return [s.eq(s + 1), If(s == (m -1), s.eq(0))]


class Pixel_Gearbox(Module):
    """Generic `i_dw`-bit to `o_dw`-bit stream width converter.

    Input words fill the shift register from the most significant end and
    output words are read from the same end. The sink is always acknowledged,
    so nothing prevents an overrun if the source is stalled.
    """
    def __init__(self, i_dw, o_dw):
        self.sink = sink = stream.Endpoint([("data", i_dw)])
        self.source = source = stream.Endpoint([("data", o_dw)])

        # # #

        # From Litex

        # TODO: change this to purpose built module
        # TODO: handle linebreak stb
        # TODO: handle the last line may only contain 1, 2, 3 or 4 pixels

        # Section 10.4.2 (CXP-001-2021)
        # the line data need to be fitted inside of 32*n bits where n is an integer
        # extra bits are padded with zero
        # -> perhaps use this as advantage?? it's provided as DsizeL
        # -> use DsizeL as another counter to indicate line break?

        io_lcm = lcm(i_dw, o_dw)
        if (io_lcm//i_dw) < 2:
            io_lcm = io_lcm * 2
        if (io_lcm//o_dw) < 2:
            io_lcm = io_lcm * 2

        # Control path

        level = Signal(max=io_lcm)
        i_inc = Signal()
        i_count = Signal(max=io_lcm//i_dw)
        o_inc = Signal()
        o_count = Signal(max=io_lcm//o_dw)

        self.comb += [
            sink.ack.eq(1),
            # sink.ack.eq(level < (io_lcm - i_dw)),
            source.stb.eq(level >= o_dw),
        ]
        self.comb += [
            i_inc.eq(sink.stb & sink.ack),
            o_inc.eq(source.stb & source.ack)
        ]
        self.sync += [
            If(i_inc, *inc_mod(i_count, io_lcm//i_dw)),
            If(o_inc, *inc_mod(o_count, io_lcm//o_dw)),
            If(i_inc & ~o_inc, level.eq(level + i_dw)),
            If(~i_inc & o_inc, level.eq(level - o_dw)),
            If(i_inc & o_inc, level.eq(level + i_dw - o_dw)),
        ]

        # Data path

        shift_register = Signal(io_lcm, reset_less=True)

        i_cases = {}
        i_data = Signal(i_dw)

        self.comb += i_data.eq(sink.data)
        for i in range(io_lcm//i_dw):
            i_cases[i] = shift_register[io_lcm - i_dw*(i+1):io_lcm - i_dw*i].eq(i_data)
        self.sync += If(sink.stb & sink.ack, Case(i_count, i_cases))

        o_cases = {}
        o_data = Signal(o_dw)
        for i in range(io_lcm//o_dw):
            o_cases[i] = o_data.eq(shift_register[io_lcm - o_dw*(i+1):io_lcm - o_dw*i])
        self.comb += Case(o_count, o_cases)
        self.comb += source.data.eq(o_data)


class ROI_Pipeline(Module):
    """CRC checker -> frame header decoder -> frame deserializer chain.

    The decoded `new_frame`, `l_size` and `x_size` are fed to the
    deserializer. `source.ack` is currently tied high (debug).
    """
    def __init__(self, res_width=32, pixel_size=16):

        # NOTE: csr need to stay outside since this module need to be cdr in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match pixel and see whether it matches the supported ones (via csr perhaps?)

        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()
        self.submodules.deserializer = deserializer = Frame_Deserializer(res_width, pixel_size)

        self.comb += [
            deserializer.new_frame.eq(header_decoder.new_frame),
            deserializer.l_size.eq(header_decoder.metadata.l_size),
            deserializer.x_size.eq(header_decoder.metadata.x_size),
        ]

        self.pipeline = [crc_checker, header_decoder, deserializer]
        for s, d in zip(self.pipeline, self.pipeline[1:]):
            self.comb += s.source.connect(d.sink)
        self.sink = self.pipeline[0].sink

        # DEBUG
        self.source = self.pipeline[-1].source
        # simulates a proper consumer; without this, timing is destroyed (reason unknown)
        self.comb += self.source.ack.eq(1)


class Frame_Packet_Router(Module):
    """Merge the downconnection streams and route packets to the frame buffers.

    Each downconnection goes through its own `EOP_Inserter` into the arbiter;
    the arbiter output feeds the broadcaster, whose sources are connected to
    the `framebuffer` sinks in order. `n_ext_active` and `routing_table` are
    registered before reaching the submodules. `packet_size` and `pmod_pads`
    are currently unused.
    """
    # packet size expressed in bits
    def __init__(self, downconns, framebuffer, packet_size, pmod_pads):
        n_downconn = len(downconns)
        n_buffer = len(framebuffer)
        assert n_downconn > 0 and n_buffer > 0

        self.n_ext_active = Signal(max=n_downconn)
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        # # #

        #                  +----------+     +-------------+
        # eop marker ----->|          |     |             |------> crc checker
        #                  | arbiter  |---->| broadcaster |
        # eop marker ----->| need eop |     |             |------> crc checker
        #                  +----------+     +-------------+
        #

        self.submodules.arbiter = arbiter = Stream_Arbiter(n_downconn)
        self.sync += arbiter.n_ext_active.eq(self.n_ext_active)

        self.submodules.broadcaster = broadcaster = Stream_Broadcaster(n_buffer)
        for i, s in enumerate(self.routing_table):
            self.sync += broadcaster.routing_table[i].eq(s)

        for i, d in enumerate(downconns):
            # eop is needed for arbiter and crc checker to work correctly
            eop_marker = EOP_Inserter()
            self.submodules += eop_marker
            self.comb += [
                d.source.connect(eop_marker.sink),
                eop_marker.source.connect(arbiter.sinks[i])
            ]

        self.comb += arbiter.source.connect(broadcaster.sink)

        for i, fb in enumerate(framebuffer):
            self.comb += broadcaster.sources[i].connect(fb.sink)