from migen import *
from migen.genlib.coding import PriorityEncoder

from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine

from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only

from types import SimpleNamespace
from math import lcm

pixel_width = 16
# Four pixels per word, each pixel_width bits wide, plus one valid bit per pixel
pixel4x_layout = [
    ("data", pixel_width*4),
    ("valid", 4),
]


def switch_endianness(s):
    """Return `s` with the order of its 8-bit chunks reversed.

    `len(s)` must be a multiple of 8.
    """
    assert len(s) % 8 == 0
    char = [s[i*8:(i+1)*8] for i in range(len(s)//8)]
    return Cat(char[::-1])


class EOP_Inserter(Module):
    """Register the sink onto the source and flag `eop` on the `pak_end` K-code.

    Words whose `dchar` is the `pak_end` K-code are not forwarded (the source
    strobe is cleared instead). `source.eop` is driven combinatorially whenever
    the sink currently presents that K-code, i.e. while the previously
    registered word is on the source.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.sync += [
            If((~self.source.stb | self.source.ack),
                If(~((self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)),
                    self.sink.connect(self.source, omit={"ack", "eop"}),
                ).Else(
                    self.source.stb.eq(0),
                )
            ),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            self.source.eop.eq(((self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1))),
        ]


class End_Of_Line_Inserter(Module):
    """Register the sink data onto the source and tag the last word of a line.

    A down counter is reloaded from `l_size` each time it reaches 1 and is
    decremented on every accepted sink strobe; `source.eop` is asserted while
    the counter equals 1.
    """

    def __init__(self):
        self.l_size = Signal(3*char_width)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint([("data", word_dw)])  # pixel data don't need k code

        # # #

        # TODO: there maybe a reset bug where cxp_gtx_rx is not reset but frame size is changed
        # cnt will be reset to last l_size instead of the new l_size resulting in wrong eop tag

        # NOTE: because the self.sink.stb is only active after new_frame, the cnt is changed after the new_frame is high
        # Also, after transmitting the last word, cnt = 1, so cnt will update to the correct self.l_size regardless
        cnt = Signal.like(self.l_size, reset=1)
        self.sync += [
            If((~self.source.stb | self.source.ack),
                self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
                If(self.sink.stb,
                    If(cnt == 1,
                        cnt.eq(self.l_size)
                    ).Else(
                        cnt.eq(cnt - 1),
                    )
                ),
            ),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            # repurpose eop as end of line
            self.source.eop.eq(cnt == 1),
        ]


class EOP_Marker(Module):
    """Register the sink onto the source and flag `eop` on a falling sink strobe.

    `source.eop` is asserted in the cycle where `sink.stb` is low but was high
    in the previous cycle.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        last_stb = Signal()
        self.sync += [
            If((~self.source.stb | self.source.ack),
                self.source.stb.eq(self.sink.stb),
                self.source.payload.eq(self.sink.payload),
            ),
            last_stb.eq(self.sink.stb),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            self.source.eop.eq(~self.sink.stb & last_stb),
        ]


class Stream_Arbiter(Module):
    """Connect one of `n_downconn` sinks to the source, rotating on `eop`.

    The selected sink index advances by one after each `source.eop` and wraps
    back to its reset value once it equals `n_ext_active`.
    """

    def __init__(self, n_downconn):
        self.n_ext_active = Signal(max=n_downconn)
        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_downconn)]
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        read_mask = Signal.like(self.n_ext_active)
        connect_case = {i: s.connect(self.source) for i, s in enumerate(self.sinks)}
        fsm.act(
            "COPY",
            Case(read_mask, connect_case),
            If(self.source.eop,
                NextState("SWITCH_SOURCE"),
            ),
        )

        # Section 9.5.5 (CXP-001-2021)
        # When Multiple connections are active, stream packets are transmitted in
        # ascending order of Connection ID (which we currently only support ch1->2->3->4).
        # And one connection shall be transmitting data at a time.
        fsm.act(
            "SWITCH_SOURCE",
            If(read_mask == self.n_ext_active,
                NextValue(read_mask, read_mask.reset),
            ).Else(
                NextValue(read_mask, read_mask + 1),
            ),
            NextState("COPY"),
        )


@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """CRC32 register built around `LiteEthMACCRCEngine`.

    `value` exposes the bit-reversed CRC register and `error` is high whenever
    the register differs from `check`. The decorators add `reset` and `ce`
    controls to the module.
    """
    # Section 9.2.2.2 (CXP-001-2021)
    width = 32
    polynom = 0x04C11DB7
    seed = 2**width - 1
    check = 0x00000000

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        self.submodules.engine = LiteEthMACCRCEngine(
            data_width, self.width, self.polynom
        )
        reg = Signal(self.width, reset=self.seed)
        self.sync += reg.eq(self.engine.next)
        self.comb += [
            self.engine.data.eq(self.data),
            self.engine.last.eq(reg),
            self.value.eq(reg[::-1]),
            self.error.eq(reg != self.check),
        ]


# For verifying crc in stream data packet
class CXPCRC32_Checker(Module):
    """Pass the stream through while checking its CRC.

    Every strobed sink word is fed to the CRC. The word flagged with `eop` is
    acknowledged but not forwarded; on the following cycle the CRC is reset
    and `error_cnt` is incremented if the CRC reports an error.
    """

    def __init__(self):
        self.error_cnt = Signal(16)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_dw)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")
        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            If(crc.error, NextValue(self.error_cnt, self.error_cnt + 1)),
            NextState("CHECKING"),
        )

        fsm.act("CHECKING",
            If(self.sink.stb & self.sink.eop,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
            If(self.sink.stb,
                crc.ce.eq(1),
            ),
        )


class Stream_Broadcaster(Module):
    """Route each packet to one of `n_buffer` sources based on its stream id.

    Four header chars are consumed first (stream id, packet tag and two packet
    size chars). The remaining words are forwarded to the source selected by
    the stream id: source 0 matches `default_id`, source i+1 matches
    `routing_table[i]`. Words are acknowledged and dropped when nothing
    matches.
    """

    def __init__(self, n_buffer, default_id=0):
        assert n_buffer > 0

        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)

        # # #

        self.stream_id = Signal(char_width)
        self.pak_tag = Signal(char_width)
        self.stream_pak_size = Signal(char_width * 2)

        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")

        fsm.act(
            "WAIT_HEADER",
            NextValue(self.stream_id, self.stream_id.reset),
            NextValue(self.pak_tag, self.pak_tag.reset),
            NextValue(self.stream_pak_size, self.stream_pak_size.reset),
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_id, self.sink.dchar),
                NextState("GET_PAK_TAG"),
            ),
        )

        fsm.act(
            "GET_PAK_TAG",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.pak_tag, self.sink.dchar),
                NextState("GET_PAK_SIZE_0"),
            ),
        )

        # the upper size char is received first
        fsm.act(
            "GET_PAK_SIZE_0",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_pak_size[8:], self.sink.dchar),
                NextState("GET_PAK_SIZE_1"),
            ),
        )

        fsm.act(
            "GET_PAK_SIZE_1",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_pak_size[:8], self.sink.dchar),
                NextState("STORE_BUFFER"),
            ),
        )

        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        self.comb += sel[0].eq(self.stream_id == default_id)
        for i, routing_id in enumerate(self.routing_table):
            self.comb += sel[i+1].eq(self.stream_id == routing_id)

        # DEBUG: disregard the stream id = 0 rule
        # self.comb += source_sel[0].eq(self.stream_id == self.routing_table[0])

        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]

        routing = {i: self.sink.connect(s) for i, s in enumerate(self.sources)}
        routing["default"] = self.sink.ack.eq(1)  # discard if invalid
        fsm.act(
            "STORE_BUFFER",
            If(no_match,
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            If(self.sink.stb,
                NextValue(self.stream_pak_size, self.stream_pak_size - 1),
                If(self.stream_pak_size == 0,
                    NextState("WAIT_HEADER"),
                )
            ),
        )


class Frame_Header_Decoder(Module):
    """Strip stream markers / image headers and expose the header fields.

    After a `stream_marker` K-code the next char selects between a new frame
    (followed by `n_metadata_chars` header chars that are captured into
    `metadata`) and a line break. All other words are forwarded to `source`
    while in the COPY state. `new_frame`, `new_line` and `decode_err` are
    driven for the cycle in which the corresponding type char is decoded.
    """

    def __init__(self):
        self.format_error = Signal()
        self.decode_err = Signal()
        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        # TODO: decode Image header, line break
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If((self.sink.stb & (self.sink.dchar == KCode["stream_marker"]) & (self.sink.dchar_k == 1)),
                NextState("DECODE"),
            )
        )

        fsm.act("COPY",
            # until for new line or new frame
            If((self.sink.stb & (self.sink.dchar == KCode["stream_marker"]) & (self.sink.dchar_k == 1)),
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            )
        )

        # Type char following the stream marker
        # (named to avoid shadowing the `type` builtin)
        header_type = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        cnt = Signal(max=n_metadata_chars)
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    header_type["new_frame"]: [
                        self.new_frame.eq(1),
                        NextValue(cnt, cnt.reset),
                        NextState("GET_FRAME_DATA"),
                    ],
                    header_type["line_break"]: [
                        self.new_line.eq(1),
                        NextState("COPY"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # discard all data until valid frame header
                        NextState("IDLE"),
                    ],
                }),
            )
        )

        packet_buffer = Signal(layout_len(img_header_layout))
        # one store per header char, selected by cnt
        store_case = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }
        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(cnt, store_case),
                If(cnt == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(cnt, cnt.reset),
                ).Else(
                    NextValue(cnt, cnt + 1),
                ),
            ),
        )

        # dissect packet
        self.metadata = SimpleNamespace()
        idx = 0
        for name, size in img_header_layout:
            # CXP use MSB even when sending duplicate chars
            setattr(self.metadata, name, switch_endianness(packet_buffer[idx:idx+size]))
            idx += size


class Custom_Pixel_Gearbox(Module):
    """Repack `word_dw`-bit words into groups of four `size`-bit pixels.

    Sink words are written into a shift register slot selected by `w_cnt` and
    read back in `4*size`-bit slots selected by `r_cnt`; each pixel is placed
    in its own `pixel_width`-bit lane of `source.data`. When the sink flags
    `eop`, the NEWLINE state resets the counters and emits one more word whose
    `valid` mask is derived from the two lowest bits of `x_size`.

    `size` must be 8 or 16.
    """

    def __init__(self, size):
        assert size in [8, 16]
        self.x_size = Signal(3*char_width)

        sink_dw, source_dw = word_dw, size*4
        self.sink = stream.Endpoint([("data", sink_dw)])
        self.source = stream.Endpoint(pixel4x_layout)

        # # #

        shift_reg_size = lcm(sink_dw, source_dw)
        # ensure the shift register is at least twice the size of sink/source dw
        if (shift_reg_size//sink_dw) < 2:
            shift_reg_size = shift_reg_size * 2
        if (shift_reg_size//source_dw) < 2:
            shift_reg_size = shift_reg_size * 2

        # Control interface
        reset_reg = Signal()
        level = Signal(max=shift_reg_size)
        we = Signal()
        re = Signal()

        self.w_cnt = Signal(max=shift_reg_size//sink_dw)
        self.r_cnt = Signal(max=shift_reg_size//source_dw)

        self.sync += [
            If(reset_reg,
                level.eq(level.reset),
            ).Else(
                If(we & ~re, level.eq(level + sink_dw)),
                If(~we & re, level.eq(level - source_dw)),
                If(we & re, level.eq(level + sink_dw - source_dw)),
            ),

            If(reset_reg,
                self.w_cnt.eq(self.w_cnt.reset),
                self.r_cnt.eq(self.r_cnt.reset),
            ).Else(
                If(we,
                    If(self.w_cnt == ((shift_reg_size//sink_dw) - 1),
                        self.w_cnt.eq(self.w_cnt.reset),
                    ).Else(
                        self.w_cnt.eq(self.w_cnt + 1),
                    )
                ),
                If(re,
                    If(self.r_cnt == ((shift_reg_size//source_dw) - 1),
                        self.r_cnt.eq(self.r_cnt.reset),
                    ).Else(
                        self.r_cnt.eq(self.r_cnt + 1),
                    )
                ),
            )
        ]

        self.submodules.fsm = fsm = FSM(reset_state="COPY")
        fsm.act(
            "COPY",
            self.sink.ack.eq(1),
            self.source.stb.eq(level >= source_dw),
            we.eq(self.sink.stb),
            re.eq((self.source.stb & self.source.ack)),
            If(self.sink.stb & self.sink.eop,
                NextState("NEWLINE"),
            ),
        )

        valid_stb = Signal()
        fsm.act(
            "NEWLINE",
            reset_reg.eq(1),
            valid_stb.eq(1),
            self.source.stb.eq(1),
            NextState("COPY"),
        )

        # Data path
        self.shift_register = Signal(shift_reg_size, reset_less=True)

        sink_cases = {}
        for i in range(shift_reg_size//sink_dw):
            sink_cases[i] = [
                self.shift_register[sink_dw*i:sink_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(self.w_cnt, sink_cases))

        source_cases = {}
        for i in range(shift_reg_size//source_dw):
            source_cases[i] = []
            for j in range(4):
                source_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        self.shift_register[(source_dw * i) + (size * j) : (source_dw * i) + (size * (j + 1))]
                    )
                )

        # precalculate which last pixels are valid
        self.valid = Signal(4)
        bit_cases = {
            0: self.valid.eq(0b1111),
            1: self.valid.eq(0b0001),
            2: self.valid.eq(0b0011),
            3: self.valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)

        self.comb += [
            Case(self.r_cnt, source_cases),
            If(valid_stb,
                self.source.valid.eq(self.valid),
            ).Else(
                self.source.valid.eq(0b1111),
            ),
        ]


class Frame_Deserializer(Module):
    """Chain `End_Of_Line_Inserter` into `Custom_Pixel_Gearbox`.

    `l_size` and `x_size` are registered before being handed to the
    sub-modules. `sink` is the end-of-line inserter's sink and `source` is the
    gearbox's source.

    NOTE(review): `width` and `pixel_size` are currently unused; the gearbox
    is instantiated with a fixed pixel size of 16.
    """

    def __init__(self, width, pixel_size):
        self.new_frame = Signal()
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)

        # # #

        self.submodules.eol_inserter = eol_inserter = End_Of_Line_Inserter()
        self.sync += eol_inserter.l_size.eq(self.l_size)

        self.submodules.gearbox = gearbox = Custom_Pixel_Gearbox(16)
        self.sync += gearbox.x_size.eq(self.x_size)

        self.comb += eol_inserter.source.connect(gearbox.sink)

        self.sink = eol_inserter.sink
        self.source = gearbox.source

        # # TODO: use this to control mux
        # Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }

        # self.sync += [
        #     pixel_format_r.eq(header_decoder.metadata.pixel_format),
        #     Case(pixel_format_r,
        #         {
        #             pix_fmt["mono8"]: pix_size.eq(8),
        #             pix_fmt["mono10"]: pix_size.eq(10),
        #             pix_fmt["mono12"]: pix_size.eq(12),
        #             pix_fmt["mono14"]: pix_size.eq(14),
        #             pix_fmt["mono16"]: pix_size.eq(16),
        #             "default": pix_size.eq(pix_size.reset),
        #         }
        #     )
        # ]


def inc_mod(s, m):
    """Return statements that increment `s` and wrap it to 0 after `m - 1`."""
    return [s.eq(s + 1), If(s == (m - 1), s.eq(0))]


class Pixel_Gearbox(Module):
    """Generic width converter from `i_dw`-bit words to `o_dw`-bit words.

    Sink words are always acknowledged and written into a shift register;
    the source is strobed whenever at least `o_dw` bits are buffered.
    """

    def __init__(self, i_dw, o_dw):
        self.sink = sink = stream.Endpoint([("data", i_dw)])
        self.source = source = stream.Endpoint([("data", o_dw)])

        # # #

        # From Litex

        # TODO: change this to purpose built module
        # TODO: handle linebreak stb
        # TODO: handle the last line may only contain 1, 2, 3 or 4 pixels

        # Section 10.4.2 (CXP-001-2021)
        # the line data need to be fitted inside of 32*nbits where n is integers
        # extra bits are padded with zero
        # -> perhaps use this as advantage?? it's provided as DsizeL
        # -> use DsizeL as another counter to indicate line break?

        io_lcm = lcm(i_dw, o_dw)
        if (io_lcm//i_dw) < 2:
            io_lcm = io_lcm * 2
        if (io_lcm//o_dw) < 2:
            io_lcm = io_lcm * 2

        # Control path

        level = Signal(max=io_lcm)
        i_inc = Signal()
        i_count = Signal(max=io_lcm//i_dw)
        o_inc = Signal()
        o_count = Signal(max=io_lcm//o_dw)

        self.comb += [
            sink.ack.eq(1),
            # sink.ack.eq(level < (io_lcm - i_dw)),
            source.stb.eq(level >= o_dw),
        ]
        self.comb += [
            i_inc.eq(sink.stb & sink.ack),
            o_inc.eq(source.stb & source.ack)
        ]
        self.sync += [
            If(i_inc, *inc_mod(i_count, io_lcm//i_dw)),
            If(o_inc, *inc_mod(o_count, io_lcm//o_dw)),
            If(i_inc & ~o_inc, level.eq(level + i_dw)),
            If(~i_inc & o_inc, level.eq(level - o_dw)),
            If(i_inc & o_inc, level.eq(level + i_dw - o_dw)),
        ]

        # Data path

        shift_register = Signal(io_lcm, reset_less=True)

        i_cases = {}
        i_data = Signal(i_dw)
        self.comb += i_data.eq(sink.data)
        for i in range(io_lcm//i_dw):
            i_cases[i] = shift_register[io_lcm - i_dw*(i+1):io_lcm - i_dw*i].eq(i_data)
        self.sync += If(sink.stb & sink.ack, Case(i_count, i_cases))

        o_cases = {}
        o_data = Signal(o_dw)
        for i in range(io_lcm//o_dw):
            o_cases[i] = o_data.eq(shift_register[io_lcm - o_dw*(i+1):io_lcm - o_dw*i])
        self.comb += Case(o_count, o_cases)
        self.comb += source.data.eq(o_data)


class ROI_Pipeline(Module):
    """Buffer -> CRC checker -> header decoder -> frame deserializer chain.

    `sink` is the buffer's sink and `source` is the deserializer's source.
    The decoded `new_frame`, `l_size` and `x_size` are forwarded from the
    header decoder to the deserializer.
    """

    def __init__(self, res_width=32, pixel_size=16):
        # NOTE: csr need to stay outside since this module need to be cdr in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match pixel and see whether the it matches the supported ones (via csr perhaps?)

        self.submodules.buffer = buffer = Buffer(word_layout_dchar)  # to improve timing from broadcaster
        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()

        self.submodules.deserializer = deserializer = Frame_Deserializer(res_width, pixel_size)

        self.comb += [
            deserializer.new_frame.eq(header_decoder.new_frame),
            deserializer.l_size.eq(header_decoder.metadata.l_size),
            deserializer.x_size.eq(header_decoder.metadata.x_size),
        ]

        self.pipeline = [buffer, crc_checker, header_decoder, deserializer]
        for s, d in zip(self.pipeline, self.pipeline[1:]):
            self.comb += s.source.connect(d.sink)
        self.sink = self.pipeline[0].sink

        # DEBUG
        self.source = self.pipeline[-1].source
        self.comb += self.source.ack.eq(1)  # simulated a proper consumer, idk why but without this it will destroy timing


class Frame_Packet_Router(Module):
    """Merge the downconnection streams and fan them out to the framebuffers.

    Each downconnection goes through its own `EOP_Inserter` into a shared
    `Stream_Arbiter`, whose output feeds a `Stream_Broadcaster` connected to
    every framebuffer sink. `n_ext_active` and `routing_table` are registered
    before reaching the arbiter and the broadcaster.

    NOTE(review): `packet_size` and `pmod_pads` are accepted but not used here.
    """
    # packet size expressed in bits
    def __init__(self, downconns, framebuffer, packet_size, pmod_pads):
        n_downconn = len(downconns)
        n_buffer = len(framebuffer)
        assert n_downconn > 0 and n_buffer > 0

        self.n_ext_active = Signal(max=n_downconn)
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        # # #

        #                  +----------+     +-------------+
        # eop marker ----->|          |     |             |------> crc checker
        #                  | arbiter  |---->| broadcaster |
        # eop marker ----->| need eop |     |             |------> crc checker
        #                  +----------+     +-------------+
        #

        self.submodules.arbiter = arbiter = Stream_Arbiter(n_downconn)
        self.sync += arbiter.n_ext_active.eq(self.n_ext_active)

        self.submodules.broadcaster = broadcaster = Stream_Broadcaster(n_buffer)
        for i, s in enumerate(self.routing_table):
            self.sync += broadcaster.routing_table[i].eq(s)

        for i, d in enumerate(downconns):
            # eop is needed for arbiter and crc checker to work correctly
            eop_marker = EOP_Inserter()
            self.submodules += eop_marker
            self.comb += [
                d.source.connect(eop_marker.sink),
                eop_marker.source.connect(arbiter.sinks[i])
            ]

        self.comb += arbiter.source.connect(broadcaster.sink)
        for i, fb in enumerate(framebuffer):
            self.comb += broadcaster.sources[i].connect(fb.sink)