"""CoaXPress (CXP) frame pipeline gateware.

Originally added as ``src/gateware/cxp_frame_pipeline.py``.

Data flow, as wired up in :class:`Pixel_Pipeline`::

    32+8(dchar)
    ----/----> buffer -> crc checker -> frame header decoder -> Pixel_Parser -> pixel 4x w/ coord -> ROI

Section and table numbers in comments refer to CXP-001-2021.
"""

from migen import *
from migen.genlib.coding import PriorityEncoder
from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine

from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only

from types import SimpleNamespace
from math import lcm
from operator import or_, add
# ``reduce`` is used by ROI; import it explicitly instead of relying on it
# leaking out of one of the wildcard imports above.
from functools import reduce

pixel_width = 16
pixel4x_layout = [
    ("data", pixel_width*4),
    ("valid", 4),
]


def switch_endianness(s):
    """Return ``s`` with its 8-bit characters in reversed order.

    ``len(s)`` must be a multiple of 8.
    """
    assert len(s) % 8 == 0
    char = [s[i*8:(i+1)*8] for i in range(len(s)//8)]
    return Cat(char[::-1])


class End_Of_Line_Inserter(Module):
    """
    Insert eop to indicate end of line
    and drop the K codes and duplicate char.

    ``l_size`` is the number of data words per image line; ``source.eop`` is
    asserted while the internal down-counter equals 1.
    """
    def __init__(self):
        self.l_size = Signal(3*char_width)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint([("data", word_width)])  # pixel data doesn't need k code

        # # #

        # TODO: there may be a reset bug where cxp_gtx_rx is not reset but frame size is changed
        # cnt will be reset to last l_size instead of the new l_size resulting in wrong eop tag

        # NOTE: because the self.sink.stb is only active after new_frame, the cnt is changed after the new_frame is high
        # Also, after transmitting the last word, cnt = 1, so cnt will update to the correct self.l_size regardless

        cnt = Signal.like(self.l_size, reset=1)
        self.sync += [
            # registered stage: only advance when the output slot is free
            If((~self.source.stb | self.source.ack),
                self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
                If(self.sink.stb,
                    If(cnt == 1,
                        cnt.eq(self.l_size)
                    ).Else(
                        cnt.eq(cnt - 1),
                    )
                ),
            ),
        ]
        self.comb += [
            self.sink.ack.eq(~self.source.stb | self.source.ack),
            # repurpose eop as end of line
            self.source.eop.eq(cnt == 1),
        ]


class Stream_Arbiter(Module):
    """Forward packets from ``n_channels`` sinks to one source, one channel at a time.

    FSM state ``str(n)`` connects sink ``n`` to the source. When a word with
    ``eop`` is accepted, the FSM moves to channel ``n+1`` if
    ``active_channels[n+1]`` is set, otherwise back to channel 0.
    """
    def __init__(self, n_channels):
        assert n_channels > 1  # don't need an arbiter if there is only one channel

        self.active_channels = Signal(n_channels)

        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_channels)]
        self.source = stream.Endpoint(word_layout_dchar)
        # # #

        self.submodules.fsm = fsm = FSM(reset_state="0")

        # Section 9.5.5 (CXP-001-2021)
        # When Multiple connections are active, stream packets are transmitted in
        # ascending order of Connection ID
        # Support ch0->1->2->4 topology only
        for n, sink in enumerate(self.sinks):
            if n < n_channels - 1:
                fsm.act(str(n),
                    sink.connect(self.source),
                    If(sink.stb & sink.eop & self.source.ack,
                        If(self.active_channels[n+1],
                            NextState(str(n+1)),
                        ).Else(
                            NextState(str(0)),
                        ),
                    )
                )
            else:
                # last channel always wraps around to channel 0
                fsm.act(str(n),
                    sink.connect(self.source),
                    If(sink.stb & sink.eop & self.source.ack,
                        NextState(str(0))
                    ),
                )


@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """CRC32 register built around ``LiteEthMACCRCEngine``.

    ``value`` is the bit-reversed CRC register and ``error`` is high whenever
    the register differs from ``check``.
    """
    # Section 9.2.2.2 (CXP-001-2021)
    width = 32
    polynom = 0x04C11DB7
    seed = 2**width - 1
    check = 0x00000000

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        self.submodules.engine = LiteEthMACCRCEngine(
            data_width, self.width, self.polynom
        )
        reg = Signal(self.width, reset=self.seed)
        self.sync += reg.eq(self.engine.next)
        self.comb += [
            self.engine.data.eq(self.data),
            self.engine.last.eq(reg),
            self.value.eq(reg[::-1]),
            self.error.eq(reg != self.check),
        ]


class CXPCRC32_Checker(Module):
    """Verify crc in stream data packet.

    Words are forwarded from sink to source, except the word flagged with
    ``eop``, which is acknowledged and dropped. ``error`` reflects the CRC
    error flag during the single "RESET" cycle that follows that word.
    """
    def __init__(self):
        # TODO: handle the error into a counter
        self.error = Signal()

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_width)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")
        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            self.error.eq(crc.error),
            NextState("CHECKING"),
        )

        fsm.act("CHECKING",
            If(self.sink.stb & self.sink.eop,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
            crc.ce.eq(self.sink.stb),
        )


class Stream_Broadcaster(Module):
    """Route each stream packet to one of ``n_buffer`` sources by stream id.

    The first four duplicate characters of a packet (stream id, packet tag,
    two size characters) are consumed as a header. The following words are
    forwarded to the source whose id matches: source 0 matches ``default_id``,
    source ``i+1`` matches ``routing_ids[i]``. Packets with no matching id are
    acknowledged and dropped.
    """
    def __init__(self, n_buffer=1, default_id=0):
        assert n_buffer > 0

        self.routing_ids = [Signal(char_width) for _ in range(1, n_buffer)]

        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)

        # # #

        # register the routing ids
        routing_ids_r = [Signal(char_width) for _ in range(1, n_buffer)]
        for i, routing_id in enumerate(self.routing_ids):
            self.sync += routing_ids_r[i].eq(routing_id)

        stream_id = Signal(char_width)
        pak_tag = Signal(char_width)
        stream_pak_size = Signal(char_width * 2)

        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")

        fsm.act(
            "WAIT_HEADER",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(stream_id, self.sink.dchar),
                NextState("GET_PAK_TAG"),
            ),
        )

        fsm.act(
            "GET_PAK_TAG",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(pak_tag, self.sink.dchar),
                NextState("GET_PAK_SIZE_0"),
            ),
        )

        # packet size arrives most significant character first
        fsm.act(
            "GET_PAK_SIZE_0",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(stream_pak_size[8:], self.sink.dchar),
                NextState("GET_PAK_SIZE_1"),
            ),
        )

        fsm.act(
            "GET_PAK_SIZE_1",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(stream_pak_size[:8], self.sink.dchar),
                NextState("STORE_BUFFER"),
            ),
        )

        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        self.comb += sel[0].eq(stream_id == default_id)
        for i, routing_id in enumerate(routing_ids_r):
            self.comb += sel[i+1].eq(stream_id == routing_id)
        # DEBUG: disregard the stream id = 0 rule
        # self.comb += source_sel[0].eq(self.stream_id == self.routing_table[0])

        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]

        routing = {i: self.sink.connect(s) for i, s in enumerate(self.sources)}
        routing["default"] = self.sink.ack.eq(1)  # discard if invalid
        fsm.act(
            "STORE_BUFFER",
            If(no_match,
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            If(self.sink.stb,
                NextValue(stream_pak_size, stream_pak_size - 1),
                If(stream_pak_size == 0,
                    NextValue(stream_id, stream_id.reset),
                    NextValue(pak_tag, pak_tag.reset),
                    NextValue(stream_pak_size, stream_pak_size.reset),
                    NextState("WAIT_HEADER"),
                )
            ),
        )


class Frame_Header_Decoder(Module):
    """Strip stream markers and image headers, exposing the header as ``metadata``.

    After a stream marker K code, the next duplicate character selects the
    packet type: 0x01 (new frame) pulses ``new_frame`` and captures the
    23-character image header, 0x02 (line break) pulses ``new_line``, and
    anything else pulses ``decode_err`` and returns to "IDLE". All other
    words are copied from sink to source.

    ``metadata`` is a ``SimpleNamespace`` with one attribute per field of the
    image header layout below.
    """
    def __init__(self):
        self.decode_err = Signal()

        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If((self.sink.stb & (self.sink.dchar == KCode["stream_marker"]) & (self.sink.dchar_k == 1)),
                NextState("DECODE"),
            )
        )

        fsm.act("COPY",
            # copy until the next new line or new frame marker
            If((self.sink.stb & (self.sink.dchar == KCode["stream_marker"]) & (self.sink.dchar_k == 1)),
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            )
        )

        packet_type = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        cnt = Signal(max=n_metadata_chars)
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    packet_type["new_frame"]: [
                        self.new_frame.eq(1),
                        NextValue(cnt, cnt.reset),
                        NextState("GET_FRAME_DATA"),
                    ],
                    packet_type["line_break"]: [
                        self.new_line.eq(1),
                        NextState("COPY"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # discard all data until valid frame header
                        NextState("IDLE"),
                    ],
                }),
            )
        )

        # one header character is stored per accepted word
        packet_buffer = Signal(layout_len(img_header_layout))
        store_cases = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }
        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(cnt, store_cases),
                If(cnt == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(cnt, cnt.reset),
                ).Else(
                    NextValue(cnt, cnt + 1),
                ),
            ),
        )

        # dissect packet
        self.metadata = SimpleNamespace()
        idx = 0
        for name, size in img_header_layout:
            # CXP use MSB even when sending duplicate chars
            setattr(self.metadata, name, switch_endianness(packet_buffer[idx:idx+size]))
            idx += size


class Pixel_Gearbox(Module):
    """Convert 32 bits word into 4x pixel.

    ``size`` is the pixel bit width (8, 10, 12, 14 or 16). Incoming words are
    written into a ring buffer and read back ``4*size`` bits at a time; each
    pixel is placed in its own ``pixel_width`` slot of ``source.data``.
    ``sink.eop`` marks the last word of a line; the final output word of a
    line carries ``source.eop`` and a ``valid`` mask derived from
    ``x_size % 4``.
    """
    def __init__(self, size):
        assert size <= pixel_width
        assert size in [8, 10, 12, 14, 16]

        self.x_size = Signal(3*char_width)

        sink_dw, source_dw = word_width, size*4
        self.sink = stream.Endpoint([("data", sink_dw)])
        self.source = stream.Endpoint(pixel4x_layout)

        # # #

        ring_buf_size = lcm(sink_dw, source_dw)
        # ensure the shift register is at least twice the size of sink/source dw
        if (ring_buf_size//sink_dw) < 2:
            ring_buf_size = ring_buf_size * 2
        if (ring_buf_size//source_dw) < 2:
            ring_buf_size = ring_buf_size * 2

        # Control interface

        reset_reg = Signal()
        we = Signal()
        re = Signal()
        level = Signal(max=ring_buf_size)
        w_cnt = Signal(max=ring_buf_size//sink_dw)
        r_cnt = Signal(max=ring_buf_size//source_dw)

        self.sync += [
            If(reset_reg,
                level.eq(level.reset),
            ).Else(
                If(we & ~re, level.eq(level + sink_dw)),
                If(~we & re, level.eq(level - source_dw)),
                If(we & re, level.eq(level + sink_dw - source_dw)),
            ),

            If(reset_reg,
                w_cnt.eq(w_cnt.reset),
                r_cnt.eq(r_cnt.reset),
            ).Else(
                If(we,
                    If(w_cnt == ((ring_buf_size//sink_dw) - 1),
                        w_cnt.eq(w_cnt.reset),
                    ).Else(
                        w_cnt.eq(w_cnt + 1),
                    )
                ),
                If(re,
                    If(r_cnt == ((ring_buf_size//source_dw) - 1),
                        r_cnt.eq(r_cnt.reset),
                    ).Else(
                        r_cnt.eq(r_cnt + 1),
                    )
                ),
            )
        ]

        extra_eol_handling = size in [10, 12, 14]
        if extra_eol_handling:
            # the source needs to be stb twice
            # (one for level >= source_dw and the other for the remaining pixels)
            # when the last word of each line packet satisfies the following condition:
            #
            # if there exists an integer j such that
            # sink_dw * i > size * j > source_dw * k
            # where i,k are positive integers and source_dw * k - sink_dw * (i-1) > 0
            #
            # For example size == 10
            # 32 * 2 > 10 * (5) > 40 * 1
            # 32 * 2 > 10 * (6) > 40 * 1
            # 32 * 3 > 10 * (9) > 40 * 2
            # ...
            #
            # the packing pattern for size == 10 repeats every 16 pixels
            # the remaining special cases can be taken care of using modulo operation
            stb_aligned = Signal()
            # size -> (number of low x_size bits compared, matching remainders)
            mod_bits, remainders = {
                10: (4, (5, 6, 9)),  # mod 16
                12: (3, (5,)),       # mod 8
                14: (4, (9, 13)),    # mod 16
            }[size]
            stb_cases = {r: stb_aligned.eq(1) for r in remainders}
            # stb_aligned is a register: without a default it would stay set
            # after x_size changes to a value that no longer matches.
            stb_cases["default"] = stb_aligned.eq(0)
            self.sync += Case(self.x_size[:mod_bits], stb_cases)

            eol_transition = If(stb_aligned,
                NextState("MOVE_ALIGNED_PIX"),
            ).Else(
                NextState("MOVE_REMAINING_PIX"),
            )
        else:
            eol_transition = NextState("MOVE_REMAINING_PIX")

        self.submodules.fsm = fsm = FSM(reset_state="SHIFTING")
        fsm.act(
            "SHIFTING",
            self.sink.ack.eq(1),
            self.source.stb.eq(level >= source_dw),
            we.eq(self.sink.stb),
            re.eq((self.source.stb & self.source.ack)),
            If(self.sink.stb & self.sink.eop,
                eol_transition,
            ),
        )

        if extra_eol_handling:
            fsm.act(
                "MOVE_ALIGNED_PIX",
                self.source.stb.eq(1),
                re.eq((self.source.stb & self.source.ack)),
                NextState("MOVE_REMAINING_PIX"),
            )

        stb_remaining_pix = Signal()
        fsm.act(
            "MOVE_REMAINING_PIX",
            reset_reg.eq(1),
            self.source.stb.eq(1),
            stb_remaining_pix.eq(1),
            NextState("SHIFTING"),
        )

        # Data path

        ring_buf = Signal(ring_buf_size, reset_less=True)

        sink_cases = {}
        for i in range(ring_buf_size//sink_dw):
            sink_cases[i] = [
                ring_buf[sink_dw*i:sink_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(w_cnt, sink_cases))

        source_cases = {}
        for i in range(ring_buf_size//source_dw):
            source_cases[i] = []
            for j in range(4):
                source_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        ring_buf[(source_dw * i) + (size * j) : (source_dw * i) + (size * (j + 1))]
                    )
                )

        # calculate which of the last pixels are valid
        valid = Signal(4)
        bit_cases = {
            0: valid.eq(0b1111),
            1: valid.eq(0b0001),
            2: valid.eq(0b0011),
            3: valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)

        self.comb += [
            Case(r_cnt, source_cases),
            If(stb_remaining_pix,
                self.source.valid.eq(valid),
                self.source.eop.eq(1),
            ).Else(
                self.source.valid.eq(0b1111),
            ),
        ]


class Pixel_Coordinate_Tracker(Module):
    """
    Track pixel coordinates

    Assume
    - camera is in area scan mode
    - 1X-1Y Tap geometry

    ``pixel4x`` is a list of four records (x, y, gray, stb, eof), one per
    pixel lane of the sink. ``sink.eop`` ends a line: x restarts and y either
    increments or, when it equals ``y_size - 1``, wraps to its reset value
    with ``eof`` set.
    """
    def __init__(self, res_width):
        # largest x/y pixel size supported by frame header is 24 bits
        assert res_width <= 3*char_width

        # line scanning frame will have y_size = 0 and won't trigger the end of frame bit
        self.y_size = Signal(3*char_width)
        self.sink = stream.Endpoint(pixel4x_layout)

        # # #

        # NOTE: no need for last_x/last_y csr which is used to indicate how big the frame is
        # layout = Record([
        #     ("x", res_width),
        #     ("y", res_width),
        #     ("d", pixel_width),
        #     ("stb", 1),
        #     ("eof", 1), # end of frame
        # ])
        # self.pixel4x = [layout for _ in range(4)]

        # DEBUG: for sim only, to show all record in sim
        self.pixel4x = []
        for _ in range(4):
            self.pixel4x.append(Record([
                ("x", res_width),
                ("y", res_width),
                ("gray", pixel_width),
                ("stb", 1),
                ("eof", 1),  # end of frame
            ]))

        # lane i starts at x = i and advances by 4 per accepted word
        x_4x = [Signal(len(self.pixel4x[0].x), reset=i) for i in range(4)]
        y_r = Signal(len(self.pixel4x[0].y))

        y_max = Signal.like(self.y_size)
        self.sync += [
            self.sink.ack.eq(1),
            y_max.eq(self.y_size - 1),
        ]
        for i, (x_r, pix) in enumerate(zip(x_4x, self.pixel4x)):
            # NOTE: the y_r update below is emitted once per lane; all four
            # copies are identical assignments to the same register.
            self.sync += [
                pix.stb.eq(0),
                pix.eof.eq(0),
                If(self.sink.stb,
                    If(self.sink.eop,
                        # new line
                        x_r.eq(x_r.reset),

                        If(y_r == y_max,
                            pix.eof.eq(1),
                            y_r.eq(y_r.reset),
                        ).Else(
                            y_r.eq(y_r + 1),
                        )
                    ).Else(
                        x_r.eq(x_r + 4),
                    ),
                    pix.stb.eq(self.sink.valid[i]),
                    pix.x.eq(x_r),
                    pix.y.eq(y_r),
                    pix.gray.eq(self.sink.data[pixel_width*i:pixel_width*(i+1)]),
                )
            ]


class ROI(Module):
    """
    ROI Engine. For each frame, accumulates pixel values within a
    rectangular region of interest, and reports the total.

    ``pixel_4x`` is a list of four pixel records (x, y, gray, stb, eof).
    ``cfg`` holds the rectangle corners (x0, y0, x1, y1). ``out.update`` is
    pulsed for one cycle when ``out.count`` is refreshed after end of frame.
    """
    def __init__(self, pixel_4x, count_width):
        assert len(pixel_4x) == 4

        self.cfg = Record([
            ("x0", len(pixel_4x[0].x)),
            ("y0", len(pixel_4x[0].y)),
            ("x1", len(pixel_4x[0].x)),
            ("y1", len(pixel_4x[0].y)),
        ])

        self.out = Record([
            ("update", 1),
            # registered output - can be used as CDC input
            ("count", count_width),
        ])

        # # #

        # TODO: remove the self. from self.roi_4x
        self.roi_4x = []
        for _ in range(4):
            self.roi_4x.append(Record([
                ("x_good", 1),
                ("y_good", 1),
                ("gray", len(pixel_4x[0].gray)),
                ("stb", 1),
                ("count", count_width),
            ]))

        for pix, roi in zip(pixel_4x, self.roi_4x):
            self.sync += [
                # TODO: replace the comparison with preprocessed equal
                # e.g. pix.x == self.cfg.x0 - i
                # stage 1 - generate "good" (in-ROI) signals
                If(pix.x <= self.cfg.x0,
                    roi.x_good.eq(1)
                ),
                # NOTE: this gate doesn't work as 4 pixels are coming in
                If(pix.x >= self.cfg.x1,
                    roi.x_good.eq(0)
                ),

                # This is fine because 4x pixel are on the same line
                If(pix.y == self.cfg.y0,
                    roi.y_good.eq(1)
                ),
                If(pix.y == self.cfg.y1,
                    roi.y_good.eq(0)
                ),
                If(pix.eof,
                    roi.x_good.eq(0),
                    roi.y_good.eq(0)
                ),
                roi.gray.eq(pix.gray),
                roi.stb.eq(pix.stb),

                # stage 2 - accumulate
                If((roi.stb & roi.x_good & roi.y_good),
                    roi.count.eq(roi.count + roi.gray)
                )
            ]

        eof = Signal()
        eof_buf = Signal()
        count_buf = [Signal(count_width), Signal(count_width)]

        # stage 3 - update
        self.sync += [
            eof.eq(reduce(or_, [pix.eof for pix in pixel_4x])),
            eof_buf.eq(eof),
            # pairwise partial sums of the four per-lane accumulators
            count_buf[0].eq(self.roi_4x[0].count + self.roi_4x[1].count),
            count_buf[1].eq(self.roi_4x[2].count + self.roi_4x[3].count),

            self.out.update.eq(0),
            If(eof_buf,
                [roi.count.eq(0) for roi in self.roi_4x],
                self.out.update.eq(1),
                self.out.count.eq(reduce(add, count_buf))
            ),
        ]


class Pixel_Parser(Module):
    """
    Convert the raw frame data into pixel data

    Currently only support:
    - Pixel format: mono8, mono10, mono12, mono14, mono16
    - Tap geometry: 1X-1Y
    - Scanning mode: area scanning

    One :class:`Pixel_Gearbox` is instantiated per supported format and
    ``pixel_format`` selects which one sits between the end-of-line inserter
    and the coordinate tracker. Words are acknowledged and dropped for any
    other ``pixel_format`` value.
    """
    def __init__(self, res_width):
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)
        self.y_size = Signal(3*char_width)
        self.pixel_format = Signal(2*char_width)

        # # #

        #
        # 32+8(dchar)                 32                    pixel 4x
        # ----/----> end of line ---/---> Pixel Gearboxes ----/----> Pixel Coordinate ------> pixel 4x
        #            inserter                                        Tracker                 w/ coord
        #

        self.submodules.eol_inserter = eol_inserter = End_Of_Line_Inserter()
        self.sync += eol_inserter.l_size.eq(self.l_size)
        self.sink = eol_inserter.sink

        gearboxes = {}
        for s in [8, 10, 12, 14, 16]:
            gearbox = Pixel_Gearbox(s)
            gearboxes["mono"+str(s)] = gearbox
            self.submodules += gearbox
            self.sync += gearbox.x_size.eq(self.x_size)

        # From Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }

        self.submodules.tracker = tracker = Pixel_Coordinate_Tracker(res_width)
        self.sync += tracker.y_size.eq(self.y_size)
        self.pixel4x = tracker.pixel4x

        mux_cases = {
            "default": [
                # discard unknown pixel format
                eol_inserter.source.ack.eq(1),
            ],
        }
        for fmt, code in pix_fmt.items():
            mux_cases[code] = [
                eol_inserter.source.connect(gearboxes[fmt].sink),
                gearboxes[fmt].source.connect(tracker.sink),
            ]

        self.comb += Case(self.pixel_format, mux_cases)


class Pixel_Pipeline(Module):
    """Chain buffer, CRC checker, header decoder and pixel parser, and feed an ROI engine.

    ``res_width`` is passed to :class:`Pixel_Parser` and ``count_width`` to
    :class:`ROI`. ``packet_size`` is accepted but not used by this module.
    ``sink`` is the sink of the first pipeline stage.
    """
    def __init__(self, res_width, count_width, packet_size):

        # NOTE: csr need to stay outside since this module need to be cdr in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match pixel and see whether it matches the supported ones (via csr perhaps?)

        # 32+8(dchar)
        # ----/----> crc checker ------> frame header ------> Pixel Parser ------> pixel 4x
        #                                decoder                                  w/ coord

        # DEBUG: adding fifo doesn't help
        self.submodules.buffer = buffer = stream.SyncFIFO(word_layout_dchar, 32, True)
        # self.submodules.buffer = buffer = Buffer(word_layout_dchar) # to improve timing from broadcaster
        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()
        self.submodules.parser = parser = Pixel_Parser(res_width)

        self.submodules.roi = ROI(parser.pixel4x, count_width)

        self.comb += [
            parser.l_size.eq(header_decoder.metadata.l_size),
            parser.x_size.eq(header_decoder.metadata.x_size),
            parser.y_size.eq(header_decoder.metadata.y_size),
            parser.pixel_format.eq(header_decoder.metadata.pixel_format),
        ]

        # connect each stage's source to the next stage's sink
        self.pipeline = [buffer, crc_checker, header_decoder, parser]
        for s, d in zip(self.pipeline, self.pipeline[1:]):
            self.comb += s.source.connect(d.sink)
        self.sink = self.pipeline[0].sink

        # DEBUG
        # self.pix = self.pipeline[-1].pix
        # self.source = self.pipeline[-1].source
        # self.comb += self.source.ack.eq(1) # simulate a proper consumer, idk why but without this it will destroy timing