cxp frame pipeline: frame handling pipeline

pipeline: add eop marker, cxp_crc32 checker
frame: add stream crossbar, double buffer, parser
frame: add metadata parser, frame extractor
frame: add stream arbiter, crc checker & broadcaster
frame: add custom pixel gearbox 32:8*4
This commit is contained in:
morgan 2024-11-28 13:10:56 +08:00
parent 4cd10ef7be
commit 9aefdc569d

View File

@ -0,0 +1,705 @@
from migen import *
from migen.genlib.coding import PriorityEncoder
from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine
from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only
from types import SimpleNamespace
from math import lcm
# Width in bits of one pixel slot on the deserialized pixel stream.
pixel_width = 16

# Stream layout carrying four pixel slots per beat, plus a 4-bit "valid" field
# (used by the gearbox as a per-slot valid mask).
pixel4x_layout = [("data", 4 * pixel_width), ("valid", 4)]
def switch_endianness(s):
    """Return ``s`` with the order of its 8-bit chars reversed.

    ``s`` must have a length that is a multiple of 8 bits. The result is a
    ``Cat`` whose least significant char is the most significant char of ``s``.
    """
    assert len(s) % 8 == 0
    n_chars = len(s) // 8
    # Walk the chars from the top of ``s`` downwards so they end up in
    # reversed order inside the concatenation.
    return Cat([s[8*i:8*(i + 1)] for i in reversed(range(n_chars))])
class EOP_Inserter(Module):
    """One-stage stream register that removes ``pak_end`` K-codes.

    Words from ``sink`` are registered into ``source`` whenever the output
    register is free or being acknowledged. A word whose ``dchar`` is
    ``KCode["pak_end"]`` with ``dchar_k`` set is not forwarded; instead
    ``source.stb`` is cleared. ``source.eop`` is driven combinatorially while
    such a word is present on ``sink``, i.e. while the previously registered
    word is on ``source``.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        # Shared expressions (plain FHDL expressions, not extra signals)
        can_load = ~self.source.stb | self.source.ack
        at_pak_end = (self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)

        self.comb += [
            # NOTE: eop is not qualified by sink.stb
            self.source.eop.eq(at_pak_end),
            self.sink.ack.eq(can_load),
        ]

        self.sync += If(can_load,
            If(at_pak_end,
                # swallow the packet end marker
                self.source.stb.eq(0),
            ).Else(
                self.sink.connect(self.source, omit={"ack", "eop"}),
            ),
        )
class End_Of_Line_Inserter(Module):
    """One-stage stream register that tags the last word of each image line.

    ``l_size`` is the number of data words per line. A down-counter is reloaded
    with ``l_size`` after reaching 1 and decremented on every word seen on
    ``sink`` while the output register can load. ``source.eop`` is repurposed
    as an end-of-line flag and is high while the counter equals 1.
    """

    def __init__(self):
        self.l_size = Signal(3*char_width)
        self.sink = stream.Endpoint(word_layout_dchar)
        # pixel data does not need the K-code fields
        self.source = stream.Endpoint([("data", word_dw)])

        # # #

        # TODO: possible reset bug: if cxp_gtx_rx is not reset but the frame size
        # changes, cnt reloads from the previous l_size instead of the new one,
        # which puts the eop tag in the wrong place.
        # NOTE: sink.stb is only active after new_frame, so cnt only changes once
        # new_frame has gone high. After the last word is sent cnt == 1, so the
        # next reload picks up the current l_size anyway.
        cnt = Signal.like(self.l_size, reset=1)

        can_load = ~self.source.stb | self.source.ack

        self.comb += [
            # repurpose eop as end of line
            self.source.eop.eq(cnt == 1),
            self.sink.ack.eq(can_load),
        ]

        self.sync += If(can_load,
            # only stb and data are carried over to the narrower source layout
            self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
            If(self.sink.stb,
                If(cnt != 1,
                    cnt.eq(cnt - 1),
                ).Else(
                    cnt.eq(self.l_size),
                ),
            ),
        )
class EOP_Marker(Module):
    """One-stage stream register that raises ``eop`` when the input goes idle.

    ``source.eop`` is high in a cycle where ``sink.stb`` is low but was high in
    the previous cycle, which coincides with the last registered word being
    presented on ``source``.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        last_stb = Signal()
        can_load = ~self.source.stb | self.source.ack

        self.comb += [
            self.source.eop.eq(last_stb & ~self.sink.stb),
            self.sink.ack.eq(can_load),
        ]

        # remember the previous strobe to detect its falling edge
        self.sync += last_stb.eq(self.sink.stb)
        self.sync += If(can_load,
            self.source.stb.eq(self.sink.stb),
            self.source.payload.eq(self.sink.payload),
        )
class Stream_Arbiter(Module):
    """Forward whole packets from ``n_downconn`` sinks to one source in turn.

    ``n_ext_active`` is the index of the last sink taking part in the rotation.
    The sink selected by ``read_mask`` is connected to ``source`` until
    ``source.eop`` is seen; the FSM then spends one cycle in ``SWITCH_SOURCE``
    (no sink connected) to advance ``read_mask``, wrapping back to its reset
    value after ``n_ext_active``.
    """

    def __init__(self, n_downconn):
        self.n_ext_active = Signal(max=n_downconn)
        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_downconn)]
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        read_mask = Signal.like(self.n_ext_active)
        # one full sink->source connection per selectable index
        connect_case = {i: s.connect(self.source) for i, s in enumerate(self.sinks)}

        fsm.act("COPY",
            Case(read_mask, connect_case),
            If(self.source.eop,
                NextState("SWITCH_SOURCE"),
            ),
        )

        # Section 9.5.5 (CXP-001-2021)
        # When multiple connections are active, stream packets are transmitted in
        # ascending order of Connection ID (only ch1->2->3->4 is currently
        # supported), and only one connection transmits data at a time.
        fsm.act("SWITCH_SOURCE",
            If(read_mask == self.n_ext_active,
                NextValue(read_mask, read_mask.reset),
            ).Else(
                NextValue(read_mask, read_mask + 1),
            ),
            NextState("COPY"),
        )
@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """CRC-32 accumulator with clock-enable and synchronous reset.

    ``data`` is fed to the CRC engine every enabled cycle. ``value`` exposes
    the bit-reversed running register, and ``error`` is high whenever the
    register differs from ``check``.
    """

    # Section 9.2.2.2 (CXP-001-2021)
    width = 32
    polynom = 0x04C11DB7
    seed = (1 << width) - 1
    check = 0x00000000

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        self.submodules.engine = LiteEthMACCRCEngine(
            data_width, self.width, self.polynom
        )

        # running CRC state, starts from the seed after reset
        reg = Signal(self.width, reset=self.seed)
        self.sync += reg.eq(self.engine.next)

        # feed the engine
        self.comb += self.engine.data.eq(self.data)
        self.comb += self.engine.last.eq(reg)
        # expose the results
        self.comb += self.value.eq(reg[::-1])
        self.comb += self.error.eq(reg != self.check)
# For verifying the CRC in stream data packets
class CXPCRC32_Checker(Module):
    """Check the trailing CRC of each stream packet and strip it.

    Words are passed from ``sink`` to ``source`` and accumulated in a
    ``CXPCRC32``. The word flagged with ``sink.eop`` is treated as the CRC
    word: it is fed to the CRC engine, acknowledged and not forwarded. One
    cycle later (``RESET`` state) ``error_cnt`` is incremented if the CRC
    reports an error, and the CRC is reset for the next packet.

    The CRC only advances on accepted transfers (``sink.stb & sink.ack``), so
    a word held on ``sink`` while ``source`` is stalled is counted once.
    """

    def __init__(self):
        self.error_cnt = Signal(16)

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_dw)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")

        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            # the CRC register now includes the received CRC word
            If(crc.error, NextValue(self.error_cnt, self.error_cnt + 1)),
            NextState("CHECKING"),
        )

        fsm.act("CHECKING",
            If(self.sink.stb & self.sink.eop,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
            # Only accumulate words that are actually transferred; enabling on
            # stb alone would hash a stalled word once per waiting cycle.
            If(self.sink.stb & self.sink.ack,
                crc.ce.eq(1),
            ),
        )
class Stream_Broadcaster(Module):
    """Route each incoming packet to one of ``n_buffer`` sources by stream ID.

    The FSM consumes four header words from ``sink`` (stream ID, packet tag,
    packet size high char, packet size low char, each taken from ``dchar``)
    and then forwards the following words to the source selected by the
    stream ID. Source 0 matches ``default_id``; source ``i + 1`` matches
    ``routing_table[i]``. Words of a packet with no matching ID are
    acknowledged and dropped.
    """

    def __init__(self, n_buffer, default_id=0):
        assert n_buffer > 0
        # one programmable stream ID for every source except source 0
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]
        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)
        # # #
        # header fields captured from the current packet
        self.stream_id = Signal(char_width)
        self.pak_tag = Signal(char_width)
        self.stream_pak_size = Signal(char_width * 2)
        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")
        fsm.act(
            "WAIT_HEADER",
            # clear the captured header while idle; the stream_id capture below
            # takes precedence on the cycle a word arrives
            NextValue(self.stream_id, self.stream_id.reset),
            NextValue(self.pak_tag, self.pak_tag.reset),
            NextValue(self.stream_pak_size, self.stream_pak_size.reset),
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.stream_id, self.sink.dchar),
                NextState("GET_PAK_TAG"),
            ),
        )
        fsm.act(
            "GET_PAK_TAG",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                NextValue(self.pak_tag, self.sink.dchar),
                NextState("GET_PAK_SIZE_0"),
            ),
        )
        fsm.act(
            "GET_PAK_SIZE_0",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                # first size char goes to the upper bits of stream_pak_size
                NextValue(self.stream_pak_size[8:], self.sink.dchar),
                NextState("GET_PAK_SIZE_1"),
            ),
        )
        fsm.act(
            "GET_PAK_SIZE_1",
            self.sink.ack.eq(1),
            If(
                self.sink.stb,
                # second size char goes to the lower 8 bits
                NextValue(self.stream_pak_size[:8], self.sink.dchar),
                NextState("STORE_BUFFER"),
            ),
        )
        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        self.comb += sel[0].eq(self.stream_id == default_id)
        for i, routing_id in enumerate(self.routing_table):
            self.comb += sel[i+1].eq(self.stream_id == routing_id)
        # DEBUG: disregard the stream id = 0 rule
        # self.comb += source_sel[0].eq(self.stream_id == self.routing_table[0])
        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        # both the encoder input and its outputs are registered, so sel_r and
        # no_match lag stream_id by two clock cycles
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]
        routing = dict((i, self.sink.connect(s))for i, s in enumerate(self.sources))
        routing["default"] = self.sink.ack.eq(1) # discard if invalid
        fsm.act(
            "STORE_BUFFER",
            If(no_match,
                # no source matches this stream ID: drop the word
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            # (the counter advances on sink.stb regardless of sink.ack)
            If(self.sink.stb,
                NextValue(self.stream_pak_size, self.stream_pak_size - 1),
                # leaves after the word seen at count 0, i.e. stream_pak_size + 1
                # words are consumed here (presumably payload plus trailing CRC
                # word; TODO confirm)
                If(self.stream_pak_size == 0,
                    NextState("WAIT_HEADER"),
                )
            ),
        )
class Frame_Header_Decoder(Module):
    """Strip stream markers / image headers and expose the decoded metadata.

    A ``stream_marker`` K-code on ``sink`` is followed by a type char:
    ``0x01`` starts a new frame (``new_frame`` pulses and the next 23 header
    chars are captured), ``0x02`` is a line break (``new_line`` pulses), and
    anything else pulses ``decode_err`` and drops data until the next marker.
    Everything else seen in the ``COPY`` state is forwarded to ``source``.
    The captured header fields are available, char-order swapped, as
    attributes of ``self.metadata``.
    """

    def __init__(self):
        self.format_error = Signal()
        self.decode_err = Signal()
        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        # TODO: decode Image header, line break
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # a valid word carrying the stream marker K-code
        marker_seen = (
            self.sink.stb
            & (self.sink.dchar == KCode["stream_marker"])
            & (self.sink.dchar_k == 1)
        )

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        # drop everything until the first marker
        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If(marker_seen,
                NextState("DECODE"),
            ),
        )

        # forward data until the next new line or new frame marker
        fsm.act("COPY",
            If(marker_seen,
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            ),
        )

        # type chars that may follow a stream marker
        marker_types = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        cnt = Signal(max=n_metadata_chars)
        on_new_frame = [
            self.new_frame.eq(1),
            NextValue(cnt, cnt.reset),
            NextState("GET_FRAME_DATA"),
        ]
        on_line_break = [
            self.new_line.eq(1),
            NextState("COPY"),
        ]
        on_unknown = [
            self.decode_err.eq(1),
            # discard all data until valid frame header
            NextState("IDLE"),
        ]
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    marker_types["new_frame"]: on_new_frame,
                    marker_types["line_break"]: on_line_break,
                    "default": on_unknown,
                }),
            ),
        )

        packet_buffer = Signal(layout_len(img_header_layout))
        # header char number i lands in bits [8*i, 8*(i+1)) of packet_buffer
        store_char = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }
        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(cnt, store_char),
                If(cnt == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(cnt, cnt.reset),
                ).Else(
                    NextValue(cnt, cnt + 1),
                ),
            ),
        )

        # dissect packet
        self.metadata = SimpleNamespace()
        offset = 0
        for field_name, field_bits in img_header_layout:
            field = packet_buffer[offset:offset + field_bits]
            # CXP use MSB even when sending duplicate chars
            setattr(self.metadata, field_name, switch_endianness(field))
            offset += field_bits
class Custom_Pixel_Gearbox(Module):
    """Repack ``word_dw``-bit words into beats of four ``size``-bit pixels.

    Each output beat uses ``pixel4x_layout``: every pixel is placed in the low
    ``size`` bits of its own ``pixel_width``-bit slot, and ``valid`` marks
    which of the four slots hold real pixels. Only ``size == 8`` is accepted.
    ``x_size`` is used to work out how many pixels of the last beat of a line
    are valid.
    """

    def __init__(self, size):
        assert size in [8]
        self.x_size = Signal(3*char_width)
        i_dw, o_dw = word_dw, size*4
        self.sink = stream.Endpoint([("data", i_dw)])
        self.source = stream.Endpoint(pixel4x_layout)
        # # #
        # buffer size: common multiple of both widths, doubled until it holds
        # at least two input words and two output beats
        io_lcm = lcm(i_dw, o_dw)
        if (io_lcm//i_dw) < 2:
            io_lcm = io_lcm * 2
        if (io_lcm//o_dw) < 2:
            io_lcm = io_lcm * 2
        self.shift_register = Signal(io_lcm, reset_less=True)
        # Input sink
        i_inc = Signal()
        i_count = Signal(max=io_lcm//i_dw)  # index of the next input slot to write
        self.comb += [
            self.sink.ack.eq(1), # assume downstream is not blocked
            i_inc.eq(self.sink.stb),
        ]
        self.sync += [
            If(i_inc,
                If(i_count == ((io_lcm//i_dw) - 1),
                    i_count.eq(i_count.reset),
                ).Else(
                    i_count.eq(i_count + 1),
                )
            ),
        ]
        # write each incoming word into the slot selected by i_count
        i_cases = {}
        for i in range(io_lcm//i_dw):
            i_cases[i] = [
                self.shift_register[i_dw*i:i_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(i_count, i_cases))
        # Output source
        level = Signal(max=io_lcm)  # number of buffered bits not yet read out
        o_inc = Signal()
        o_count = Signal(max=io_lcm//o_dw)  # index of the next output slot to read
        self.comb += [
            self.source.stb.eq(level >= o_dw),
            o_inc.eq(self.source.stb & self.source.ack)
        ]
        self.sync += [
            If(o_inc,
                If(o_count == ((io_lcm//o_dw) - 1),
                    o_count.eq(o_count.reset),
                ).Else(
                    o_count.eq(o_count + 1),
                )
            ),
            # fill level bookkeeping for write-only, read-only and simultaneous cycles
            If(i_inc & ~o_inc, level.eq(level + i_dw)),
            If(~i_inc & o_inc, level.eq(level - o_dw)),
            If(i_inc & o_inc, level.eq(level + i_dw - o_dw)),
        ]
        # spread the four size-bit pixels of the selected slot over the four
        # pixel_width-bit output slots
        o_cases = {}
        for i in range(io_lcm//o_dw):
            o_cases[i] = []
            for j in range(4):
                o_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        self.shift_register[(o_dw * i) + (size * j) : (o_dw * i) + (size * (j + 1))]
                    )
                )
        self.comb += Case(o_count, o_cases)
        # Handle line break
        # precalculate which pixels are valid
        self.valid = Signal(4)
        # x_size modulo 4 gives the number of pixels in the last beat of a line
        # (0 means the last beat is full)
        bit_cases = {
            0: self.valid.eq(0b1111),
            1: self.valid.eq(0b0001),
            2: self.valid.eq(0b0011),
            3: self.valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)
        # TODO: reset the o_count & i_count after eop
        line_break_r = Signal()  # sink.eop delayed by one clock cycle
        self.sync += [
            line_break_r.eq(self.sink.eop),
            # NOTE(review): empty placeholder, no statements are executed here yet
            If(line_break_r,
            )
        ]
        # get which last pixels are valid
        # use end of line to reset o_count, i_count, level & stb the last pixel
        self.comb += [
            If(line_break_r,
                self.source.valid.eq(self.valid),
            ).Else(
                self.source.valid.eq(0b1111),
            )
        ]
class Frame_Deserializer(Module):
    """Chain an end-of-line inserter and a pixel gearbox into one stream stage.

    ``l_size`` and ``x_size`` are registered once and handed to the
    end-of-line inserter and the gearbox respectively. ``sink`` is the
    inserter's sink and ``source`` is the gearbox's source.

    NOTE: ``width``, ``pixel_size`` and ``new_frame`` are not used by the code
    in this class; the gearbox is always built for 8-bit pixels.
    """

    def __init__(self, width, pixel_size):
        self.new_frame = Signal()
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)

        # # #

        self.submodules.eol_inserter = eol_inserter = End_Of_Line_Inserter()
        self.submodules.gearbox = gearbox = Custom_Pixel_Gearbox(8)

        # register the line geometry before it reaches the sub-blocks
        self.sync += [
            eol_inserter.l_size.eq(self.l_size),
            gearbox.x_size.eq(self.x_size),
        ]

        self.comb += eol_inserter.source.connect(gearbox.sink)

        self.sink = eol_inserter.sink
        self.source = gearbox.source

        # TODO: use this to control mux
        # Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }
        # self.sync += [
        #     pixel_format_r.eq(header_decoder.metadata.pixel_format),
        #     Case(pixel_format_r,
        #         {
        #             pix_fmt["mono8"]: pix_size.eq(8),
        #             pix_fmt["mono10"]: pix_size.eq(10),
        #             pix_fmt["mono12"]: pix_size.eq(12),
        #             pix_fmt["mono14"]: pix_size.eq(14),
        #             pix_fmt["mono16"]: pix_size.eq(16),
        #             "default": pix_size.eq(pix_size.reset),
        #         }
        #     )
        # ]
def inc_mod(s, m):
    """Return the statements that increment ``s`` modulo ``m``.

    The second statement overrides the first when ``s`` is at ``m - 1``, so the
    pair must be used together and in this order.
    """
    increment = s.eq(s + 1)
    wrap_around = If(s == (m - 1), s.eq(0))
    return [increment, wrap_around]
class Pixel_Gearbox(Module):
    """Generic width converter from ``i_dw``-bit words to ``o_dw``-bit words.

    Input words fill a shared buffer from its most significant end downwards
    and output words are read back in the same order. ``sink`` is always
    acknowledged, so the buffer offers no backpressure to the producer.
    """

    def __init__(self, i_dw, o_dw):
        self.sink = sink = stream.Endpoint([("data", i_dw)])
        self.source = source = stream.Endpoint([("data", o_dw)])
        # # #
        # From Litex
        # TODO: change this to purpose built module
        # TODO: handle linebreak stb
        # TODO: handle the last line may only contain 1, 2, 3 or 4 pixels
        # Section 10.4.2 (CXP-001-2021)
        # the line data needs to fit inside 32*n bits, where n is an integer
        # extra bits are padded with zero
        # -> perhaps use this as advantage?? it's provided as DsizeL
        # -> use DsizeL as another counter to indicate line break?
        # buffer size: common multiple of both widths, doubled until it holds
        # at least two input words and two output words
        io_lcm = lcm(i_dw, o_dw)
        if (io_lcm//i_dw) < 2:
            io_lcm = io_lcm * 2
        if (io_lcm//o_dw) < 2:
            io_lcm = io_lcm * 2
        # Control path
        level = Signal(max=io_lcm)  # number of buffered bits not yet read out
        i_inc = Signal()
        i_count = Signal(max=io_lcm//i_dw)  # next input slot to write
        o_inc = Signal()
        o_count = Signal(max=io_lcm//o_dw)  # next output slot to read
        self.comb += [
            # always ready; the level-based backpressure below is disabled
            sink.ack.eq(1),
            # sink.ack.eq(level < (io_lcm - i_dw)),
            source.stb.eq(level >= o_dw),
        ]
        self.comb += [
            i_inc.eq(sink.stb & sink.ack),
            o_inc.eq(source.stb & source.ack)
        ]
        self.sync += [
            If(i_inc, *inc_mod(i_count, io_lcm//i_dw)),
            If(o_inc, *inc_mod(o_count, io_lcm//o_dw)),
            # fill level bookkeeping for write-only, read-only and simultaneous cycles
            If(i_inc & ~o_inc, level.eq(level + i_dw)),
            If(~i_inc & o_inc, level.eq(level - o_dw)),
            If(i_inc & o_inc, level.eq(level + i_dw - o_dw)),
        ]
        # Data path
        shift_register = Signal(io_lcm, reset_less=True)
        i_cases = {}
        i_data = Signal(i_dw)
        self.comb += i_data.eq(sink.data)
        # input slot i occupies the i-th i_dw-wide chunk counted from the top
        for i in range(io_lcm//i_dw):
            i_cases[i] = shift_register[io_lcm - i_dw*(i+1):io_lcm - i_dw*i].eq(i_data)
        self.sync += If(sink.stb & sink.ack, Case(i_count, i_cases))
        o_cases = {}
        o_data = Signal(o_dw)
        # output slot i is the i-th o_dw-wide chunk counted from the top
        for i in range(io_lcm//o_dw):
            o_cases[i] = o_data.eq(shift_register[io_lcm - o_dw*(i+1):io_lcm - o_dw*i])
        self.comb += Case(o_count, o_cases)
        self.comb += source.data.eq(o_data)
class ROI_Pipeline(Module):
    """CRC checker -> frame header decoder -> frame deserializer chain.

    ``sink`` is the CRC checker's sink and ``source`` is the deserializer's
    source. The decoded ``new_frame`` pulse and the ``l_size`` / ``x_size``
    header fields are wired from the header decoder into the deserializer.
    """

    def __init__(self, res_width=32, pixel_size=16):
        # NOTE: CSRs need to stay outside, since this module needs to be "cdr"
        #       (presumably ClockDomainsRenamer) in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match the pixel format and check whether it is one of the
        #       supported ones (via csr perhaps?)
        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()
        self.submodules.deserializer = deserializer = Frame_Deserializer(res_width, pixel_size)

        metadata = header_decoder.metadata
        self.comb += [
            deserializer.new_frame.eq(header_decoder.new_frame),
            deserializer.l_size.eq(metadata.l_size),
            deserializer.x_size.eq(metadata.x_size),
        ]

        # chain every stage's source into the next stage's sink
        self.pipeline = [crc_checker, header_decoder, deserializer]
        for upstream, downstream in zip(self.pipeline, self.pipeline[1:]):
            self.comb += upstream.source.connect(downstream.sink)

        self.sink = crc_checker.sink

        # DEBUG
        self.source = deserializer.source
        # Stand-in for a consumer that is always ready. Per the original
        # author, removing this degrades timing (cause not understood).
        self.comb += self.source.ack.eq(1)
class Frame_Packet_Router(Module):
    """Merge the downconnection streams and route packets to frame buffers.

    Every entry of ``downconns`` gets its own ``EOP_Inserter`` feeding one sink
    of a ``Stream_Arbiter``; the arbiter output goes to a ``Stream_Broadcaster``
    whose sources are connected one-to-one to the ``sink`` of each entry of
    ``framebuffer``. ``n_ext_active`` and ``routing_table`` are registered once
    before being handed to the arbiter and the broadcaster.

    NOTE: ``packet_size`` and ``pmod_pads`` are not used by the code in this
    class.
    """

    # packet size expressed in bits
    def __init__(self, downconns, framebuffer, packet_size, pmod_pads):
        n_downconn = len(downconns)
        n_buffer = len(framebuffer)
        assert n_downconn > 0 and n_buffer > 0

        self.n_ext_active = Signal(max=n_downconn)
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        # # #

        #                  +----------+     +-------------+
        # eop marker ----->|          |     |             |------> crc checker
        #                  | arbiter  |---->| broadcaster |
        # eop marker ----->| need eop |     |             |------> crc checker
        #                  +----------+     +-------------+
        #

        self.submodules.arbiter = arbiter = Stream_Arbiter(n_downconn)
        self.sync += arbiter.n_ext_active.eq(self.n_ext_active)

        self.submodules.broadcaster = broadcaster = Stream_Broadcaster(n_buffer)
        for table_entry, broadcaster_entry in zip(self.routing_table, broadcaster.routing_table):
            self.sync += broadcaster_entry.eq(table_entry)

        for d, arbiter_sink in zip(downconns, arbiter.sinks):
            # eop is needed for arbiter and crc checker to work correctly
            eop_marker = EOP_Inserter()
            self.submodules += eop_marker
            self.comb += d.source.connect(eop_marker.sink)
            self.comb += eop_marker.source.connect(arbiter_sink)

        self.comb += arbiter.source.connect(broadcaster.sink)

        for broadcaster_source, fb in zip(broadcaster.sources, framebuffer):
            self.comb += broadcaster_source.connect(fb.sink)