cxp frame pipeline: frame handling pipeline

pipeline: add eop marker, cxp_crc32 checker
frame: add stream crossbar, double buffer, parser
frame: add metadata parser, frame extractor
frame: add stream arbiter, crc checker & broadcaster
frame: add 8, 10, 12, 14, 16bit pixel gearbox support
frame: add pixel coordinate tracker
This commit is contained in:
morgan 2024-11-28 13:10:56 +08:00
parent 86ae02187b
commit 89e9a438e7

View File

@ -0,0 +1,808 @@
from migen import *
from migen.genlib.coding import PriorityEncoder
from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine
from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only
from types import SimpleNamespace
from math import lcm
from operator import or_, add
# Number of bits reserved for one pixel on the 4x pixel bus.
pixel_width = 16

# Stream payload carrying four pixels per transfer:
#   data  - four pixel slots of ``pixel_width`` bits each, packed side by side
#   valid - one flag bit per pixel slot
pixel4x_layout = [("data", 4 * pixel_width), ("valid", 4)]
def switch_endianness(s):
    """Return ``s`` with the order of its 8-bit groups reversed.

    ``s`` is sliced into consecutive 8-bit chunks starting from bit 0 and the
    chunks are concatenated again in the opposite order. The length of ``s``
    must be a multiple of 8.
    """
    n_bits = len(s)
    assert n_bits % 8 == 0
    # walk the byte offsets from the highest one down to 0
    return Cat([s[lo:lo + 8] for lo in range(n_bits - 8, -1, -8)])
class End_Of_Line_Inserter(Module):
    """
    Mark the last word of every image line with eop.

    Words accepted on ``sink`` are registered and presented on ``source``,
    which only carries ``data``: the K code and duplicate char fields are
    left out of the connection. ``l_size`` gives the length of a line in
    words and is used as the reload value of the word counter.
    """
    def __init__(self):
        self.l_size = Signal(3*char_width)
        self.sink = stream.Endpoint(word_layout_dchar)
        # pixel data don't need k code
        self.source = stream.Endpoint([("data", word_width)])

        # # #

        # TODO: there may be a reset bug where cxp_gtx_rx is not reset but the frame size is changed:
        # cnt will be reloaded with the last l_size instead of the new l_size, resulting in a wrong eop tag
        # NOTE: because self.sink.stb is only active after new_frame, cnt changes after new_frame is high
        # Also, after transmitting the last word, cnt = 1, so cnt will reload the correct self.l_size regardless
        cnt = Signal.like(self.l_size, reset=1)

        # the output register can take a new word when it is empty or being read
        can_load = ~self.source.stb | self.source.ack

        # count down the words of the line, reloading after the last one
        step_cnt = If(cnt == 1,
            cnt.eq(self.l_size),
        ).Else(
            cnt.eq(cnt - 1),
        )

        self.sync += If(can_load,
            self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
            If(self.sink.stb, step_cnt),
        )

        self.comb += [
            # repurpose eop as end of line
            self.source.eop.eq(cnt == 1),
            self.sink.ack.eq(can_load),
        ]
class Stream_Arbiter(Module):
    """
    Forward one of ``n_channels`` sinks to a single source, one packet at a time.

    The FSM has one state per channel, named after the channel index. While in
    state ``n`` the sink ``n`` is connected to ``source``; when a word with
    eop is transferred, the FSM moves to channel ``n+1`` if its bit in
    ``active_channels`` is set, otherwise back to channel 0.
    """
    def __init__(self, n_channels):
        assert n_channels > 1  # an arbiter is pointless with a single channel

        self.active_channels = Signal(n_channels)
        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_channels)]
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="0")

        # Section 9.5.5 (CXP-001-2021)
        # When Multiple connections are active, stream packets are transmitted in
        # ascending order of Connection ID
        # Support ch0->1->2->4 topology only
        last_channel = n_channels - 1
        for n, sink in enumerate(self.sinks):
            if n == last_channel:
                # nothing after the last channel: always wrap around
                advance = NextState("0")
            else:
                advance = If(self.active_channels[n+1],
                    NextState(str(n+1)),
                ).Else(
                    NextState("0"),
                )

            fsm.act(str(n),
                sink.connect(self.source),
                # end of the packet has been accepted downstream
                If(sink.stb & sink.eop & self.source.ack,
                    advance,
                ),
            )
@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """
    CRC32 register built around LiteEthMACCRCEngine.

    Section 9.2.2.2 (CXP-001-2021)

    ``data`` is fed to the engine and the running CRC register is updated on
    every enabled clock cycle. ``value`` is the register with its bit order
    reversed and ``error`` is high whenever the register differs from
    ``check``.
    """
    width = 32
    polynom = 0x04C11DB7
    seed = (1 << width) - 1
    check = 0x00000000

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        # running CRC, starts from the seed after reset
        reg = Signal(self.width, reset=self.seed)

        self.submodules.engine = engine = LiteEthMACCRCEngine(
            data_width, self.width, self.polynom
        )

        self.comb += [
            engine.data.eq(self.data),
            engine.last.eq(reg),
            self.value.eq(reg[::-1]),
            self.error.eq(reg != self.check),
        ]
        self.sync += reg.eq(engine.next)
class CXPCRC32_Checker(Module):
    """
    Verify crc in stream data packet.

    Every word seen on ``sink`` is fed to the CRC. Words are forwarded to
    ``source`` except the one flagged with eop, which is acknowledged and
    dropped. ``error`` reflects the CRC error flag during the cycle right
    after that word, while the CRC is being reset for the next packet.
    """
    def __init__(self):
        # TODO: handle the error into a counter
        self.error = Signal()

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_width)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")

        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            self.error.eq(crc.error),
            NextState("CHECKING"),
        )

        last_word = self.sink.stb & self.sink.eop
        fsm.act("CHECKING",
            # NOTE(review): the CRC advances on stb alone, not on stb & ack
            crc.ce.eq(self.sink.stb),
            If(last_word,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
        )
class Stream_Broadcaster(Module):
    """
    Route each stream packet to one of ``n_buffer`` sources by its stream id.

    The first four words of a packet are consumed and their ``dchar`` stored
    as stream id, packet tag and the two bytes of the packet size. The words
    that follow are sent to the selected source: source 0 is selected by
    ``default_id``, source ``i+1`` by ``routing_ids[i]``. Words of a packet
    whose id matches nothing are acknowledged and dropped.
    """
    def __init__(self, n_buffer=1, default_id=0):
        assert n_buffer > 0

        self.routing_ids = [Signal(char_width) for _ in range(1, n_buffer)]
        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)

        # # #

        # registered copy of the routing ids
        routing_ids_r = [Signal(char_width) for _ in range(1, n_buffer)]
        for rid, rid_r in zip(self.routing_ids, routing_ids_r):
            self.sync += rid_r.eq(rid)

        stream_id = Signal(char_width)
        pak_tag = Signal(char_width)
        stream_pak_size = Signal(char_width * 2)

        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")

        # (state, register loaded from dchar, next state)
        header_steps = [
            ("WAIT_HEADER", stream_id, "GET_PAK_TAG"),
            ("GET_PAK_TAG", pak_tag, "GET_PAK_SIZE_0"),
            ("GET_PAK_SIZE_0", stream_pak_size[8:], "GET_PAK_SIZE_1"),
            ("GET_PAK_SIZE_1", stream_pak_size[:8], "STORE_BUFFER"),
        ]
        for state, target, following in header_steps:
            fsm.act(
                state,
                self.sink.ack.eq(1),
                If(
                    self.sink.stb,
                    NextValue(target, self.sink.dchar),
                    NextState(following),
                ),
            )

        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        self.comb += sel[0].eq(stream_id == default_id)
        for i, rid_r in enumerate(routing_ids_r, start=1):
            self.comb += sel[i].eq(stream_id == rid_r)

        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]

        routing = {i: self.sink.connect(s) for i, s in enumerate(self.sources)}
        routing["default"] = self.sink.ack.eq(1)  # discard if invalid

        fsm.act(
            "STORE_BUFFER",
            If(no_match,
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            If(self.sink.stb,
                NextValue(stream_pak_size, stream_pak_size - 1),
                If(stream_pak_size == 0,
                    # packet finished: clear the header registers
                    NextValue(stream_id, stream_id.reset),
                    NextValue(pak_tag, pak_tag.reset),
                    NextValue(stream_pak_size, stream_pak_size.reset),
                    NextState("WAIT_HEADER"),
                ),
            ),
        )
class Frame_Header_Decoder(Module):
    """
    Strip stream markers and image headers from the stream and decode them.

    After a stream marker K code the next char selects between a new frame
    (``new_frame`` pulses, then the 23 header chars are captured into
    ``metadata``) and a line break (``new_line`` pulses). Any other char
    pulses ``decode_err`` and data is dropped until the next stream marker.
    Everything else is forwarded from ``sink`` to ``source``.
    """
    def __init__(self):
        self.decode_err = Signal()
        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # a stream marker K code is present on the sink
        marker_seen = (
            self.sink.stb
            & (self.sink.dchar == KCode["stream_marker"])
            & (self.sink.dchar_k == 1)
        )

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If(marker_seen,
                NextState("DECODE"),
            ),
        )

        fsm.act("COPY",
            # forward data until a new line or a new frame shows up
            If(marker_seen,
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            ),
        )

        marker_type = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        cnt = Signal(max=n_metadata_chars)
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    marker_type["new_frame"]: [
                        self.new_frame.eq(1),
                        NextValue(cnt, cnt.reset),
                        NextState("GET_FRAME_DATA"),
                    ],
                    marker_type["line_break"]: [
                        self.new_line.eq(1),
                        NextState("COPY"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # discard all data until valid frame header
                        NextState("IDLE"),
                    ],
                }),
            ),
        )

        packet_buffer = Signal(layout_len(img_header_layout))
        # header char number i lands in byte i of the buffer
        store_char = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }

        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(cnt, store_char),
                If(cnt == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(cnt, cnt.reset),
                ).Else(
                    NextValue(cnt, cnt + 1),
                ),
            ),
        )

        # dissect packet
        self.metadata = SimpleNamespace()
        offset = 0
        for name, size in img_header_layout:
            # CXP use MSB even when sending duplicate chars
            field = packet_buffer[offset:offset+size]
            setattr(self.metadata, name, switch_endianness(field))
            offset += size
class Pixel_Gearbox(Module):
    """
    Convert 32 bits word into 4x pixel

    Words written on ``sink`` are packed back to back into a ring buffer and
    read out ``4*size`` bits at a time; each ``size``-bit pixel is placed in
    its own ``pixel_width``-bit slot of ``source.data``. A sink word flagged
    with eop ends the line: the remaining pixels are flushed, the last source
    word carries eop and a ``valid`` mask derived from ``x_size[:2]``, and the
    buffer pointers are reset for the next line.

    ``size`` is the pixel width in bits (8, 10, 12, 14 or 16) and ``x_size``
    is the line width in pixels.
    """
    def __init__(self, size):
        assert size <= pixel_width
        assert size in [8, 10, 12, 14, 16]

        self.x_size = Signal(3*char_width)

        sink_dw, source_dw = word_width, size*4
        self.sink = stream.Endpoint([("data", sink_dw)])
        self.source = stream.Endpoint(pixel4x_layout)

        # # #

        ring_buf_size = lcm(sink_dw, source_dw)
        # ensure the shift register is at least twice the size of sink/source dw
        if (ring_buf_size//sink_dw) < 2:
            ring_buf_size = ring_buf_size * 2
        if (ring_buf_size//source_dw) < 2:
            ring_buf_size = ring_buf_size * 2

        # Control interface
        reset_reg = Signal()
        we = Signal()
        re = Signal()

        level = Signal(max=ring_buf_size)
        w_cnt = Signal(max=ring_buf_size//sink_dw)
        r_cnt = Signal(max=ring_buf_size//source_dw)

        self.sync += [
            # number of unread bits held in the ring buffer
            If(reset_reg,
                level.eq(level.reset),
            ).Else(
                If(we & ~re, level.eq(level + sink_dw)),
                If(~we & re, level.eq(level - source_dw)),
                If(we & re, level.eq(level + sink_dw - source_dw)),
            ),

            # write/read slot pointers, both wrap around the ring buffer
            If(reset_reg,
                w_cnt.eq(w_cnt.reset),
                r_cnt.eq(r_cnt.reset),
            ).Else(
                If(we,
                    If(w_cnt == ((ring_buf_size//sink_dw) - 1),
                        w_cnt.eq(w_cnt.reset),
                    ).Else(
                        w_cnt.eq(w_cnt + 1),
                    )
                ),
                If(re,
                    If(r_cnt == ((ring_buf_size//source_dw) - 1),
                        r_cnt.eq(r_cnt.reset),
                    ).Else(
                        r_cnt.eq(r_cnt + 1),
                    )
                ),
            ),
        ]

        # the source need to be stb twice
        # (one for level >= source_dw and the other for the remaining pixels)
        # when last word of each line packet satisfied the following condition:
        #
        # if there exist an integers j such that
        # sink_dw * i > size * j > source_dw * k
        # where i,k are positive integers and source_dw * k - sink_dw * (i-1) > 0
        #
        # For example size == 10
        # 32 * 2 > 10 * (5) > 40 * 1
        # 32 * 2 > 10 * (6) > 40 * 1
        # 32 * 3 > 10 * (9) > 40 * 2
        # ...
        #
        # the packing pattern for size == 10 repeat every 16 pixels
        # the remaining special case can be taken care of using modulo operation
        #
        # size -> (number of low x_size bits to inspect, residues needing the extra stb)
        eol_alignment = {
            10: (4, (5, 6, 9)),   # mod 16
            12: (3, (5,)),        # mod 8
            14: (4, (9, 13)),     # mod 16
        }
        extra_eol_handling = size in eol_alignment
        if extra_eol_handling:
            stb_aligned = Signal()
            mod_bits, residues = eol_alignment[size]
            stb_cases = {residue: stb_aligned.eq(1) for residue in residues}
            # Without a default the flag would stay set forever once x_size
            # had matched a residue, even after x_size changes to a line
            # width that does not need the extra strobe.
            stb_cases["default"] = stb_aligned.eq(0)
            self.sync += Case(self.x_size[:mod_bits], stb_cases)

        self.submodules.fsm = fsm = FSM(reset_state="SHIFTING")

        if extra_eol_handling:
            end_of_line = If(stb_aligned,
                NextState("MOVE_ALIGNED_PIX"),
            ).Else(
                NextState("MOVE_REMAINING_PIX"),
            )
        else:
            end_of_line = NextState("MOVE_REMAINING_PIX")

        fsm.act(
            "SHIFTING",
            self.sink.ack.eq(1),
            self.source.stb.eq(level >= source_dw),
            we.eq(self.sink.stb),
            re.eq((self.source.stb & self.source.ack)),
            If(self.sink.stb & self.sink.eop,
                end_of_line,
            ),
        )

        if extra_eol_handling:
            fsm.act(
                "MOVE_ALIGNED_PIX",
                self.source.stb.eq(1),
                re.eq((self.source.stb & self.source.ack)),
                NextState("MOVE_REMAINING_PIX"),
            )

        stb_remaining_pix = Signal()
        fsm.act(
            "MOVE_REMAINING_PIX",
            reset_reg.eq(1),
            self.source.stb.eq(1),
            stb_remaining_pix.eq(1),
            NextState("SHIFTING"),
        )

        # Data path
        ring_buf = Signal(ring_buf_size, reset_less=True)

        # write side: one sink word per slot selected by w_cnt
        sink_cases = {}
        for i in range(ring_buf_size//sink_dw):
            sink_cases[i] = [
                ring_buf[sink_dw*i:sink_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(w_cnt, sink_cases))

        # read side: four pixels per slot selected by r_cnt, each pixel
        # spread into its own pixel_width-bit lane of the source
        source_cases = {}
        for i in range(ring_buf_size//source_dw):
            source_cases[i] = []
            for j in range(4):
                source_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        ring_buf[(source_dw * i) + (size * j) : (source_dw * i) + (size * (j + 1))]
                    )
                )

        # calculate which of the last pixels are valid
        valid = Signal(4)
        bit_cases = {
            0: valid.eq(0b1111),
            1: valid.eq(0b0001),
            2: valid.eq(0b0011),
            3: valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)

        self.comb += [
            Case(r_cnt, source_cases),
            If(stb_remaining_pix,
                self.source.valid.eq(valid),
                self.source.eop.eq(1),
            ).Else(
                self.source.valid.eq(0b1111),
            ),
        ]
class Pixel_Coordinate_Tracker(Module):
    """
    Track pixel coordinates

    Assume
    - camera is in area scan mode
    - 1X-1Y Tap geometry

    Each of the four pixel lanes of ``sink`` gets a registered record in
    ``pixel4x`` holding its x/y coordinate, its value (``gray``), a strobe
    taken from the lane's valid bit and an end-of-frame flag. x advances by 4
    per word and restarts on eop; y advances on eop and restarts, raising
    ``eof``, once it has reached ``y_size - 1``.
    """
    def __init__(self, res_width):
        # largest x/y pixel size supported by frame header are 24 bits
        assert res_width <= 3*char_width

        # line scanning frame will have y_size = 0 and won't trigger the end of frame bit
        self.y_size = Signal(3*char_width)
        self.sink = stream.Endpoint(pixel4x_layout)

        # # #

        # NOTE: no need for last_x/last_y csr which is used to indicate how big the frame is

        # DEBUG: for sim only, one separate record per lane so that all of them show up in sim
        self.pixel4x = []
        for _ in range(4):
            self.pixel4x.append(Record([
                ("x", res_width),
                ("y", res_width),
                ("gray", pixel_width),
                ("stb", 1),
                ("eof", 1),  # end of frame
            ]))

        # lane i starts every line at x = i
        x_4x = [Signal(len(self.pixel4x[0].x), reset=i) for i in range(4)]
        y_r = Signal(len(self.pixel4x[0].y))
        y_max = Signal.like(self.y_size)

        end_of_line = self.sink.stb & self.sink.eop
        last_line = y_r == y_max

        self.sync += [
            self.sink.ack.eq(1),
            y_max.eq(self.y_size - 1),
            # the line counter is shared by the four lanes
            If(end_of_line,
                If(last_line,
                    y_r.eq(y_r.reset),
                ).Else(
                    y_r.eq(y_r + 1),
                ),
            ),
        ]

        for i, (x_r, pix) in enumerate(zip(x_4x, self.pixel4x)):
            self.sync += [
                # stb and eof are single cycle pulses
                pix.stb.eq(0),
                pix.eof.eq(0),
                If(self.sink.stb,
                    If(self.sink.eop,
                        # new line
                        x_r.eq(x_r.reset),
                        If(last_line, pix.eof.eq(1)),
                    ).Else(
                        x_r.eq(x_r + 4),
                    ),
                    pix.stb.eq(self.sink.valid[i]),
                    pix.x.eq(x_r),
                    pix.y.eq(y_r),
                    pix.gray.eq(self.sink.data[pixel_width*i:pixel_width*(i+1)]),
                ),
            ]
class ROI(Module):
    """
    ROI Engine. For each frame, accumulates pixels values within a
    rectangular region of interest, and reports the total.

    ``pixel_4x`` is a list of four pixel records (fields ``x``, ``y``,
    ``gray``, ``stb``, ``eof``), one per pixel lane. ``cfg`` holds the
    rectangle: a pixel is counted when ``x0 <= x < x1`` and its line lies
    between ``y0`` (inclusive) and ``y1`` (exclusive). ``out.count`` is
    updated, and ``out.update`` pulses, after the end of each frame.
    """
    def __init__(self, pixel_4x, count_width):
        # imported here so the block does not depend on a star-import
        # happening to re-export it
        from functools import reduce

        assert len(pixel_4x) == 4

        self.cfg = Record([
            ("x0", len(pixel_4x[0].x)),
            ("y0", len(pixel_4x[0].y)),
            ("x1", len(pixel_4x[0].x)),
            ("y1", len(pixel_4x[0].y)),
        ])

        self.out = Record([
            ("update", 1),
            # registered output - can be used as CDC input
            ("count", count_width),
        ])

        # # #

        # TODO: remove the self. from self.roi_4x
        self.roi_4x = []
        for _ in range(4):
            self.roi_4x.append(Record([
                ("x_good", 1),
                ("y_good", 1),
                ("gray", len(pixel_4x[0].gray)),
                ("stb", 1),
                ("count", count_width),
            ]))

        for pix, roi in zip(pixel_4x, self.roi_4x):
            self.sync += [
                # stage 1 - generate "good" (in-ROI) signals

                # Each lane sees x advance in steps of 4, so it cannot wait
                # for x to be equal to x0/x1; it compares against the window
                # instead. Setting the flag on x <= x0 and only clearing it
                # on x >= x1 would also count every pixel left of x0.
                roi.x_good.eq((self.cfg.x0 <= pix.x) & (pix.x < self.cfg.x1)),

                # the 4 pixels are on the same line, equality is enough for y
                If(pix.y == self.cfg.y0,
                    roi.y_good.eq(1)
                ),
                If(pix.y == self.cfg.y1,
                    roi.y_good.eq(0)
                ),
                If(pix.eof,
                    roi.x_good.eq(0),
                    roi.y_good.eq(0)
                ),
                roi.gray.eq(pix.gray),
                roi.stb.eq(pix.stb),

                # stage 2 - accumulate
                If((roi.stb & roi.x_good & roi.y_good),
                    roi.count.eq(roi.count + roi.gray)
                )
            ]

        eof = Signal()
        eof_buf = Signal()
        count_buf = [Signal(count_width), Signal(count_width)]

        # stage 3 - update
        self.sync += [
            eof.eq(reduce(or_, [pix.eof for pix in pixel_4x])),
            eof_buf.eq(eof),
            # the four lane totals are summed in two registered steps
            count_buf[0].eq(self.roi_4x[0].count + self.roi_4x[1].count),
            count_buf[1].eq(self.roi_4x[2].count + self.roi_4x[3].count),

            self.out.update.eq(0),
            If(eof_buf,
                # these come after the per-lane statements, so the clear wins
                [roi.count.eq(0) for roi in self.roi_4x],
                self.out.update.eq(1),
                self.out.count.eq(reduce(add, count_buf))
            ),
        ]
class Pixel_Parser(Module):
    """
    Convert the raw frame data into pixel data

    Currently only support:
    - Pixel format: mono8, mono10, mono12, mono14, mono16
    - Tap geometry: 1X-1Y
    - Scanning mode: area scanning

    ``l_size``, ``x_size``, ``y_size`` and ``pixel_format`` are registered
    and handed to the sub-blocks; ``pixel_format`` selects which gearbox sits
    between the end of line inserter and the coordinate tracker. Data with an
    unknown pixel format is acknowledged and dropped.
    """
    def __init__(self, res_width):
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)
        self.y_size = Signal(3*char_width)
        self.pixel_format = Signal(2*char_width)

        # # #

        #
        #   32+8(dchar)                32                     pixel 4x
        #  ----/----> end of line ---/---> Pixel Gearboxes ----/----> Pixel Coordinate ------> pixel 4x
        #             inserter                                         Tracker                 w/ coord
        #

        self.submodules.eol_inserter = eol_inserter = End_Of_Line_Inserter()
        self.sync += eol_inserter.l_size.eq(self.l_size)
        self.sink = eol_inserter.sink

        # one gearbox per supported pixel width
        gearboxes = {}
        for bits in (8, 10, 12, 14, 16):
            gearbox = Pixel_Gearbox(bits)
            self.submodules += gearbox
            self.sync += gearbox.x_size.eq(self.x_size)
            gearboxes["mono" + str(bits)] = gearbox

        # From Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }

        self.submodules.tracker = tracker = Pixel_Coordinate_Tracker(res_width)
        self.sync += tracker.y_size.eq(self.y_size)
        self.pixel4x = tracker.pixel4x

        # discard unknown pixel format
        mux_cases = {"default": [eol_inserter.source.ack.eq(1)]}
        for name, code in pix_fmt.items():
            selected = gearboxes[name]
            mux_cases[code] = [
                eol_inserter.source.connect(selected.sink),
                selected.source.connect(tracker.sink),
            ]

        self.comb += Case(self.pixel_format, mux_cases)
class Pixel_Pipeline(Module):
    """
    Chain the frame processing stages and attach the ROI engine.

    ``sink`` is the input of the first stage; the pixel records produced by
    the parser feed the ROI engine. The header fields decoded from the stream
    (l_size, x_size, y_size, pixel_format) configure the parser.

    ``packet_size`` is accepted but not used by this block.
    """
    def __init__(self, res_width, count_width, packet_size):
        # NOTE: csr need to stay outside since this module need to be cdr in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match pixel and see whether it matches the supported ones (via csr perhaps?)

        #   32+8(dchar)
        #  ----/----> crc checker ------> frame header ------> Pixel Parser ------> pixel 4x
        #                                 decoder                                   w/ coord

        # DEBUG: adding fifo doesn't help
        self.submodules.buffer = buffer = stream.SyncFIFO(word_layout_dchar, 32, True)
        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()
        self.submodules.parser = parser = Pixel_Parser(res_width)
        self.submodules.roi = ROI(parser.pixel4x, count_width)

        # the decoded image header configures the parser
        metadata = header_decoder.metadata
        self.comb += [
            parser.l_size.eq(metadata.l_size),
            parser.x_size.eq(metadata.x_size),
            parser.y_size.eq(metadata.y_size),
            parser.pixel_format.eq(metadata.pixel_format),
        ]

        # connect every stage to the one that follows it
        self.pipeline = [buffer, crc_checker, header_decoder, parser]
        for upstream, downstream in zip(self.pipeline[:-1], self.pipeline[1:]):
            self.comb += upstream.source.connect(downstream.sink)
        self.sink = self.pipeline[0].sink