# artiq-zynq/src/gateware/cxp_frame_pipeline.py
# (repository web-view header: 694 lines, 23 KiB, Python)

from migen import *
from migen.genlib.coding import PriorityEncoder
from misoc.interconnect.csr import *
from misoc.interconnect import stream
from misoc.cores.liteeth_mini.mac.crc import LiteEthMACCRCEngine
from cxp_pipeline import *
# from src.gateware.cxp_pipeline import * # for sim only
from types import SimpleNamespace
from math import lcm
# Width in bits of one pixel slot in the 4-pixel output stream.
pixel_width = 16

# Stream layout that carries four pixel slots per beat together with a
# 4-bit `valid` field.
pixel4x_layout = [
    ("data", 4 * pixel_width),
    ("valid", 4),
]
def switch_endianness(s):
    """Return `s` with the order of its 8-bit groups reversed.

    `s` must have a length that is a multiple of 8; the groups are
    concatenated with `Cat`, last group first.
    """
    assert len(s) % 8 == 0
    n_bytes = len(s) // 8
    return Cat([s[8 * i:8 * (i + 1)] for i in reversed(range(n_bytes))])
class EOP_Inserter(Module):
    """Single-register stream stage that swallows the packet-end K-code.

    Words are copied from `sink` to a registered `source`. When the sink
    presents the `KCode["pak_end"]` character (with `dchar_k` set), that word
    is not forwarded; instead `source.eop` is raised combinatorially.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        # Plain expression aliases; no additional signals are created.
        at_pak_end = (self.sink.dchar == KCode["pak_end"]) & (self.sink.dchar_k == 1)
        can_load = ~self.source.stb | self.source.ack

        # The output register is reloaded whenever it is empty or being read.
        self.sync += If(can_load,
            If(at_pak_end,
                # drop the packet-end word itself
                self.source.stb.eq(0),
            ).Else(
                self.sink.connect(self.source, omit={"ack", "eop"}),
            ),
        )

        self.comb += [
            self.source.eop.eq(at_pak_end),
            self.sink.ack.eq(can_load),
        ]
class End_Of_Line_Inserter(Module):
    """Strip the K-code fields from the stream and tag the end of each line.

    `l_size` is the number of data words per image line. A down-counter
    tracks the position inside the line and `source.eop` is driven high while
    the counter equals 1.
    """

    def __init__(self):
        self.l_size = Signal(3*char_width)
        self.sink = stream.Endpoint(word_layout_dchar)
        # pixel data does not need the K-code fields
        self.source = stream.Endpoint([("data", word_dw)])

        # # #

        # TODO: there may be a reset bug when cxp_gtx_rx is not reset but the
        # frame size changes: the counter would reload from the previous
        # l_size instead of the new one, giving a wrong eop tag.
        # NOTE: self.sink.stb is only active after new_frame, so the counter
        # only changes after new_frame went high. Also, after the last word
        # has been transmitted the counter is 1, so it reloads from the
        # current self.l_size regardless.
        words_left = Signal.like(self.l_size, reset=1)
        can_load = ~self.source.stb | self.source.ack

        self.sync += If(can_load,
            self.sink.connect(self.source, omit={"ack", "eop", "k", "dchar", "dchar_k"}),
            If(self.sink.stb,
                If(words_left != 1,
                    words_left.eq(words_left - 1),
                ).Else(
                    words_left.eq(self.l_size),
                ),
            ),
        )

        self.comb += [
            # eop is repurposed as the end-of-line tag
            self.source.eop.eq(words_left == 1),
            self.sink.ack.eq(can_load),
        ]
class EOP_Marker(Module):
    """Registered stream stage that flags `eop` on a falling edge of `sink.stb`.

    `source.eop` is high in the cycle where `sink.stb` is low but was high in
    the previous cycle.
    """

    def __init__(self):
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        sink_stb_r = Signal()  # sink.stb delayed by one clock
        can_load = ~self.source.stb | self.source.ack

        self.sync += sink_stb_r.eq(self.sink.stb)
        self.sync += If(can_load,
            self.source.payload.eq(self.sink.payload),
            self.source.stb.eq(self.sink.stb),
        )

        self.comb += [
            self.source.eop.eq(sink_stb_r & ~self.sink.stb),
            self.sink.ack.eq(can_load),
        ]
class Stream_Arbiter(Module):
    """Forward one of `n_downconn` sinks to `source`, rotating on every `eop`.

    `n_ext_active` is the index of the highest active connection; once the
    selected index reaches it, the selection wraps back to its reset value.
    """

    def __init__(self, n_downconn):
        self.n_ext_active = Signal(max=n_downconn)
        self.sinks = [stream.Endpoint(word_layout_dchar) for _ in range(n_downconn)]
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.fsm = fsm = FSM(reset_state="COPY")

        selected = Signal.like(self.n_ext_active)
        forward = {idx: sink.connect(self.source) for idx, sink in enumerate(self.sinks)}

        fsm.act("COPY",
            Case(selected, forward),
            If(self.source.eop,
                NextState("SWITCH_SOURCE"),
            ),
        )

        # Section 9.5.5 (CXP-001-2021)
        # When multiple connections are active, stream packets are transmitted
        # in ascending order of Connection ID (only ch1->2->3->4 is currently
        # supported), and only one connection transmits data at a time.
        at_last_connection = selected == self.n_ext_active
        fsm.act("SWITCH_SOURCE",
            NextState("COPY"),
            If(at_last_connection,
                NextValue(selected, selected.reset),
            ).Else(
                NextValue(selected, selected + 1),
            ),
        )
@ResetInserter()
@CEInserter()
class CXPCRC32(Module):
    """CRC-32 register built around `LiteEthMACCRCEngine`.

    `data` feeds the engine, `value` exposes the bit-reversed CRC register
    and `error` is high whenever the register differs from `check`.
    """

    # Section 9.2.2.2 (CXP-001-2021)
    width = 32
    polynom = 0x04C11DB7
    seed = (1 << width) - 1
    check = 0

    def __init__(self, data_width):
        self.data = Signal(data_width)
        self.value = Signal(self.width)
        self.error = Signal()

        # # #

        engine = LiteEthMACCRCEngine(data_width, self.width, self.polynom)
        self.submodules.engine = engine

        # running CRC state, initialised to the seed
        crc_reg = Signal(self.width, reset=self.seed)
        self.sync += crc_reg.eq(engine.next)

        self.comb += [
            engine.last.eq(crc_reg),
            engine.data.eq(self.data),
            self.error.eq(crc_reg != self.check),
            self.value.eq(crc_reg[::-1]),
        ]
# For verifying crc in stream data packet
class CXPCRC32_Checker(Module):
    """Check the CRC of each stream packet and strip the CRC word.

    Words are forwarded from `sink` to `source` while being fed to the CRC
    engine. The word flagged with `eop` is consumed without being forwarded;
    in the following cycle `error_cnt` is incremented if the CRC register
    reports an error, and the CRC is reset for the next packet.
    """

    def __init__(self):
        self.error_cnt = Signal(16)
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        # # #

        self.submodules.crc = crc = CXPCRC32(word_dw)
        self.comb += crc.data.eq(self.sink.data)

        self.submodules.fsm = fsm = FSM(reset_state="INIT")

        fsm.act("INIT",
            crc.reset.eq(1),
            NextState("CHECKING"),
        )

        fsm.act("RESET",
            crc.reset.eq(1),
            If(crc.error, NextValue(self.error_cnt, self.error_cnt + 1)),
            NextState("CHECKING"),
        )

        fsm.act("CHECKING",
            If(self.sink.stb & self.sink.eop,
                # discard the crc
                self.sink.ack.eq(1),
                NextState("RESET"),
            ).Else(
                self.sink.connect(self.source),
            ),
            # Advance the CRC only when the word is actually transferred.
            # Gating on stb alone would hash the same word once per cycle
            # while the downstream holds ack low.
            If(self.sink.stb & self.sink.ack,
                crc.ce.eq(1),
            ),
        )
class Stream_Broadcaster(Module):
    """Parse the stream packet header and route the payload to one source.

    The first four characters received are latched as stream id, packet tag
    and the two bytes of the packet size (high byte first). The following
    words are forwarded to the source whose routing id matches the stream
    id: source 0 matches `default_id`, source i+1 matches `routing_table[i]`.
    Words of packets without a match are acknowledged and dropped.
    """

    def __init__(self, n_buffer, default_id=0):
        assert n_buffer > 0

        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]
        self.sources = [stream.Endpoint(word_layout_dchar) for _ in range(n_buffer)]
        self.sink = stream.Endpoint(word_layout_dchar)

        # # #

        self.stream_id = Signal(char_width)
        self.pak_tag = Signal(char_width)
        self.stream_pak_size = Signal(char_width * 2)

        self.submodules.fsm = fsm = FSM(reset_state="WAIT_HEADER")

        # While waiting for a header the latched fields are held at their
        # reset values; the capture below overrides this for stream_id.
        fsm.act("WAIT_HEADER",
            *[
                NextValue(reg, reg.reset)
                for reg in (self.stream_id, self.pak_tag, self.stream_pak_size)
            ]
        )

        # (state, destination of the received character, next state)
        header_steps = [
            ("WAIT_HEADER", self.stream_id, "GET_PAK_TAG"),
            ("GET_PAK_TAG", self.pak_tag, "GET_PAK_SIZE_0"),
            ("GET_PAK_SIZE_0", self.stream_pak_size[8:], "GET_PAK_SIZE_1"),
            ("GET_PAK_SIZE_1", self.stream_pak_size[:8], "STORE_BUFFER"),
        ]
        for state, field, following in header_steps:
            fsm.act(state,
                self.sink.ack.eq(1),
                If(self.sink.stb,
                    NextValue(field, self.sink.dchar),
                    NextState(following),
                ),
            )

        # routing decoder
        sel = Signal(n_buffer)
        no_match = Signal()
        match_ids = [default_id] + self.routing_table
        self.comb += [
            sel[i].eq(self.stream_id == match_id)
            for i, match_id in enumerate(match_ids)
        ]
        # DEBUG: disregard the stream id = 0 rule
        # self.comb += source_sel[0].eq(self.stream_id == self.routing_table[0])

        # ensure the lower source has priority when two or more bits of sel are high
        self.submodules.coder = coder = PriorityEncoder(n_buffer)
        sel_r = Signal.like(coder.o)
        self.sync += [
            coder.i.eq(sel),
            sel_r.eq(coder.o),
            no_match.eq(coder.n),
        ]

        routing = {i: self.sink.connect(src) for i, src in enumerate(self.sources)}
        routing["default"] = self.sink.ack.eq(1)  # discard if invalid

        fsm.act("STORE_BUFFER",
            If(no_match,
                self.sink.ack.eq(1),
            ).Else(
                Case(sel_r, routing),
            ),
            # assume downstream is not blocked
            If(self.sink.stb,
                NextValue(self.stream_pak_size, self.stream_pak_size - 1),
                If(self.stream_pak_size == 0,
                    NextState("WAIT_HEADER"),
                ),
            ),
        )
class Frame_Header_Decoder(Module):
    """Detect stream markers, decode the image header and pass pixel data on.

    After a `KCode["stream_marker"]` character the next character selects the
    marker type: a new-frame marker is followed by the image header, whose
    characters are captured into `metadata`; a line-break marker goes straight
    back to copying. While copying, every word that is not a stream marker is
    forwarded from `sink` to `source`.

    `new_frame`, `new_line` and `decode_err` are driven from the marker-type
    decode. `format_error` is declared but not driven in this block.
    """

    def __init__(self):
        self.format_error = Signal()
        self.decode_err = Signal()
        self.new_frame = Signal()
        self.new_line = Signal()

        # Table 47 (CXP-001-2021)
        n_metadata_chars = 23
        img_header_layout = [
            ("stream_id", char_width),
            ("source_tag", 2*char_width),
            ("x_size", 3*char_width),
            ("x_offset", 3*char_width),
            ("y_size", 3*char_width),
            ("y_offset", 3*char_width),
            ("l_size", 3*char_width),  # number of data words per image line
            ("pixel_format", 2*char_width),
            ("tap_geo", 2*char_width),
            ("flag", char_width),
        ]
        assert layout_len(img_header_layout) == n_metadata_chars*char_width

        # # #

        # TODO: decode Image header, line break
        self.sink = stream.Endpoint(word_layout_dchar)
        self.source = stream.Endpoint(word_layout_dchar)

        self.submodules.fsm = fsm = FSM(reset_state="IDLE")

        # expression alias: the sink currently presents a stream marker
        marker_seen = (
            self.sink.stb
            & (self.sink.dchar == KCode["stream_marker"])
            & (self.sink.dchar_k == 1)
        )

        fsm.act("IDLE",
            self.sink.ack.eq(1),
            If(marker_seen,
                NextState("DECODE"),
            ),
        )

        fsm.act("COPY",
            # forward data until the next new-line or new-frame marker
            If(marker_seen,
                self.sink.ack.eq(1),
                NextState("DECODE"),
            ).Else(
                self.sink.connect(self.source),
            ),
        )

        # marker type codes
        marker_type = {
            "new_frame": 0x01,
            "line_break": 0x02,
        }

        char_idx = Signal(max=n_metadata_chars)
        fsm.act("DECODE",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(self.sink.dchar, {
                    marker_type["new_frame"]: [
                        self.new_frame.eq(1),
                        NextValue(char_idx, char_idx.reset),
                        NextState("GET_FRAME_DATA"),
                    ],
                    marker_type["line_break"]: [
                        self.new_line.eq(1),
                        NextState("COPY"),
                    ],
                    "default": [
                        self.decode_err.eq(1),
                        # discard all data until a valid frame header
                        NextState("IDLE"),
                    ],
                }),
            ),
        )

        packet_buffer = Signal(layout_len(img_header_layout))
        store_char = {
            i: NextValue(packet_buffer[8*i:8*(i+1)], self.sink.dchar)
            for i in range(n_metadata_chars)
        }

        fsm.act("GET_FRAME_DATA",
            self.sink.ack.eq(1),
            If(self.sink.stb,
                Case(char_idx, store_char),
                If(char_idx == n_metadata_chars - 1,
                    NextState("COPY"),
                    NextValue(char_idx, char_idx.reset),
                ).Else(
                    NextValue(char_idx, char_idx + 1),
                ),
            ),
        )

        # dissect packet
        fields = {}
        offset = 0
        for name, size in img_header_layout:
            # CXP sends multi-character fields MSB first (even when the
            # characters are duplicated), so swap the byte order of each field
            fields[name] = switch_endianness(packet_buffer[offset:offset + size])
            offset += size
        self.metadata = SimpleNamespace(**fields)
2025-01-13 15:48:47 +08:00
class Pixel_Gearbox(Module):
    """Repack `word_dw`-bit stream words into groups of four `size`-bit pixels.

    Incoming words are written into a ring buffer in `word_dw`-bit slots and
    read back in `4*size`-bit slots; each of the four pixels is placed in its
    own `pixel_width`-bit slot of `source.data`. `sink.eop` marks the last
    word of a line: the buffer is then flushed, the final beat carries the
    `valid` mask derived from `x_size[:2]`, and the counters are reset.

    Parameters
    ----------
    size : int
        Pixel width in bits; one of 8, 10, 12, 14 or 16.
    """

    def __init__(self, size):
        assert size in [8, 10, 12, 14, 16]
        self.x_size = Signal(3*char_width)

        sink_dw, source_dw = word_dw, size*4
        self.sink = stream.Endpoint([("data", sink_dw)])
        self.source = stream.Endpoint(pixel4x_layout)

        # # #

        ring_buf_size = lcm(sink_dw, source_dw)
        # ensure the shift register is at least twice the size of sink/source dw
        if (ring_buf_size//sink_dw) < 2:
            ring_buf_size = ring_buf_size * 2
        if (ring_buf_size//source_dw) < 2:
            ring_buf_size = ring_buf_size * 2

        n_sink_slots = ring_buf_size//sink_dw
        n_source_slots = ring_buf_size//source_dw

        # Control interface
        reset_reg = Signal()
        we = Signal()
        re = Signal()

        level = Signal(max=ring_buf_size)
        w_cnt = Signal(max=n_sink_slots)
        r_cnt = Signal(max=n_source_slots)

        self.sync += [
            # fill level in bits
            If(reset_reg,
                level.eq(level.reset),
            ).Else(
                If(we & ~re, level.eq(level + sink_dw)),
                If(~we & re, level.eq(level - source_dw)),
                If(we & re, level.eq(level + sink_dw - source_dw)),
            ),
            # write / read slot pointers, wrapping at the end of the buffer
            If(reset_reg,
                w_cnt.eq(w_cnt.reset),
                r_cnt.eq(r_cnt.reset),
            ).Else(
                If(we,
                    If(w_cnt == (n_sink_slots - 1),
                        w_cnt.eq(w_cnt.reset),
                    ).Else(
                        w_cnt.eq(w_cnt + 1),
                    )
                ),
                If(re,
                    If(r_cnt == (n_source_slots - 1),
                        r_cnt.eq(r_cnt.reset),
                    ).Else(
                        r_cnt.eq(r_cnt + 1),
                    )
                ),
            ),
        ]

        # The source needs to be strobed twice at the end of a line
        # (once for level >= source_dw and once for the remaining pixels)
        # when the last word of the line satisfies the following condition:
        #
        # there exists an integer j such that
        #   sink_dw * i > size * j > source_dw * k
        # where i, k are positive integers and source_dw * k - sink_dw * (i-1) > 0
        #
        # For example size == 10
        #   32 * 2 > 10 * (5) > 40 * 1
        #   32 * 2 > 10 * (6) > 40 * 1
        #   32 * 3 > 10 * (9) > 40 * 2
        #
        # size -> (number of low x_size bits to inspect, matching residues)
        stb_aligned_table = {
            10: (4, (5, 6, 9)),   # x_size mod 16
            12: (3, (5,)),        # x_size mod 8
            14: (4, (9, 13)),     # x_size mod 16
        }
        extra_eol_handling = size in stb_aligned_table
        if extra_eol_handling:
            stb_aligned = Signal()
            n_bits, residues = stb_aligned_table[size]
            stb_cases = {residue: stb_aligned.eq(1) for residue in residues}
            # Without a default branch the register would latch at 1 and
            # never clear once x_size changes to a non-matching value.
            stb_cases["default"] = stb_aligned.eq(0)
            self.sync += Case(self.x_size[:n_bits], stb_cases)

        self.submodules.fsm = fsm = FSM(reset_state="SHIFTING")

        if extra_eol_handling:
            on_end_of_line = If(stb_aligned,
                NextState("MOVE_ALIGNED_PIX"),
            ).Else(
                NextState("MOVE_REMAINING_PIX"),
            )
        else:
            on_end_of_line = NextState("MOVE_REMAINING_PIX")

        fsm.act("SHIFTING",
            self.sink.ack.eq(1),
            self.source.stb.eq(level >= source_dw),
            we.eq(self.sink.stb),
            re.eq((self.source.stb & self.source.ack)),
            If(self.sink.stb & self.sink.eop,
                on_end_of_line,
            ),
        )

        if extra_eol_handling:
            # extra full beat before the remaining pixels are flushed
            fsm.act("MOVE_ALIGNED_PIX",
                self.source.stb.eq(1),
                re.eq((self.source.stb & self.source.ack)),
                NextState("MOVE_REMAINING_PIX"),
            )

        stb_valid = Signal()
        fsm.act("MOVE_REMAINING_PIX",
            reset_reg.eq(1),
            self.source.stb.eq(1),
            stb_valid.eq(1),
            NextState("SHIFTING"),
        )

        # Data path
        ring_buf = Signal(ring_buf_size, reset_less=True)

        sink_cases = {}
        for i in range(n_sink_slots):
            sink_cases[i] = [
                ring_buf[sink_dw*i:sink_dw*(i+1)].eq(self.sink.data),
            ]
        self.sync += If(self.sink.stb, Case(w_cnt, sink_cases))

        source_cases = {}
        for i in range(n_source_slots):
            source_cases[i] = []
            for j in range(4):
                source_cases[i].append(
                    self.source.data[pixel_width * j : pixel_width * (j + 1)].eq(
                        ring_buf[(source_dw * i) + (size * j) : (source_dw * i) + (size * (j + 1))]
                    )
                )

        # calculate which of the last pixels are valid
        valid = Signal(4)
        bit_cases = {
            0: valid.eq(0b1111),
            1: valid.eq(0b0001),
            2: valid.eq(0b0011),
            3: valid.eq(0b0111),
        }
        self.sync += Case(self.x_size[:2], bit_cases)

        self.comb += [
            Case(r_cnt, source_cases),
            If(stb_valid,
                self.source.valid.eq(valid),
            ).Else(
                self.source.valid.eq(0b1111),
            ),
        ]
class Frame_Deserializer(Module):
    """End-of-line tagging followed by pixel gearboxes (work in progress).

    One `Pixel_Gearbox` is instantiated for every supported pixel size and
    all of them are fed from the same `End_Of_Line_Inserter` output. No
    `source` endpoint is exposed yet, and the `width` and `pixel_size`
    arguments are not used in this block.
    """

    def __init__(self, width, pixel_size):
        self.new_frame = Signal()
        self.l_size = Signal(3*char_width)
        self.x_size = Signal(3*char_width)

        # # #

        eol_inserter = End_Of_Line_Inserter()
        self.submodules.eol_inserter = eol_inserter
        self.sync += eol_inserter.l_size.eq(self.l_size)

        for pix_size in (8, 10, 12, 14, 16):
            gearbox = Pixel_Gearbox(pix_size)
            self.submodules += gearbox
            self.sync += gearbox.x_size.eq(self.x_size)
            self.comb += eol_inserter.source.connect(gearbox.sink)
            # Always acknowledge the gearbox output to simulate a proper
            # consumer; per the original author, timing is destroyed
            # without this (reason unknown).
            self.comb += gearbox.source.ack.eq(1)

        # self.submodules.gearbox = gearbox = Custom_Pixel_Gearbox(8)
        # self.sync += gearbox.x_size.eq(self.x_size),
        # self.comb += eol_inserter.source.connect(gearbox.sink)

        self.sink = eol_inserter.sink
        # self.source = gearbox.source

        # # TODO: use this to control mux
        # Table 34 (CXP-001-2021)
        pix_fmt = {
            "mono8": 0x0101,
            "mono10": 0x0102,
            "mono12": 0x0103,
            "mono14": 0x0104,
            "mono16": 0x0105,
        }
        # self.sync += [
        #     pixel_format_r.eq(header_decoder.metadata.pixel_format),
        #     Case(pixel_format_r,
        #         {
        #             pix_fmt["mono8"]: pix_size.eq(8),
        #             pix_fmt["mono10"]: pix_size.eq(10),
        #             pix_fmt["mono12"]: pix_size.eq(12),
        #             pix_fmt["mono14"]: pix_size.eq(14),
        #             pix_fmt["mono16"]: pix_size.eq(16),
        #             "default": pix_size.eq(pix_size.reset),
        #         }
        #     )
        # ]
class ROI_Pipeline(Module):
    """Chain buffer -> CRC checker -> header decoder -> frame deserializer.

    `sink` is the input of the first stage. The frame geometry decoded by the
    header decoder (`l_size`, `x_size`) and its `new_frame` flag are wired to
    the deserializer.
    """

    def __init__(self, res_width=32, pixel_size=16):
        # NOTE: csr need to stay outside since this module need to be cdr in the CXP_FRAME_Pipeline module
        # NOTE: TapGeo other than 1X-1Y are not supported
        # TODO: match pixel and see whether it matches the supported ones (via csr perhaps?)

        # the buffer improves timing from the broadcaster
        self.submodules.buffer = buffer = Buffer(word_layout_dchar)
        self.submodules.crc_checker = crc_checker = CXPCRC32_Checker()
        self.submodules.header_decoder = header_decoder = Frame_Header_Decoder()
        self.submodules.deserializer = deserializer = Frame_Deserializer(res_width, pixel_size)

        metadata = header_decoder.metadata
        self.comb += [
            deserializer.new_frame.eq(header_decoder.new_frame),
            deserializer.l_size.eq(metadata.l_size),
            deserializer.x_size.eq(metadata.x_size),
        ]

        self.pipeline = [buffer, crc_checker, header_decoder, deserializer]
        # connect every stage to its successor
        for upstream, downstream in zip(self.pipeline[:-1], self.pipeline[1:]):
            self.comb += upstream.source.connect(downstream.sink)

        self.sink = self.pipeline[0].sink

        # DEBUG
        # self.source = self.pipeline[-1].source
        # self.comb += self.source.ack.eq(1) # simulate a proper consumer; without this timing is destroyed (reason unknown)
class Frame_Packet_Router(Module):
    """Merge the downconnection streams and distribute packets to buffers.

    Each downconnection goes through its own `EOP_Inserter` into a
    `Stream_Arbiter`; the arbiter output feeds a `Stream_Broadcaster` whose
    sources are connected to the sinks of `framebuffer`. `n_ext_active` and
    `routing_table` are registered before being handed to the arbiter and
    the broadcaster. `packet_size` and `pmod_pads` are not used in the
    visible part of this block.
    """

    # packet size expressed in bits
    def __init__(self, downconns, framebuffer, packet_size, pmod_pads):
        n_downconn = len(downconns)
        n_buffer = len(framebuffer)
        assert n_downconn > 0 and n_buffer > 0

        self.n_ext_active = Signal(max=n_downconn)
        self.routing_table = [Signal(char_width) for _ in range(1, n_buffer)]

        # # #

        #                  +----------+     +-------------+
        # eop marker ----->|          |     |             |------> crc checker
        #                  | arbiter  |---->| broadcaster |
        # eop marker ----->| need eop |     |             |------> crc checker
        #                  +----------+     +-------------+
        #

        self.submodules.arbiter = arbiter = Stream_Arbiter(n_downconn)
        self.sync += arbiter.n_ext_active.eq(self.n_ext_active)

        self.submodules.broadcaster = broadcaster = Stream_Broadcaster(n_buffer)
        for idx, routing_id in enumerate(self.routing_table):
            self.sync += broadcaster.routing_table[idx].eq(routing_id)

        for idx, downconn in enumerate(downconns):
            # eop is needed for arbiter and crc checker to work correctly
            # TODO: move eop inserter inside of broadcaster
            # TODO: change arbiter to use K27.7 as eop instead
            eop_inserter = EOP_Inserter()
            self.submodules += eop_inserter
            self.comb += [
                downconn.source.connect(eop_inserter.sink),
                eop_inserter.source.connect(arbiter.sinks[idx]),
            ]

        self.comb += arbiter.source.connect(broadcaster.sink)

        for idx, fb in enumerate(framebuffer):
            self.comb += broadcaster.sources[idx].connect(fb.sink)