diff --git a/src/gateware/coaxpress_clock_align.py b/src/gateware/coaxpress_clock_align.py deleted file mode 100644 index 73fb0eb..0000000 --- a/src/gateware/coaxpress_clock_align.py +++ /dev/null @@ -1,114 +0,0 @@ -from math import ceil -from functools import reduce -from operator import add - -from migen import * -from migen.genlib.cdc import MultiReg, PulseSynchronizer - - -# Changes the phase of the transceiver RX clock to align the comma to -# the LSBs of RXDATA, fixing the latency. -# -# This is implemented by repeatedly resetting the transceiver until it -# gives out the correct phase. Each reset gives a random phase. -# -# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA -# would achieve this faster and in a cleaner way. But: -# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead -# of 1 UI at every pulse. It is unclear what the latency becomes. -# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. -# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless -# transceiver "feature". -# -# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped -# compared to the usual 8b10b binary representation. -class CXP_BruteforceClockAligner(Module): - def __init__(self, comma, tx_clk_freq, check_period=6e-3): - self.rxdata = Signal(20) - self.restart = Signal() - - self.ready = Signal() - - check_max_val = ceil(check_period*tx_clk_freq) - check_counter = Signal(max=check_max_val+1) - check = Signal() - reset_check_counter = Signal() - self.sync += [ - check.eq(0), - If(reset_check_counter, - check_counter.eq(check_max_val) - ).Else( - If(check_counter == 0, - check.eq(1), - check_counter.eq(check_max_val) - ).Else( - check_counter.eq(check_counter-1) - ) - ) - ] - - checks_reset = PulseSynchronizer("sys", "cxp_gtx_rx") - self.submodules += checks_reset - - comma_n = ~comma & 0b1111111111 - comma_seen_rxclk = Signal() - comma_seen = Signal() - comma_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(comma_seen_rxclk, comma_seen) - self.sync.cxp_gtx_rx += \ - If(checks_reset.o, - comma_seen_rxclk.eq(0) - ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), - comma_seen_rxclk.eq(1) - ) - - error_seen_rxclk = Signal() - error_seen = Signal() - error_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(error_seen_rxclk, error_seen) - rx1cnt = Signal(max=11) - self.sync.cxp_gtx_rx += [ - rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), - If(checks_reset.o, - error_seen_rxclk.eq(0) - ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), - error_seen_rxclk.eq(1) - ) - ] - - fsm = FSM(reset_state="WAIT_COMMA") - self.submodules += fsm - - fsm.act("WAIT_COMMA", - If(check, - # Errors are still OK at this stage, as the transceiver - # has just been reset and may output garbage data. - If(comma_seen, - NextState("WAIT_NOERROR") - ).Else( - self.restart.eq(1) - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("WAIT_NOERROR", - If(check, - If(comma_seen & ~error_seen, - NextState("READY") - ).Else( - self.restart.eq(1), - NextState("WAIT_COMMA") - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("READY", - reset_check_counter.eq(1), - self.ready.eq(1), - If(error_seen, - checks_reset.i.eq(1), - self.restart.eq(1), - NextState("WAIT_COMMA") - ) - ) - diff --git a/src/gateware/coaxpress_gtx.py b/src/gateware/cxp_downconn.py similarity index 76% rename from src/gateware/coaxpress_gtx.py rename to src/gateware/cxp_downconn.py index 8e0252a..73c37b8 100644 --- a/src/gateware/coaxpress_gtx.py +++ b/src/gateware/cxp_downconn.py @@ -1,6 +1,6 @@ from migen import * from migen.genlib.resetsync import AsyncResetSynchronizer -from migen.genlib.cdc import MultiReg +from migen.genlib.cdc import MultiReg, PulseSynchronizer from misoc.cores.code_8b10b import Encoder, Decoder from misoc.interconnect.csr import * @@ -8,7 +8,117 @@ from misoc.interconnect.csr import * from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface from artiq.gateware.drtio.transceiver.gtx_7series_init import * -from coaxpress_clock_align import CXP_BruteforceClockAligner +from operator import add +from math import ceil +from functools import reduce + +# Changes the phase of the transceiver RX clock to align the comma to +# the LSBs of RXDATA, fixing the latency. +# +# This is implemented by repeatedly resetting the transceiver until it +# gives out the correct phase. Each reset gives a random phase. +# +# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA +# would achieve this faster and in a cleaner way. But: +# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead +# of 1 UI at every pulse. It is unclear what the latency becomes. +# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. +# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless +# transceiver "feature". +# +# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped +# compared to the usual 8b10b binary representation. +class CXP_BruteforceClockAligner(Module): + def __init__(self, comma, tx_clk_freq, check_period=6e-3): + self.rxdata = Signal(20) + self.restart = Signal() + + self.ready = Signal() + + check_max_val = ceil(check_period*tx_clk_freq) + check_counter = Signal(max=check_max_val+1) + check = Signal() + reset_check_counter = Signal() + self.sync += [ + check.eq(0), + If(reset_check_counter, + check_counter.eq(check_max_val) + ).Else( + If(check_counter == 0, + check.eq(1), + check_counter.eq(check_max_val) + ).Else( + check_counter.eq(check_counter-1) + ) + ) + ] + + checks_reset = PulseSynchronizer("sys", "cxp_gtx_rx") + self.submodules += checks_reset + + comma_n = ~comma & 0b1111111111 + comma_seen_rxclk = Signal() + comma_seen = Signal() + comma_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(comma_seen_rxclk, comma_seen) + self.sync.cxp_gtx_rx += \ + If(checks_reset.o, + comma_seen_rxclk.eq(0) + ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), + comma_seen_rxclk.eq(1) + ) + + error_seen_rxclk = Signal() + error_seen = Signal() + error_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(error_seen_rxclk, error_seen) + rx1cnt = Signal(max=11) + self.sync.cxp_gtx_rx += [ + rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), + If(checks_reset.o, + error_seen_rxclk.eq(0) + ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), + error_seen_rxclk.eq(1) + ) + ] + + fsm = FSM(reset_state="WAIT_COMMA") + self.submodules += fsm + + fsm.act("WAIT_COMMA", + If(check, + # Errors are still OK at this stage, as the transceiver + # has just been reset and may output garbage data. + If(comma_seen, + NextState("WAIT_NOERROR") + ).Else( + self.restart.eq(1) + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("WAIT_NOERROR", + If(check, + If(comma_seen & ~error_seen, + NextState("READY") + ).Else( + self.restart.eq(1), + NextState("WAIT_COMMA") + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("READY", + reset_check_counter.eq(1), + self.ready.eq(1), + If(error_seen, + checks_reset.i.eq(1), + self.restart.eq(1), + NextState("WAIT_COMMA") + ) + ) + + class CXP_DownConn(Module): # Settings: @@ -315,114 +425,3 @@ class CXP_DownConn(Module): self.rx_ready.eq(clock_aligner.ready), tx_init.restart.eq(self.tx_restart) ] - -class CXP(Module, AutoCSR): - def __init__(self, refclk, pads, sys_clk_freq, debug_sma): - self.nchannels = nchannels = len(pads) - self.rx_start_init = CSRStorage() - self.rx_restart = CSRStatus() - self.rx_bypass_clk_alignment = CSRStorage() - - self.tx_start_init = CSRStorage() - self.tx_restart = CSRStorage() - - self.loopback_mode = CSRStorage(3) - - self.txinit_phaligndone = CSRStatus() - self.rx_ready = CSRStatus() - - self.data_0 = CSRStorage(8) - self.data_1 = CSRStorage(8) - self.control_bit_0 = CSRStorage() - self.control_bit_1 = CSRStorage() - self.encoded_0 = CSRStatus(10) - self.encoded_1 = CSRStatus(10) - - self.rxdata_0 = CSRStatus(10) - self.rxdata_1 = CSRStatus(10) - self.decoded_data_0 = CSRStatus(8) - self.decoded_data_1 = CSRStatus(8) - self.decoded_k_0 = CSRStatus() - self.decoded_k_1 = CSRStatus() - - # # # - - # single CXP - self.submodules.gtx = gtx = CXP_DownConn(refclk, pads, sys_clk_freq, tx_mode="single", rx_mode="single") - - # ! loopback for debugging - self.sync += gtx.loopback_mode.eq(self.loopback_mode.storage) - - # ! debug sma - self.specials += [ - Instance("OBUF", i_I=gtx.rxoutclk, o_O=debug_sma.p_tx), - Instance("OBUF", i_I=gtx.cd_cxp_gtx_tx.clk, o_O=debug_sma.n_rx) - ] - - self.comb += [ - self.txinit_phaligndone.status.eq(self.gtx.tx_init.Xxphaligndone), - self.rx_ready.status.eq(self.gtx.rx_ready), - ] - - self.sync.cxp_gtx_tx += [ - self.gtx.encoder.d[0].eq(self.data_0.storage), - self.gtx.encoder.k[0].eq(self.control_bit_0.storage), - self.encoded_0.status.eq(self.gtx.encoder.output[0]), - - self.gtx.encoder.d[1].eq(self.data_1.storage), - self.gtx.encoder.k[1].eq(self.control_bit_1.storage), - self.encoded_1.status.eq(self.gtx.encoder.output[1]), - ] - self.sync.cxp_gtx_rx += [ - self.rxdata_0.status.eq(self.gtx.decoders[0].input), - self.decoded_data_0.status.eq(self.gtx.decoders[0].d), - self.decoded_k_0.status.eq(self.gtx.decoders[0].k), - - self.rxdata_1.status.eq(self.gtx.decoders[1].input), - self.decoded_data_1.status.eq(self.gtx.decoders[1].d), - self.decoded_k_1.status.eq(self.gtx.decoders[1].k), - ] - - # TODO: rip encoder & rx clockalignment out of CXP_GTX - - # TODO: use expose encoder & decoder from CXP - # encoder.k = 1 if sending control bit, different calculation - # encoder.d = data 8 bit - - - - channel_interface = ChannelInterface(gtx.encoder, gtx.decoders) - self.comb += channel_interface.rx_ready.eq(gtx.rx_ready) - channel_interfaces = [] - channel_interfaces.append(channel_interface) - - # TransceiverInterface, just adding cxp_rx_ - self.stable_clkin = CSRStorage() - self.txenable = CSRStorage(len(channel_interfaces)) - for i in range(len(channel_interfaces)): - name = "cxp_gtx_rx" + str(i) - setattr(self.clock_domains, "cd_"+name, ClockDomain(name=name)) - self.channels = channel_interfaces - - - # TODO: add tx_phase_alignment for multi CXP - # The TX phase alignment will fail with a wrong TXUSRCLK frequency - - - self.comb += [ - gtx.rx_init.clk_path_ready.eq(self.rx_start_init.storage), - - gtx.tx_init.clk_path_ready.eq(self.tx_start_init.storage), - gtx.txenable.eq(self.txenable.storage[0]), - gtx.tx_restart.eq(self.tx_restart.storage), - ] - - # TODO: Connect multilane cxp_tx - - # TODO: Connect slave i's `cxp_gtx_rx` clock to `cxp_gtx_rxi` clock - self.comb += [ - getattr(self, "cd_cxp_gtx_rx" + str(0)).clk.eq(self.gtx.cd_cxp_gtx_rx.clk), - getattr(self, "cd_cxp_gtx_rx" + str(0)).rst.eq(self.gtx.cd_cxp_gtx_rx.rst) - ] - - # TODO: add low speed SERDES