forked from M-Labs/artiq
drtio: GTX WIP
This commit is contained in:
parent
c548a65ec3
commit
08e4aa3e3f
|
@ -0,0 +1,192 @@
|
|||
from migen import *
|
||||
from migen.genlib.resetsync import AsyncResetSynchronizer
|
||||
|
||||
from misoc.cores.code_8b10b import Encoder, Decoder
|
||||
|
||||
from artiq.gateware.drtio.transceiver.gtx_7series_init import *
|
||||
|
||||
|
||||
class GTX_1000BASE_BX10(Module):
|
||||
def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq):
|
||||
self.submodules.encoder = ClockDomainsRenamer("rtio")(
|
||||
Encoder(2, True))
|
||||
self.decoders = [ClockDomainsRenamer("rtio_rx")(
|
||||
Decoder(True)) for _ in range(2)]
|
||||
self.submodules += self.decoders
|
||||
|
||||
self.rx_reset = Signal()
|
||||
self.rx_ready = Signal()
|
||||
|
||||
# # #
|
||||
|
||||
refclk = Signal()
|
||||
self.specials += Instance("IBUFDS_GTE2",
|
||||
i_CEB=0,
|
||||
i_I=clock_pads.p,
|
||||
i_IB=clock_pads.n,
|
||||
o_O=refclk
|
||||
)
|
||||
|
||||
cplllock = Signal()
|
||||
# TX generates RTIO clock, init must be in system domain
|
||||
tx_init = GTXInit(sys_clk_freq, False)
|
||||
# RX receives restart commands from RTIO domain
|
||||
rx_init = ClockDomainsRenamer("rtio")(
|
||||
GTXInit(62.5e6, True))
|
||||
self.submodules += tx_init, rx_init
|
||||
self.comb += tx_init.cplllock.eq(cplllock), \
|
||||
rx_init.cplllock.eq(cplllock), \
|
||||
rx_init.restart.eq(self.rx_reset)
|
||||
|
||||
txoutclk = Signal()
|
||||
txdata = Signal(20)
|
||||
rxoutclk = Signal()
|
||||
rxdata = Signal(20)
|
||||
self.specials += \
|
||||
Instance("GTXE2_CHANNEL",
|
||||
# PMA Attributes
|
||||
p_PMA_RSV=0x00018480,
|
||||
p_PMA_RSV2=0x2050,
|
||||
p_PMA_RSV3=0,
|
||||
p_PMA_RSV4=0,
|
||||
p_RX_BIAS_CFG=0b100,
|
||||
p_RX_CM_TRIM=0b010,
|
||||
p_RX_OS_CFG=0b10000000,
|
||||
p_RX_CLK25_DIV=5,
|
||||
p_TX_CLK25_DIV=5,
|
||||
|
||||
# Power-Down Attributes
|
||||
p_PD_TRANS_TIME_FROM_P2=0x3c,
|
||||
p_PD_TRANS_TIME_NONE_P2=0x3c,
|
||||
p_PD_TRANS_TIME_TO_P2=0x64,
|
||||
|
||||
# CPLL
|
||||
p_CPLL_CFG=0xBC07DC,
|
||||
p_CPLL_FBDIV=4,
|
||||
p_CPLL_FBDIV_45=5,
|
||||
p_CPLL_REFCLK_DIV=1,
|
||||
p_RXOUT_DIV=2,
|
||||
p_TXOUT_DIV=2,
|
||||
o_CPLLLOCK=cplllock,
|
||||
i_CPLLLOCKEN=1,
|
||||
i_CPLLREFCLKSEL=0b001,
|
||||
i_TSTIN=2**20-1,
|
||||
i_GTREFCLK0=refclk,
|
||||
|
||||
# TX clock
|
||||
p_TXBUF_EN="FALSE",
|
||||
p_TX_XCLK_SEL="TXUSR",
|
||||
o_TXOUTCLK=txoutclk,
|
||||
i_TXSYSCLKSEL=0b00,
|
||||
i_TXOUTCLKSEL=0b11,
|
||||
|
||||
# TX Startup/Reset
|
||||
i_GTTXRESET=tx_init.gtXxreset,
|
||||
o_TXRESETDONE=tx_init.Xxresetdone,
|
||||
i_TXDLYSRESET=tx_init.Xxdlysreset,
|
||||
o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone,
|
||||
o_TXPHALIGNDONE=tx_init.Xxphaligndone,
|
||||
i_TXUSERRDY=tx_init.Xxuserrdy,
|
||||
|
||||
# TX data
|
||||
p_TX_DATA_WIDTH=20,
|
||||
p_TX_INT_DATAWIDTH=0,
|
||||
i_TXCHARDISPMODE=Cat(txdata[9], txdata[19]),
|
||||
i_TXCHARDISPVAL=Cat(txdata[8], txdata[18]),
|
||||
i_TXDATA=Cat(txdata[:8], txdata[10:18]),
|
||||
i_TXUSRCLK=ClockSignal("rtio"),
|
||||
i_TXUSRCLK2=ClockSignal("rtio"),
|
||||
|
||||
# TX electrical
|
||||
i_TXBUFDIFFCTRL=0b100,
|
||||
i_TXDIFFCTRL=0b1000,
|
||||
|
||||
# RX Startup/Reset
|
||||
i_GTRXRESET=rx_init.gtXxreset,
|
||||
o_RXRESETDONE=rx_init.Xxresetdone,
|
||||
i_RXDLYSRESET=rx_init.Xxdlysreset,
|
||||
o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone,
|
||||
o_RXPHALIGNDONE=rx_init.Xxphaligndone,
|
||||
i_RXUSERRDY=rx_init.Xxuserrdy,
|
||||
|
||||
# RX AFE
|
||||
p_RX_DFE_XYD_CFG=0,
|
||||
i_RXDFEXYDEN=1,
|
||||
i_RXDFEXYDHOLD=0,
|
||||
i_RXDFEXYDOVRDEN=0,
|
||||
i_RXLPMEN=0,
|
||||
|
||||
# RX clock
|
||||
p_RXBUF_EN="FALSE",
|
||||
p_RX_XCLK_SEL="RXUSR",
|
||||
i_RXDDIEN=1,
|
||||
i_RXSYSCLKSEL=0b00,
|
||||
i_RXOUTCLKSEL=0b010,
|
||||
o_RXOUTCLK=rxoutclk,
|
||||
i_RXUSRCLK=ClockSignal("rtio_rx"),
|
||||
i_RXUSRCLK2=ClockSignal("rtio_rx"),
|
||||
p_RXCDR_CFG=0x03000023FF10100020,
|
||||
|
||||
# RX Clock Correction Attributes
|
||||
p_CLK_CORRECT_USE="FALSE",
|
||||
p_CLK_COR_SEQ_1_1=0b0100000000,
|
||||
p_CLK_COR_SEQ_2_1=0b0100000000,
|
||||
p_CLK_COR_SEQ_1_ENABLE=0b1111,
|
||||
p_CLK_COR_SEQ_2_ENABLE=0b1111,
|
||||
|
||||
# RX data
|
||||
p_RX_DATA_WIDTH=20,
|
||||
p_RX_INT_DATAWIDTH=0,
|
||||
o_RXDISPERR=Cat(rxdata[9], rxdata[19]),
|
||||
o_RXCHARISK=Cat(rxdata[8], rxdata[18]),
|
||||
o_RXDATA=Cat(rxdata[:8], rxdata[10:18]),
|
||||
|
||||
# Pads
|
||||
i_GTXRXP=rx_pads.p,
|
||||
i_GTXRXN=rx_pads.n,
|
||||
o_GTXTXP=tx_pads.p,
|
||||
o_GTXTXN=tx_pads.n,
|
||||
)
|
||||
|
||||
self.clock_domains.cd_rtio = ClockDomain()
|
||||
self.specials += [
|
||||
Instance("BUFG", i_I=txoutclk, o_O=self.cd_rtio.clk),
|
||||
AsyncResetSynchronizer(self.cd_rtio, ~tx_init.done)
|
||||
]
|
||||
self.clock_domains.cd_rtio_rx = ClockDomain()
|
||||
self.specials += [
|
||||
Instance("BUFG", i_I=rxoutclk, o_O=self.cd_rtio_rx.clk),
|
||||
AsyncResetSynchronizer(self.cd_rtio_rx, ~rx_init.done)
|
||||
]
|
||||
|
||||
self.comb += [
|
||||
txdata.eq(Cat(self.encoder.output[0], self.encoder.output[1])),
|
||||
self.decoders[0].input.eq(rxdata[:10]),
|
||||
self.decoders[1].input.eq(rxdata[10:])
|
||||
]
|
||||
# TODO: clock aligner, reset/ready
|
||||
|
||||
|
||||
class RXSynchronizer(Module):
|
||||
"""Delays the data received in the rtio_rx by a configurable amount
|
||||
so that it meets s/h in the rtio domain, and recapture it in the rtio
|
||||
domain. This has fixed latency.
|
||||
|
||||
Since Xilinx doesn't provide decent on-chip delay lines, we implement the
|
||||
delay with MMCM that provides a clock and a finely configurable phase, used
|
||||
to resample the data.
|
||||
|
||||
The phase has to be determined either empirically or by making sense of the
|
||||
Xilinx scriptures (when existent) and should be constant for a given design
|
||||
placement.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.cd_rtio_delayed = ClockDomain()
|
||||
# TODO
|
||||
|
||||
def sync(self, signal):
|
||||
delayed = Signal.like(signal, related=signal)
|
||||
synchronized = Signal.like(signal, related=signal)
|
||||
self.sync.rtio_delayed += delayed.eq(signal)
|
||||
self.sync.rtio += synchronized.eq(delayed)
|
||||
return synchronized
|
|
@ -0,0 +1,168 @@
|
|||
from math import ceil
|
||||
|
||||
from migen import *
|
||||
from migen.genlib.cdc import MultiReg, PulseSynchronizer
|
||||
from migen.genlib.misc import WaitTimer
|
||||
from migen.genlib.fsm import FSM
|
||||
|
||||
|
||||
class GTXInit(Module):
|
||||
# Based on LiteSATA by Enjoy-Digital
|
||||
def __init__(self, sys_clk_freq, rx):
|
||||
self.done = Signal()
|
||||
self.restart = Signal()
|
||||
|
||||
# GTX signals
|
||||
self.cplllock = Signal()
|
||||
self.gtXxreset = Signal()
|
||||
self.Xxresetdone = Signal()
|
||||
self.Xxdlysreset = Signal()
|
||||
self.Xxdlysresetdone = Signal()
|
||||
self.Xxphaligndone = Signal()
|
||||
self.Xxuserrdy = Signal()
|
||||
|
||||
# # #
|
||||
|
||||
# Double-latch transceiver asynch outputs
|
||||
cplllock = Signal()
|
||||
Xxresetdone = Signal()
|
||||
Xxdlysresetdone = Signal()
|
||||
Xxphaligndone = Signal()
|
||||
self.specials += [
|
||||
MultiReg(self.cplllock, cplllock),
|
||||
MultiReg(self.Xxresetdone, Xxresetdone),
|
||||
MultiReg(self.Xxdlysresetdone, Xxdlysresetdone),
|
||||
MultiReg(self.Xxphaligndone, Xxphaligndone),
|
||||
]
|
||||
|
||||
# Deglitch FSM outputs driving transceiver asynch inputs
|
||||
gtXxreset = Signal()
|
||||
Xxdlysreset = Signal()
|
||||
Xxuserrdy = Signal()
|
||||
self.sync += [
|
||||
self.gtXxreset.eq(gtXxreset),
|
||||
self.Xxdlysreset.eq(Xxdlysreset),
|
||||
self.Xxuserrdy.eq(Xxuserrdy)
|
||||
]
|
||||
|
||||
# After configuration, transceiver resets have to stay low for
|
||||
# at least 500ns (see AR43482)
|
||||
startup_cycles = ceil(500*sys_clk_freq/1000000000)
|
||||
startup_timer = WaitTimer(startup_cycles)
|
||||
self.submodules += startup_timer
|
||||
|
||||
startup_fsm = FSM(reset_state="INITIAL")
|
||||
self.submodules += startup_fsm
|
||||
|
||||
if rx:
|
||||
cdr_stable_timer = WaitTimer(1024)
|
||||
self.submodules += cdr_stable_timer
|
||||
|
||||
Xxphaligndone_r = Signal(reset=1)
|
||||
Xxphaligndone_rising = Signal()
|
||||
self.sync += Xxphaligndone_r.eq(Xxphaligndone)
|
||||
self.comb += Xxphaligndone_rising.eq(Xxphaligndone & ~Xxphaligndone_r)
|
||||
|
||||
startup_fsm.act("INITIAL",
|
||||
startup_timer.wait.eq(1),
|
||||
If(startup_timer.done, NextState("RESET_GTX"))
|
||||
)
|
||||
startup_fsm.act("RESET_GTX",
|
||||
gtXxreset.eq(1),
|
||||
NextState("WAIT_CPLL")
|
||||
)
|
||||
startup_fsm.act("WAIT_CPLL",
|
||||
gtXxreset.eq(1),
|
||||
If(cplllock, NextState("RELEASE_RESET"))
|
||||
)
|
||||
# Release GTX reset and wait for GTX resetdone
|
||||
# (from UG476, GTX is reset on falling edge
|
||||
# of gttxreset)
|
||||
if rx:
|
||||
startup_fsm.act("RELEASE_RESET",
|
||||
Xxuserrdy.eq(1),
|
||||
cdr_stable_timer.wait.eq(1),
|
||||
If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN"))
|
||||
)
|
||||
else:
|
||||
startup_fsm.act("RELEASE_RESET",
|
||||
Xxuserrdy.eq(1),
|
||||
If(Xxresetdone, NextState("ALIGN"))
|
||||
)
|
||||
# Start delay alignment (pulse)
|
||||
startup_fsm.act("ALIGN",
|
||||
Xxuserrdy.eq(1),
|
||||
Xxdlysreset.eq(1),
|
||||
NextState("WAIT_ALIGN")
|
||||
)
|
||||
# Wait for delay alignment
|
||||
startup_fsm.act("WAIT_ALIGN",
|
||||
Xxuserrdy.eq(1),
|
||||
If(Xxdlysresetdone, NextState("WAIT_FIRST_ALIGN_DONE"))
|
||||
)
|
||||
# Wait 2 rising edges of rxphaligndone
|
||||
# (from UG476 in buffer bypass config)
|
||||
startup_fsm.act("WAIT_FIRST_ALIGN_DONE",
|
||||
Xxuserrdy.eq(1),
|
||||
If(Xxphaligndone_rising, NextState("WAIT_SECOND_ALIGN_DONE"))
|
||||
)
|
||||
startup_fsm.act("WAIT_SECOND_ALIGN_DONE",
|
||||
Xxuserrdy.eq(1),
|
||||
If(Xxphaligndone_rising, NextState("READY"))
|
||||
)
|
||||
startup_fsm.act("READY",
|
||||
Xxuserrdy.eq(1),
|
||||
self.done.eq(1),
|
||||
If(self.restart, NextState("RESET_GTX"))
|
||||
)
|
||||
|
||||
|
||||
# Changes the phase of the transceiver RX clock to align the comma to
|
||||
# the MSBs of RXDATA, fixing the latency.
|
||||
#
|
||||
# This is implemented by repeatedly resetting the transceiver until it
|
||||
# gives out the correct phase. Each reset gives a random phase.
|
||||
#
|
||||
# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA
|
||||
# would achieve this faster and in a cleaner way. But:
|
||||
# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead
|
||||
# of 1 UI at every pulse. It is unclear what the latency becomes.
|
||||
# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed.
|
||||
# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless
|
||||
# transceiver "feature".
|
||||
class BruteforceClockAligner(Module):
|
||||
def __init__(self, comma, sys_clk_freq, check_period=6e-3):
|
||||
self.rxdata = Signal(20)
|
||||
self.restart = Signal()
|
||||
|
||||
check_max_val = ceil(check_period*sys_clk_freq)
|
||||
check_counter = Signal(max=check_max_val+1)
|
||||
check = Signal()
|
||||
self.sync += [
|
||||
check.eq(0),
|
||||
If(check_counter == 0,
|
||||
check.eq(1),
|
||||
check_counter.eq(check_max_val)
|
||||
).Else(
|
||||
check_counter.eq(check_counter-1)
|
||||
)
|
||||
]
|
||||
|
||||
comma_n = ~comma & 0b1111111111
|
||||
comma_seen_rxclk = Signal()
|
||||
comma_seen = Signal()
|
||||
self.specials += MultiReg(comma_seen_rxclk, comma_seen)
|
||||
comma_seen_reset = PulseSynchronizer("sys", "rx")
|
||||
self.submodules += comma_seen_reset
|
||||
self.sync.rx += \
|
||||
If(comma_seen_reset.o,
|
||||
comma_seen_rxclk.eq(0)
|
||||
).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n),
|
||||
comma_seen_rxclk.eq(1)
|
||||
)
|
||||
|
||||
self.comb += \
|
||||
If(check,
|
||||
If(~comma_seen, self.restart.eq(1)),
|
||||
comma_seen_reset.i.eq(1)
|
||||
)
|
Loading…
Reference in New Issue