drtio: GTX WIP

This commit is contained in:
Sebastien Bourdeauducq 2016-10-14 00:36:13 +08:00
parent c548a65ec3
commit 08e4aa3e3f
3 changed files with 360 additions and 0 deletions

View File

@ -0,0 +1,192 @@
from migen import *
from migen.genlib.resetsync import AsyncResetSynchronizer
from misoc.cores.code_8b10b import Encoder, Decoder
from artiq.gateware.drtio.transceiver.gtx_7series_init import *
class GTX_1000BASE_BX10(Module):
def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq):
self.submodules.encoder = ClockDomainsRenamer("rtio")(
Encoder(2, True))
self.decoders = [ClockDomainsRenamer("rtio_rx")(
Decoder(True)) for _ in range(2)]
self.submodules += self.decoders
self.rx_reset = Signal()
self.rx_ready = Signal()
# # #
refclk = Signal()
self.specials += Instance("IBUFDS_GTE2",
i_CEB=0,
i_I=clock_pads.p,
i_IB=clock_pads.n,
o_O=refclk
)
cplllock = Signal()
# TX generates RTIO clock, init must be in system domain
tx_init = GTXInit(sys_clk_freq, False)
# RX receives restart commands from RTIO domain
rx_init = ClockDomainsRenamer("rtio")(
GTXInit(62.5e6, True))
self.submodules += tx_init, rx_init
self.comb += tx_init.cplllock.eq(cplllock), \
rx_init.cplllock.eq(cplllock), \
rx_init.restart.eq(self.rx_reset)
txoutclk = Signal()
txdata = Signal(20)
rxoutclk = Signal()
rxdata = Signal(20)
self.specials += \
Instance("GTXE2_CHANNEL",
# PMA Attributes
p_PMA_RSV=0x00018480,
p_PMA_RSV2=0x2050,
p_PMA_RSV3=0,
p_PMA_RSV4=0,
p_RX_BIAS_CFG=0b100,
p_RX_CM_TRIM=0b010,
p_RX_OS_CFG=0b10000000,
p_RX_CLK25_DIV=5,
p_TX_CLK25_DIV=5,
# Power-Down Attributes
p_PD_TRANS_TIME_FROM_P2=0x3c,
p_PD_TRANS_TIME_NONE_P2=0x3c,
p_PD_TRANS_TIME_TO_P2=0x64,
# CPLL
p_CPLL_CFG=0xBC07DC,
p_CPLL_FBDIV=4,
p_CPLL_FBDIV_45=5,
p_CPLL_REFCLK_DIV=1,
p_RXOUT_DIV=2,
p_TXOUT_DIV=2,
o_CPLLLOCK=cplllock,
i_CPLLLOCKEN=1,
i_CPLLREFCLKSEL=0b001,
i_TSTIN=2**20-1,
i_GTREFCLK0=refclk,
# TX clock
p_TXBUF_EN="FALSE",
p_TX_XCLK_SEL="TXUSR",
o_TXOUTCLK=txoutclk,
i_TXSYSCLKSEL=0b00,
i_TXOUTCLKSEL=0b11,
# TX Startup/Reset
i_GTTXRESET=tx_init.gtXxreset,
o_TXRESETDONE=tx_init.Xxresetdone,
i_TXDLYSRESET=tx_init.Xxdlysreset,
o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone,
o_TXPHALIGNDONE=tx_init.Xxphaligndone,
i_TXUSERRDY=tx_init.Xxuserrdy,
# TX data
p_TX_DATA_WIDTH=20,
p_TX_INT_DATAWIDTH=0,
i_TXCHARDISPMODE=Cat(txdata[9], txdata[19]),
i_TXCHARDISPVAL=Cat(txdata[8], txdata[18]),
i_TXDATA=Cat(txdata[:8], txdata[10:18]),
i_TXUSRCLK=ClockSignal("rtio"),
i_TXUSRCLK2=ClockSignal("rtio"),
# TX electrical
i_TXBUFDIFFCTRL=0b100,
i_TXDIFFCTRL=0b1000,
# RX Startup/Reset
i_GTRXRESET=rx_init.gtXxreset,
o_RXRESETDONE=rx_init.Xxresetdone,
i_RXDLYSRESET=rx_init.Xxdlysreset,
o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone,
o_RXPHALIGNDONE=rx_init.Xxphaligndone,
i_RXUSERRDY=rx_init.Xxuserrdy,
# RX AFE
p_RX_DFE_XYD_CFG=0,
i_RXDFEXYDEN=1,
i_RXDFEXYDHOLD=0,
i_RXDFEXYDOVRDEN=0,
i_RXLPMEN=0,
# RX clock
p_RXBUF_EN="FALSE",
p_RX_XCLK_SEL="RXUSR",
i_RXDDIEN=1,
i_RXSYSCLKSEL=0b00,
i_RXOUTCLKSEL=0b010,
o_RXOUTCLK=rxoutclk,
i_RXUSRCLK=ClockSignal("rtio_rx"),
i_RXUSRCLK2=ClockSignal("rtio_rx"),
p_RXCDR_CFG=0x03000023FF10100020,
# RX Clock Correction Attributes
p_CLK_CORRECT_USE="FALSE",
p_CLK_COR_SEQ_1_1=0b0100000000,
p_CLK_COR_SEQ_2_1=0b0100000000,
p_CLK_COR_SEQ_1_ENABLE=0b1111,
p_CLK_COR_SEQ_2_ENABLE=0b1111,
# RX data
p_RX_DATA_WIDTH=20,
p_RX_INT_DATAWIDTH=0,
o_RXDISPERR=Cat(rxdata[9], rxdata[19]),
o_RXCHARISK=Cat(rxdata[8], rxdata[18]),
o_RXDATA=Cat(rxdata[:8], rxdata[10:18]),
# Pads
i_GTXRXP=rx_pads.p,
i_GTXRXN=rx_pads.n,
o_GTXTXP=tx_pads.p,
o_GTXTXN=tx_pads.n,
)
self.clock_domains.cd_rtio = ClockDomain()
self.specials += [
Instance("BUFG", i_I=txoutclk, o_O=self.cd_rtio.clk),
AsyncResetSynchronizer(self.cd_rtio, ~tx_init.done)
]
self.clock_domains.cd_rtio_rx = ClockDomain()
self.specials += [
Instance("BUFG", i_I=rxoutclk, o_O=self.cd_rtio_rx.clk),
AsyncResetSynchronizer(self.cd_rtio_rx, ~rx_init.done)
]
self.comb += [
txdata.eq(Cat(self.encoder.output[0], self.encoder.output[1])),
self.decoders[0].input.eq(rxdata[:10]),
self.decoders[1].input.eq(rxdata[10:])
]
# TODO: clock aligner, reset/ready
class RXSynchronizer(Module):
"""Delays the data received in the rtio_rx by a configurable amount
so that it meets s/h in the rtio domain, and recapture it in the rtio
domain. This has fixed latency.
Since Xilinx doesn't provide decent on-chip delay lines, we implement the
delay with MMCM that provides a clock and a finely configurable phase, used
to resample the data.
The phase has to be determined either empirically or by making sense of the
Xilinx scriptures (when existent) and should be constant for a given design
placement.
"""
def __init__(self):
self.cd_rtio_delayed = ClockDomain()
# TODO
def sync(self, signal):
delayed = Signal.like(signal, related=signal)
synchronized = Signal.like(signal, related=signal)
self.sync.rtio_delayed += delayed.eq(signal)
self.sync.rtio += synchronized.eq(delayed)
return synchronized

View File

@ -0,0 +1,168 @@
from math import ceil
from migen import *
from migen.genlib.cdc import MultiReg, PulseSynchronizer
from migen.genlib.misc import WaitTimer
from migen.genlib.fsm import FSM
class GTXInit(Module):
# Based on LiteSATA by Enjoy-Digital
def __init__(self, sys_clk_freq, rx):
self.done = Signal()
self.restart = Signal()
# GTX signals
self.cplllock = Signal()
self.gtXxreset = Signal()
self.Xxresetdone = Signal()
self.Xxdlysreset = Signal()
self.Xxdlysresetdone = Signal()
self.Xxphaligndone = Signal()
self.Xxuserrdy = Signal()
# # #
# Double-latch transceiver asynch outputs
cplllock = Signal()
Xxresetdone = Signal()
Xxdlysresetdone = Signal()
Xxphaligndone = Signal()
self.specials += [
MultiReg(self.cplllock, cplllock),
MultiReg(self.Xxresetdone, Xxresetdone),
MultiReg(self.Xxdlysresetdone, Xxdlysresetdone),
MultiReg(self.Xxphaligndone, Xxphaligndone),
]
# Deglitch FSM outputs driving transceiver asynch inputs
gtXxreset = Signal()
Xxdlysreset = Signal()
Xxuserrdy = Signal()
self.sync += [
self.gtXxreset.eq(gtXxreset),
self.Xxdlysreset.eq(Xxdlysreset),
self.Xxuserrdy.eq(Xxuserrdy)
]
# After configuration, transceiver resets have to stay low for
# at least 500ns (see AR43482)
startup_cycles = ceil(500*sys_clk_freq/1000000000)
startup_timer = WaitTimer(startup_cycles)
self.submodules += startup_timer
startup_fsm = FSM(reset_state="INITIAL")
self.submodules += startup_fsm
if rx:
cdr_stable_timer = WaitTimer(1024)
self.submodules += cdr_stable_timer
Xxphaligndone_r = Signal(reset=1)
Xxphaligndone_rising = Signal()
self.sync += Xxphaligndone_r.eq(Xxphaligndone)
self.comb += Xxphaligndone_rising.eq(Xxphaligndone & ~Xxphaligndone_r)
startup_fsm.act("INITIAL",
startup_timer.wait.eq(1),
If(startup_timer.done, NextState("RESET_GTX"))
)
startup_fsm.act("RESET_GTX",
gtXxreset.eq(1),
NextState("WAIT_CPLL")
)
startup_fsm.act("WAIT_CPLL",
gtXxreset.eq(1),
If(cplllock, NextState("RELEASE_RESET"))
)
# Release GTX reset and wait for GTX resetdone
# (from UG476, GTX is reset on falling edge
# of gttxreset)
if rx:
startup_fsm.act("RELEASE_RESET",
Xxuserrdy.eq(1),
cdr_stable_timer.wait.eq(1),
If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN"))
)
else:
startup_fsm.act("RELEASE_RESET",
Xxuserrdy.eq(1),
If(Xxresetdone, NextState("ALIGN"))
)
# Start delay alignment (pulse)
startup_fsm.act("ALIGN",
Xxuserrdy.eq(1),
Xxdlysreset.eq(1),
NextState("WAIT_ALIGN")
)
# Wait for delay alignment
startup_fsm.act("WAIT_ALIGN",
Xxuserrdy.eq(1),
If(Xxdlysresetdone, NextState("WAIT_FIRST_ALIGN_DONE"))
)
# Wait 2 rising edges of rxphaligndone
# (from UG476 in buffer bypass config)
startup_fsm.act("WAIT_FIRST_ALIGN_DONE",
Xxuserrdy.eq(1),
If(Xxphaligndone_rising, NextState("WAIT_SECOND_ALIGN_DONE"))
)
startup_fsm.act("WAIT_SECOND_ALIGN_DONE",
Xxuserrdy.eq(1),
If(Xxphaligndone_rising, NextState("READY"))
)
startup_fsm.act("READY",
Xxuserrdy.eq(1),
self.done.eq(1),
If(self.restart, NextState("RESET_GTX"))
)
# Changes the phase of the transceiver RX clock to align the comma to
# the MSBs of RXDATA, fixing the latency.
#
# This is implemented by repeatedly resetting the transceiver until it
# gives out the correct phase. Each reset gives a random phase.
#
# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA
# would achieve this faster and in a cleaner way. But:
# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead
# of 1 UI at every pulse. It is unclear what the latency becomes.
# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed.
# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless
# transceiver "feature".
class BruteforceClockAligner(Module):
def __init__(self, comma, sys_clk_freq, check_period=6e-3):
self.rxdata = Signal(20)
self.restart = Signal()
check_max_val = ceil(check_period*sys_clk_freq)
check_counter = Signal(max=check_max_val+1)
check = Signal()
self.sync += [
check.eq(0),
If(check_counter == 0,
check.eq(1),
check_counter.eq(check_max_val)
).Else(
check_counter.eq(check_counter-1)
)
]
comma_n = ~comma & 0b1111111111
comma_seen_rxclk = Signal()
comma_seen = Signal()
self.specials += MultiReg(comma_seen_rxclk, comma_seen)
comma_seen_reset = PulseSynchronizer("sys", "rx")
self.submodules += comma_seen_reset
self.sync.rx += \
If(comma_seen_reset.o,
comma_seen_rxclk.eq(0)
).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n),
comma_seen_rxclk.eq(1)
)
self.comb += \
If(check,
If(~comma_seen, self.restart.eq(1)),
comma_seen_reset.i.eq(1)
)