# Reconstructed from a git patch whose line breaks had been collapsed.
#
#   Commit:  08e4aa3e3f04f674c3acfe74bff1c5d0c9f32395
#   From:    Sebastien Bourdeauducq
#   Date:    Fri, 14 Oct 2016 00:36:13 +0800
#   Subject: [PATCH] drtio: GTX WIP
#
# The patch creates three new files (mode 100644), 360 insertions in total:
#   artiq/gateware/drtio/transceiver/__init__.py          (empty)
#   artiq/gateware/drtio/transceiver/gtx_7series.py       (192 lines)
#   artiq/gateware/drtio/transceiver/gtx_7series_init.py  (168 lines)
#
# The contents of the two non-empty files follow, in patch order.


# ---------------------------------------------------------------------------
# artiq/gateware/drtio/transceiver/gtx_7series.py
# ---------------------------------------------------------------------------

from migen import *
from migen.genlib.resetsync import AsyncResetSynchronizer

from misoc.cores.code_8b10b import Encoder, Decoder

from artiq.gateware.drtio.transceiver.gtx_7series_init import *


class GTX_1000BASE_BX10(Module):
    """Single GTXE2_CHANNEL transceiver with 8b10b coding (work in progress).

    The module instantiates the reference clock buffer (IBUFDS_GTE2), one
    GTXE2_CHANNEL primitive, and two ``GTXInit`` sequencers (one for TX, one
    for RX). It creates two clock domains:

    * ``rtio``    - clocked from TXOUTCLK through a BUFG, held in reset until
      the TX init sequencer reports ``done``.
    * ``rtio_rx`` - clocked from RXOUTCLK through a BUFG, held in reset until
      the RX init sequencer reports ``done``.

    Parameters:
        clock_pads: differential reference clock pads (``.p`` / ``.n``).
        tx_pads: differential serial TX pads (``.p`` / ``.n``).
        rx_pads: differential serial RX pads (``.p`` / ``.n``).
        sys_clk_freq: frequency of the system clock domain, forwarded to the
            TX ``GTXInit`` (which uses it to size its startup timer).

    Attributes:
        encoder: 8b10b encoder running in the ``rtio`` domain.
        decoders: list of two 8b10b decoders running in the ``rtio_rx``
            domain, fed with the low and high 10 bits of the received word.
        rx_reset: drives the ``restart`` input of the RX init sequencer.
        rx_ready: declared but not driven yet (see TODO at the end).
    """
    def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq):
        self.submodules.encoder = ClockDomainsRenamer("rtio")(
            Encoder(2, True))
        self.decoders = [ClockDomainsRenamer("rtio_rx")(
            Decoder(True)) for _ in range(2)]
        self.submodules += self.decoders

        self.rx_reset = Signal()
        self.rx_ready = Signal()

        # # #

        refclk = Signal()
        self.specials += Instance("IBUFDS_GTE2",
            i_CEB=0,
            i_I=clock_pads.p,
            i_IB=clock_pads.n,
            o_O=refclk
        )

        cplllock = Signal()
        # TX generates RTIO clock, init must be in system domain
        tx_init = GTXInit(sys_clk_freq, False)
        # RX receives restart commands from RTIO domain
        # NOTE(review): 62.5e6 is hard-coded as the frequency of the renamed
        # domain - presumably the RTIO clock rate; confirm.
        rx_init = ClockDomainsRenamer("rtio")(
            GTXInit(62.5e6, True))
        self.submodules += tx_init, rx_init
        # Both sequencers watch the same CPLL lock output.
        self.comb += [
            tx_init.cplllock.eq(cplllock),
            rx_init.cplllock.eq(cplllock),
            rx_init.restart.eq(self.rx_reset)
        ]

        txoutclk = Signal()
        txdata = Signal(20)
        rxoutclk = Signal()
        rxdata = Signal(20)
        # txdata/rxdata hold two 10-bit symbols. For each symbol, bits 0-7 go
        # to/from TXDATA/RXDATA, bit 8 to TXCHARDISPVAL / from RXCHARISK and
        # bit 9 to TXCHARDISPMODE / from RXDISPERR.
        self.specials += Instance("GTXE2_CHANNEL",
            # PMA Attributes
            p_PMA_RSV=0x00018480,
            p_PMA_RSV2=0x2050,
            p_PMA_RSV3=0,
            p_PMA_RSV4=0,
            p_RX_BIAS_CFG=0b100,
            p_RX_CM_TRIM=0b010,
            p_RX_OS_CFG=0b10000000,
            p_RX_CLK25_DIV=5,
            p_TX_CLK25_DIV=5,

            # Power-Down Attributes
            p_PD_TRANS_TIME_FROM_P2=0x3c,
            p_PD_TRANS_TIME_NONE_P2=0x3c,
            p_PD_TRANS_TIME_TO_P2=0x64,

            # CPLL
            p_CPLL_CFG=0xBC07DC,
            p_CPLL_FBDIV=4,
            p_CPLL_FBDIV_45=5,
            p_CPLL_REFCLK_DIV=1,
            p_RXOUT_DIV=2,
            p_TXOUT_DIV=2,
            o_CPLLLOCK=cplllock,
            i_CPLLLOCKEN=1,
            i_CPLLREFCLKSEL=0b001,
            i_TSTIN=2**20-1,
            i_GTREFCLK0=refclk,

            # TX clock
            p_TXBUF_EN="FALSE",
            p_TX_XCLK_SEL="TXUSR",
            o_TXOUTCLK=txoutclk,
            i_TXSYSCLKSEL=0b00,
            i_TXOUTCLKSEL=0b11,

            # TX Startup/Reset
            i_GTTXRESET=tx_init.gtXxreset,
            o_TXRESETDONE=tx_init.Xxresetdone,
            i_TXDLYSRESET=tx_init.Xxdlysreset,
            o_TXDLYSRESETDONE=tx_init.Xxdlysresetdone,
            o_TXPHALIGNDONE=tx_init.Xxphaligndone,
            i_TXUSERRDY=tx_init.Xxuserrdy,

            # TX data
            p_TX_DATA_WIDTH=20,
            p_TX_INT_DATAWIDTH=0,
            i_TXCHARDISPMODE=Cat(txdata[9], txdata[19]),
            i_TXCHARDISPVAL=Cat(txdata[8], txdata[18]),
            i_TXDATA=Cat(txdata[:8], txdata[10:18]),
            i_TXUSRCLK=ClockSignal("rtio"),
            i_TXUSRCLK2=ClockSignal("rtio"),

            # TX electrical
            i_TXBUFDIFFCTRL=0b100,
            i_TXDIFFCTRL=0b1000,

            # RX Startup/Reset
            i_GTRXRESET=rx_init.gtXxreset,
            o_RXRESETDONE=rx_init.Xxresetdone,
            i_RXDLYSRESET=rx_init.Xxdlysreset,
            o_RXDLYSRESETDONE=rx_init.Xxdlysresetdone,
            o_RXPHALIGNDONE=rx_init.Xxphaligndone,
            i_RXUSERRDY=rx_init.Xxuserrdy,

            # RX AFE
            p_RX_DFE_XYD_CFG=0,
            i_RXDFEXYDEN=1,
            i_RXDFEXYDHOLD=0,
            i_RXDFEXYDOVRDEN=0,
            i_RXLPMEN=0,

            # RX clock
            p_RXBUF_EN="FALSE",
            p_RX_XCLK_SEL="RXUSR",
            i_RXDDIEN=1,
            i_RXSYSCLKSEL=0b00,
            i_RXOUTCLKSEL=0b010,
            o_RXOUTCLK=rxoutclk,
            i_RXUSRCLK=ClockSignal("rtio_rx"),
            i_RXUSRCLK2=ClockSignal("rtio_rx"),
            p_RXCDR_CFG=0x03000023FF10100020,

            # RX Clock Correction Attributes
            p_CLK_CORRECT_USE="FALSE",
            p_CLK_COR_SEQ_1_1=0b0100000000,
            p_CLK_COR_SEQ_2_1=0b0100000000,
            p_CLK_COR_SEQ_1_ENABLE=0b1111,
            p_CLK_COR_SEQ_2_ENABLE=0b1111,

            # RX data
            p_RX_DATA_WIDTH=20,
            p_RX_INT_DATAWIDTH=0,
            o_RXDISPERR=Cat(rxdata[9], rxdata[19]),
            o_RXCHARISK=Cat(rxdata[8], rxdata[18]),
            o_RXDATA=Cat(rxdata[:8], rxdata[10:18]),

            # Pads
            i_GTXRXP=rx_pads.p,
            i_GTXRXN=rx_pads.n,
            o_GTXTXP=tx_pads.p,
            o_GTXTXN=tx_pads.n,
        )

        # Each recovered clock goes through a BUFG; the domain stays in reset
        # until the corresponding init sequencer is done.
        self.clock_domains.cd_rtio = ClockDomain()
        self.specials += [
            Instance("BUFG", i_I=txoutclk, o_O=self.cd_rtio.clk),
            AsyncResetSynchronizer(self.cd_rtio, ~tx_init.done)
        ]
        self.clock_domains.cd_rtio_rx = ClockDomain()
        self.specials += [
            Instance("BUFG", i_I=rxoutclk, o_O=self.cd_rtio_rx.clk),
            AsyncResetSynchronizer(self.cd_rtio_rx, ~rx_init.done)
        ]

        self.comb += [
            txdata.eq(Cat(self.encoder.output[0], self.encoder.output[1])),
            self.decoders[0].input.eq(rxdata[:10]),
            self.decoders[1].input.eq(rxdata[10:])
        ]
        # TODO: clock aligner, reset/ready


class RXSynchronizer(Module):
    """Delays the data received in the rtio_rx domain by a configurable
    amount so that it meets setup/hold (s/h) in the rtio domain, and
    recaptures it in the rtio domain. This has fixed latency.

    Since Xilinx doesn't provide decent on-chip delay lines, we implement the
    delay with an MMCM that provides a clock with a finely configurable
    phase, used to resample the data.

    The phase has to be determined either empirically or by making sense of
    the Xilinx scriptures (when existent) and should be constant for a given
    design placement.

    The MMCM itself is not implemented yet (see TODO in ``__init__``); only
    the ``rtio_delayed`` clock domain is declared.
    """
    def __init__(self):
        # Register the domain through the clock_domains proxy, as
        # GTX_1000BASE_BX10 does for cd_rtio/cd_rtio_rx; a plain attribute
        # assignment would leave the domain unknown to the module.
        self.clock_domains.cd_rtio_delayed = ClockDomain()
        # TODO

    def sync(self, signal):
        """Resample ``signal`` in ``rtio_delayed``, then in ``rtio``.

        Adds two registers to this module (one per domain) and returns the
        output of the second one, a new signal shaped like ``signal``.
        """
        delayed = Signal.like(signal, related=signal)
        synchronized = Signal.like(signal, related=signal)
        # This method is itself named "sync", so `self.sync` resolves to the
        # bound method and never reaches Module.__getattr__, which is what
        # normally provides the synchronous-statement proxy. Fetch the proxy
        # explicitly; otherwise `self.sync.rtio_delayed` raises
        # AttributeError. (Renaming the method would be cleaner but would
        # change the public interface.)
        module_sync = Module.__getattr__(self, "sync")
        module_sync.rtio_delayed += delayed.eq(signal)
        module_sync.rtio += synchronized.eq(delayed)
        return synchronized


# ---------------------------------------------------------------------------
# artiq/gateware/drtio/transceiver/gtx_7series_init.py
# ---------------------------------------------------------------------------

from math import ceil

from migen import *
from migen.genlib.cdc import MultiReg, PulseSynchronizer
from migen.genlib.misc import WaitTimer
from migen.genlib.fsm import FSM


class GTXInit(Module):
    """Startup/reset sequencer for one direction (TX or RX) of a GTX.

    The "Xx" in the signal names stands for either "tx" or "rx": the
    instantiating module wires these signals to the TX* or RX* ports of the
    transceiver.

    The FSM goes through INITIAL -> RESET_GTX -> WAIT_CPLL -> RELEASE_RESET
    -> ALIGN -> WAIT_ALIGN -> WAIT_FIRST_ALIGN_DONE -> WAIT_SECOND_ALIGN_DONE
    -> READY. In READY, asserting ``restart`` sends it back to RESET_GTX.

    Parameters:
        sys_clk_freq: frequency in Hz of the clock this module runs on; used
            to convert the 500 ns startup delay into a cycle count.
        rx: when true, RELEASE_RESET additionally waits for a 1024-cycle
            timer (``cdr_stable_timer``) before moving on.

    Attributes:
        done: asserted while the FSM is in READY.
        restart: input; restarts the sequence from RESET_GTX when in READY.
        cplllock, Xxresetdone, Xxdlysresetdone, Xxphaligndone: status inputs
            from the transceiver (resynchronized internally with MultiReg).
        gtXxreset, Xxdlysreset, Xxuserrdy: registered control outputs to the
            transceiver.
    """
    # Based on LiteSATA by Enjoy-Digital
    def __init__(self, sys_clk_freq, rx):
        self.done = Signal()
        self.restart = Signal()

        # GTX signals
        self.cplllock = Signal()
        self.gtXxreset = Signal()
        self.Xxresetdone = Signal()
        self.Xxdlysreset = Signal()
        self.Xxdlysresetdone = Signal()
        self.Xxphaligndone = Signal()
        self.Xxuserrdy = Signal()

        # # #

        # Double-latch transceiver asynchronous outputs
        cplllock = Signal()
        Xxresetdone = Signal()
        Xxdlysresetdone = Signal()
        Xxphaligndone = Signal()
        self.specials += [
            MultiReg(self.cplllock, cplllock),
            MultiReg(self.Xxresetdone, Xxresetdone),
            MultiReg(self.Xxdlysresetdone, Xxdlysresetdone),
            MultiReg(self.Xxphaligndone, Xxphaligndone),
        ]

        # Deglitch FSM outputs driving transceiver asynchronous inputs
        gtXxreset = Signal()
        Xxdlysreset = Signal()
        Xxuserrdy = Signal()
        self.sync += [
            self.gtXxreset.eq(gtXxreset),
            self.Xxdlysreset.eq(Xxdlysreset),
            self.Xxuserrdy.eq(Xxuserrdy)
        ]

        # After configuration, transceiver resets have to stay low for
        # at least 500ns (see AR43482)
        startup_cycles = ceil(500*sys_clk_freq/1000000000)
        startup_timer = WaitTimer(startup_cycles)
        self.submodules += startup_timer

        startup_fsm = FSM(reset_state="INITIAL")
        self.submodules += startup_fsm

        if rx:
            cdr_stable_timer = WaitTimer(1024)
            self.submodules += cdr_stable_timer

        # Rising-edge detector on the resynchronized phaligndone. The
        # history register resets to 1, so a level that is already high
        # coming out of reset is not reported as an edge.
        Xxphaligndone_r = Signal(reset=1)
        Xxphaligndone_rising = Signal()
        self.sync += Xxphaligndone_r.eq(Xxphaligndone)
        self.comb += Xxphaligndone_rising.eq(Xxphaligndone & ~Xxphaligndone_r)

        startup_fsm.act("INITIAL",
            startup_timer.wait.eq(1),
            If(startup_timer.done, NextState("RESET_GTX"))
        )
        startup_fsm.act("RESET_GTX",
            gtXxreset.eq(1),
            NextState("WAIT_CPLL")
        )
        startup_fsm.act("WAIT_CPLL",
            gtXxreset.eq(1),
            If(cplllock, NextState("RELEASE_RESET"))
        )
        # Release GTX reset and wait for GTX resetdone
        # (from UG476, GTX is reset on falling edge
        # of gttxreset)
        if rx:
            startup_fsm.act("RELEASE_RESET",
                Xxuserrdy.eq(1),
                cdr_stable_timer.wait.eq(1),
                If(Xxresetdone & cdr_stable_timer.done, NextState("ALIGN"))
            )
        else:
            startup_fsm.act("RELEASE_RESET",
                Xxuserrdy.eq(1),
                If(Xxresetdone, NextState("ALIGN"))
            )
        # Start delay alignment (pulse)
        startup_fsm.act("ALIGN",
            Xxuserrdy.eq(1),
            Xxdlysreset.eq(1),
            NextState("WAIT_ALIGN")
        )
        # Wait for delay alignment
        startup_fsm.act("WAIT_ALIGN",
            Xxuserrdy.eq(1),
            If(Xxdlysresetdone, NextState("WAIT_FIRST_ALIGN_DONE"))
        )
        # Wait 2 rising edges of rxphaligndone
        # (from UG476 in buffer bypass config)
        startup_fsm.act("WAIT_FIRST_ALIGN_DONE",
            Xxuserrdy.eq(1),
            If(Xxphaligndone_rising, NextState("WAIT_SECOND_ALIGN_DONE"))
        )
        startup_fsm.act("WAIT_SECOND_ALIGN_DONE",
            Xxuserrdy.eq(1),
            If(Xxphaligndone_rising, NextState("READY"))
        )
        startup_fsm.act("READY",
            Xxuserrdy.eq(1),
            self.done.eq(1),
            If(self.restart, NextState("RESET_GTX"))
        )


# Changes the phase of the transceiver RX clock to align the comma to
# the LSBs of RXDATA, fixing the latency.
#
# This is implemented by repeatedly resetting the transceiver until it
# gives out the correct phase. Each reset gives a random phase.
#
# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA
# would achieve this faster and in a cleaner way. But:
#  * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead
#    of 1 UI at every pulse. It is unclear what the latency becomes.
#  * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed.
# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless
# transceiver "feature".
class BruteforceClockAligner(Module):
    """Requests a transceiver restart until the comma shows up aligned.

    Every ``check_period`` the module checks whether the ten
    least-significant bits of ``rxdata`` matched ``comma`` (or its 10-bit
    bitwise complement) at least once since the previous check. If not,
    ``restart`` is asserted for that check cycle. The "seen" flag is then
    cleared for the next period.

    The periodic check runs in the default synchronous domain and the comma
    detection in a domain literally named "rx"; the flag crosses between
    them through a MultiReg and its clear through a
    PulseSynchronizer("sys", "rx").
    NOTE(review): elsewhere in this patch the receive domain is called
    "rtio_rx" - presumably the instantiator is expected to rename domains.

    Parameters:
        comma: 10-bit comma pattern (Python integer).
        sys_clk_freq: frequency in Hz of the clock driving the check counter.
        check_period: time between checks, in seconds.

    Attributes:
        rxdata: 20-bit input, received word.
        restart: output, asserted when a check finds no comma was seen.
    """
    def __init__(self, comma, sys_clk_freq, check_period=6e-3):
        self.rxdata = Signal(20)
        self.restart = Signal()

        # Free-running down-counter; `check` pulses for one cycle each time
        # it reaches zero and reloads.
        check_max_val = ceil(check_period*sys_clk_freq)
        check_counter = Signal(max=check_max_val+1)
        check = Signal()
        self.sync += [
            check.eq(0),
            If(check_counter == 0,
                check.eq(1),
                check_counter.eq(check_max_val)
            ).Else(
                check_counter.eq(check_counter-1)
            )
        ]

        # Complement of the comma restricted to 10 bits; also accepted below.
        comma_n = ~comma & 0b1111111111
        comma_seen_rxclk = Signal()
        comma_seen = Signal()
        self.specials += MultiReg(comma_seen_rxclk, comma_seen)
        comma_seen_reset = PulseSynchronizer("sys", "rx")
        self.submodules += comma_seen_reset
        # Sticky flag: set on a match, cleared by the synchronized pulse.
        self.sync.rx += \
            If(comma_seen_reset.o,
                comma_seen_rxclk.eq(0)
            ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n),
                comma_seen_rxclk.eq(1)
            )

        self.comb += \
            If(check,
                If(~comma_seen, self.restart.eq(1)),
                comma_seen_reset.i.eq(1)
            )