From c57b66497c7c090f9551123c7082c6244e04ca39 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sat, 23 Dec 2017 01:19:44 +0800 Subject: [PATCH] drtio: refactor/simplify GTH, use migen --- .../drtio/transceiver/clock_aligner.py | 113 -------- .../drtio/transceiver/gth_ultrascale.py | 265 +++--------------- .../drtio/transceiver/gth_ultrascale_init.py | 119 +++++++- 3 files changed, 149 insertions(+), 348 deletions(-) delete mode 100644 artiq/gateware/drtio/transceiver/clock_aligner.py diff --git a/artiq/gateware/drtio/transceiver/clock_aligner.py b/artiq/gateware/drtio/transceiver/clock_aligner.py deleted file mode 100644 index 050720998..000000000 --- a/artiq/gateware/drtio/transceiver/clock_aligner.py +++ /dev/null @@ -1,113 +0,0 @@ -from math import ceil -from functools import reduce -from operator import add - -from litex.gen import * -from litex.gen.genlib.cdc import MultiReg, PulseSynchronizer - - -# Changes the phase of the transceiver RX clock to align the comma to -# the LSBs of RXDATA, fixing the latency. -# -# This is implemented by repeatedly resetting the transceiver until it -# gives out the correct phase. Each reset gives a random phase. -# -# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA -# would achieve this faster and in a cleaner way. But: -# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead -# of 1 UI at every pulse. It is unclear what the latency becomes. -# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. -# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless -# transceiver "feature". -# -# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped -# compared to the usual 8b10b binary representation. -class BruteforceClockAligner(Module): - def __init__(self, comma, tx_clk_freq, check_period=6e-3): - self.rxdata = Signal(20) - self.restart = Signal() - - self.ready = Signal() - - check_max_val = ceil(check_period*tx_clk_freq) - check_counter = Signal(max=check_max_val+1) - check = Signal() - reset_check_counter = Signal() - self.sync.rtio_tx += [ - check.eq(0), - If(reset_check_counter, - check_counter.eq(check_max_val) - ).Else( - If(check_counter == 0, - check.eq(1), - check_counter.eq(check_max_val) - ).Else( - check_counter.eq(check_counter-1) - ) - ) - ] - - checks_reset = PulseSynchronizer("rtio_tx", "rtio_rx") - self.submodules += checks_reset - - comma_n = ~comma & 0b1111111111 - comma_seen_rxclk = Signal() - comma_seen = Signal() - comma_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(comma_seen_rxclk, comma_seen) - self.sync.rtio_rx += \ - If(checks_reset.o, - comma_seen_rxclk.eq(0) - ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), - comma_seen_rxclk.eq(1) - ) - - error_seen_rxclk = Signal() - error_seen = Signal() - error_seen_rxclk.attr.add("no_retiming") - self.specials += MultiReg(error_seen_rxclk, error_seen) - rx1cnt = Signal(max=11) - self.sync.rtio_rx += [ - rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), - If(checks_reset.o, - error_seen_rxclk.eq(0) - ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), - error_seen_rxclk.eq(1) - ) - ] - - fsm = ClockDomainsRenamer("rtio_tx")(FSM(reset_state="WAIT_COMMA")) - self.submodules += fsm - - fsm.act("WAIT_COMMA", - If(check, - # Errors are still OK at this stage, as the transceiver - # has just been reset and may output garbage data. - If(comma_seen, - NextState("WAIT_NOERROR") - ).Else( - self.restart.eq(1) - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("WAIT_NOERROR", - If(check, - If(comma_seen & ~error_seen, - NextState("READY") - ).Else( - self.restart.eq(1), - NextState("WAIT_COMMA") - ), - checks_reset.i.eq(1) - ) - ) - fsm.act("READY", - reset_check_counter.eq(1), - self.ready.eq(1), - If(error_seen, - checks_reset.i.eq(1), - self.restart.eq(1), - NextState("WAIT_COMMA") - ) - ) diff --git a/artiq/gateware/drtio/transceiver/gth_ultrascale.py b/artiq/gateware/drtio/transceiver/gth_ultrascale.py index 616a30a72..d847f928f 100644 --- a/artiq/gateware/drtio/transceiver/gth_ultrascale.py +++ b/artiq/gateware/drtio/transceiver/gth_ultrascale.py @@ -1,224 +1,26 @@ -from litex.gen import * -from litex.gen.genlib.resetsync import AsyncResetSynchronizer -from litex.gen.genlib.cdc import MultiReg +from migen import * +from migen.genlib.resetsync import AsyncResetSynchronizer +from migen.genlib.cdc import MultiReg -from litex.soc.interconnect.csr import * -from litex.soc.cores.code_8b10b import Encoder, Decoder +from misoc.interconnect.csr import * +from misoc.cores.code_8b10b import Encoder, Decoder -from drtio.common import TransceiverInterface, ChannelInterface -from drtio.gth_ultrascale_init import GTHInit -from drtio.clock_aligner import BruteforceClockAligner - - -class GTHChannelPLL(Module): - def __init__(self, refclk, refclk_freq, linerate): - self.refclk = refclk - self.reset = Signal() - self.lock = Signal() - self.config = self.compute_config(refclk_freq, linerate) - - @staticmethod - def compute_config(refclk_freq, linerate): - for n1 in 4, 5: - for n2 in 1, 2, 3, 4, 5: - for m in 1, 2: - vco_freq = refclk_freq*(n1*n2)/m - if 2.0e9 <= vco_freq <= 6.25e9: - for d in 1, 2, 4, 8, 16: - current_linerate = vco_freq*2/d - if current_linerate == linerate: - return {"n1": n1, "n2": n2, "m": m, "d": d, - "vco_freq": vco_freq, - "clkin": refclk_freq, - "linerate": linerate} - msg = "No config found for {:3.2f} MHz refclk / {:3.2f} Gbps linerate." - raise ValueError(msg.format(refclk_freq/1e6, linerate/1e9)) - - def __repr__(self): - r = """ -GTHChannelPLL -============== - overview: - --------- - +--------------------------------------------------+ - | | - | +-----+ +---------------------------+ +-----+ | - | | | | Phase Frequency Detector | | | | -CLKIN +----> /M +--> Charge Pump +-> VCO +---> CLKOUT - | | | | Loop Filter | | | | - | +-----+ +---------------------------+ +--+--+ | - | ^ | | - | | +-------+ +-------+ | | - | +----+ /N2 <----+ /N1 <----+ | - | +-------+ +-------+ | - +--------------------------------------------------+ - +-------+ - CLKOUT +-> 2/D +-> LINERATE - +-------+ - config: - ------- - CLKIN = {clkin}MHz - CLKOUT = CLKIN x (N1 x N2) / M = {clkin}MHz x ({n1} x {n2}) / {m} - = {vco_freq}GHz - LINERATE = CLKOUT x 2 / D = {vco_freq}GHz x 2 / {d} - = {linerate}GHz -""".format(clkin=self.config["clkin"]/1e6, - n1=self.config["n1"], - n2=self.config["n2"], - m=self.config["m"], - vco_freq=self.config["vco_freq"]/1e9, - d=self.config["d"], - linerate=self.config["linerate"]/1e9) - return r - - -class GTHQuadPLL(Module): - def __init__(self, refclk, refclk_freq, linerate): - self.clk = Signal() - self.refclk = Signal() - self.reset = Signal() - self.lock = Signal() - self.config = self.compute_config(refclk_freq, linerate) - - # # # - - self.specials += \ - Instance("GTHE3_COMMON", - # common - i_GTREFCLK00=refclk, - i_GTREFCLK01=refclk, - i_QPLLRSVD1=0, - i_QPLLRSVD2=0, - i_QPLLRSVD3=0, - i_QPLLRSVD4=0, - i_BGBYPASSB=1, - i_BGMONITORENB=1, - i_BGPDB=1, - i_BGRCALOVRD=0b11111, - i_BGRCALOVRDENB=0b1, - i_RCALENB=1, - - # qpll0 - p_QPLL0_FBDIV=self.config["n"], - p_QPLL0_REFCLK_DIV=self.config["m"], - i_QPLL0CLKRSVD0=0, - i_QPLL0CLKRSVD1=0, - i_QPLL0LOCKDETCLK=ClockSignal(), - i_QPLL0LOCKEN=1, - o_QPLL0LOCK=self.lock if self.config["qpll"] == "qpll0" else - Signal(), - o_QPLL0OUTCLK=self.clk if self.config["qpll"] == "qpll0" else - Signal(), - o_QPLL0OUTREFCLK=self.refclk if self.config["qpll"] == "qpll0" else - Signal(), - i_QPLL0PD=0 if self.config["qpll"] == "qpll0" else 1, - i_QPLL0REFCLKSEL=0b001, - i_QPLL0RESET=self.reset, - - # qpll1 - p_QPLL1_FBDIV=self.config["n"], - p_QPLL1_REFCLK_DIV=self.config["m"], - i_QPLL1CLKRSVD0=0, - i_QPLL1CLKRSVD1=0, - i_QPLL1LOCKDETCLK=ClockSignal(), - i_QPLL1LOCKEN=1, - o_QPLL1LOCK=self.lock if self.config["qpll"] == "qpll1" else - Signal(), - o_QPLL1OUTCLK=self.clk if self.config["qpll"] == "qpll1" else - Signal(), - o_QPLL1OUTREFCLK=self.refclk if self.config["qpll"] == "qpll1" else - Signal(), - i_QPLL1PD=0 if self.config["qpll"] == "qpll1" else 1, - i_QPLL1REFCLKSEL=0b001, - i_QPLL1RESET=self.reset, - ) - - @staticmethod - def compute_config(refclk_freq, linerate): - for n in [16, 20, 32, 40, 60, 64, 66, 75, 80, 84, - 90, 96, 100, 112, 120, 125, 150, 160]: - for m in 1, 2, 3, 4: - vco_freq = refclk_freq*n/m - if 8e9 <= vco_freq <= 13e9: - qpll = "qpll1" - elif 9.8e9 <= vco_freq <= 16.375e9: - qpll = "qpll0" - else: - qpll = None - if qpll is not None: - for d in 1, 2, 4, 8, 16: - current_linerate = (vco_freq/2)*2/d - if current_linerate == linerate: - return {"n": n, "m": m, "d": d, - "vco_freq": vco_freq, - "qpll": qpll, - "clkin": refclk_freq, - "clkout": vco_freq/2, - "linerate": linerate} - msg = "No config found for {:3.2f} MHz refclk / {:3.2f} Gbps linerate." - raise ValueError(msg.format(refclk_freq/1e6, linerate/1e9)) - - def __repr__(self): - r = """ -GTXQuadPLL -=========== - overview: - --------- - +-------------------------------------------------------------++ - | +------------+ | - | +-----+ +---------------------------+ | QPLL0 | +--+ | - | | | | Phase Frequency Detector +-> VCO | | | | -CLKIN +----> /M +--> Charge Pump | +------------+->/2+--> CLKOUT - | | | | Loop Filter +-> QPLL1 | | | | - | +-----+ +---------------------------+ | VCO | +--+ | - | ^ +-----+------+ | - | | +-------+ | | - | +--------+ /N <----------------+ | - | +-------+ | - +--------------------------------------------------------------+ - +-------+ - CLKOUT +-> 2/D +-> LINERATE - +-------+ - config: - ------- - CLKIN = {clkin}MHz - CLKOUT = CLKIN x N / (2 x M) = {clkin}MHz x {n} / (2 x {m}) - = {clkout}GHz - VCO = {vco_freq}GHz ({qpll}) - LINERATE = CLKOUT x 2 / D = {clkout}GHz x 2 / {d} - = {linerate}GHz -""".format(clkin=self.config["clkin"]/1e6, - n=self.config["n"], - m=self.config["m"], - clkout=self.config["clkout"]/1e9, - vco_freq=self.config["vco_freq"]/1e9, - qpll=self.config["qpll"].upper(), - d=self.config["d"], - linerate=self.config["linerate"]/1e9) - return r +from artiq.gateware.drtio.core import TransceiverInterface, ChannelInterface +from artiq.gateware.drtio.transceiver.gth_ultrascale_init import * class GTHSingle(Module): - def __init__(self, pll, tx_pads, rx_pads, sys_clk_freq, dw=20, mode="master"): + def __init__(self, refclk, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq, dw, mode): assert (dw == 20) or (dw == 40) assert mode in ["master", "slave"] - # # # - nwords = dw//10 - - use_cpll = isinstance(pll, GTHChannelPLL) - use_qpll0 = isinstance(pll, GTHQuadPLL) and pll.config["qpll"] == "qpll0" - use_qpll1 = isinstance(pll, GTHQuadPLL) and pll.config["qpll"] == "qpll1" - self.submodules.encoder = encoder = ClockDomainsRenamer("rtio_tx")( Encoder(nwords, True)) self.submodules.decoders = decoders = [ClockDomainsRenamer("rtio_rx")( (Decoder(True))) for _ in range(nwords)] self.rx_ready = Signal() - self.rtio_clk_freq = pll.config["linerate"]/dw - # transceiver direct clock outputs # useful to specify clock constraints in a way palatable to Vivado self.txoutclk = Signal() @@ -230,11 +32,13 @@ class GTHSingle(Module): tx_init = GTHInit(sys_clk_freq, False) # RX receives restart commands from RTIO domain rx_init = ClockDomainsRenamer("rtio_tx")( - GTHInit(self.rtio_clk_freq, True)) + GTHInit(rtio_clk_freq, True)) self.submodules += tx_init, rx_init + + pll_lock = Signal() self.comb += [ - tx_init.plllock.eq(pll.lock), - rx_init.plllock.eq(pll.lock) + tx_init.plllock.eq(pll_lock), + rx_init.plllock.eq(pll_lock) ] txdata = Signal(dw) @@ -263,31 +67,25 @@ class GTHSingle(Module): p_CPLL_CFG1=0xa4ac, p_CPLL_CFG2=0xf007, p_CPLL_CFG3=0x0000, - p_CPLL_FBDIV=1 if use_qpll0 or use_qpll1 else pll.config["n2"], - p_CPLL_FBDIV_45=4 if use_qpll0 or use_qpll1 else pll.config["n1"], - p_CPLL_REFCLK_DIV=1 if use_qpll0 or use_qpll1 else pll.config["m"], - p_RXOUT_DIV=pll.config["d"], - p_TXOUT_DIV=pll.config["d"], + p_CPLL_FBDIV=5, + p_CPLL_FBDIV_45=4, + p_CPLL_REFCLK_DIV=1, + p_RXOUT_DIV=2, + p_TXOUT_DIV=2, i_CPLLRESET=0, - i_CPLLPD=0 if use_qpll0 or use_qpll1 else pll.reset, - o_CPLLLOCK=Signal() if use_qpll0 or use_qpll1 else pll.lock, + i_CPLLPD=0, + o_CPLLLOCK=pll_lock, i_CPLLLOCKEN=1, i_CPLLREFCLKSEL=0b001, i_TSTIN=2**20-1, - i_GTREFCLK0=0 if use_qpll0 or use_qpll1 else pll.refclk, - - # QPLL - i_QPLL0CLK=0 if use_cpll or use_qpll1 else pll.clk, - i_QPLL0REFCLK=0 if use_cpll or use_qpll1 else pll.refclk, - i_QPLL1CLK=0 if use_cpll or use_qpll0 else pll.clk, - i_QPLL1REFCLK=0 if use_cpll or use_qpll0 else pll.refclk, + i_GTREFCLK0=refclk, # TX clock p_TXBUF_EN="FALSE", p_TX_XCLK_SEL="TXUSR", o_TXOUTCLK=self.txoutclk, - i_TXSYSCLKSEL=0b00 if use_cpll else 0b10 if use_qpll0 else 0b11, - i_TXPLLCLKSEL=0b00 if use_cpll else 0b11 if use_qpll0 else 0b10, + i_TXSYSCLKSEL=0b00, + i_TXPLLCLKSEL=0b00, i_TXOUTCLKSEL=0b11, # TX Startup/Reset @@ -376,11 +174,9 @@ class GTHSingle(Module): self.sync += tx_reset_deglitched.eq(~tx_init.done) self.clock_domains.cd_rtio_tx = ClockDomain() if mode == "master": - tx_bufg_div = pll.config["clkin"]/self.rtio_clk_freq - assert tx_bufg_div == int(tx_bufg_div) self.specials += \ Instance("BUFG_GT", i_I=self.txoutclk, o_O=self.cd_rtio_tx.clk, - i_DIV=int(tx_bufg_div)-1) + i_DIV=0) self.specials += AsyncResetSynchronizer(self.cd_rtio_tx, tx_reset_deglitched) # rx clocking @@ -401,7 +197,7 @@ class GTHSingle(Module): self.comb += decoders[i].input.eq(rxdata[10*i:10*(i+1)]) # clock alignment - clock_aligner = BruteforceClockAligner(0b0101111100, self.rtio_clk_freq) + clock_aligner = BruteforceClockAligner(0b0101111100, rtio_clk_freq) self.submodules += clock_aligner self.comb += [ clock_aligner.rxdata.eq(rxdata), @@ -411,19 +207,24 @@ class GTHSingle(Module): class GTH(Module, TransceiverInterface): - def __init__(self, plls, tx_pads, rx_pads, sys_clk_freq, dw, master=0): + def __init__(self, clock_pads, tx_pads, rx_pads, sys_clk_freq, rtio_clk_freq, dw=20, master=0): self.nchannels = nchannels = len(tx_pads) self.gths = [] # # # - nwords = dw//10 + refclk = Signal() + self.specials += Instance("IBUFDS_GTE3", + i_CEB=0, + i_I=clock_pads.p, + i_IB=clock_pads.n, + o_O=refclk) rtio_tx_clk = Signal() channel_interfaces = [] for i in range(nchannels): mode = "master" if i == master else "slave" - gth = GTHSingle(plls[i], tx_pads[i], rx_pads[i], sys_clk_freq, dw, mode) + gth = GTHSingle(refclk, tx_pads[i], rx_pads[i], sys_clk_freq, rtio_clk_freq, dw, mode) if mode == "master": self.comb += rtio_tx_clk.eq(gth.cd_rtio_tx.clk) else: diff --git a/artiq/gateware/drtio/transceiver/gth_ultrascale_init.py b/artiq/gateware/drtio/transceiver/gth_ultrascale_init.py index 070cb0bfb..0c2734051 100644 --- a/artiq/gateware/drtio/transceiver/gth_ultrascale_init.py +++ b/artiq/gateware/drtio/transceiver/gth_ultrascale_init.py @@ -1,8 +1,13 @@ from math import ceil +from functools import reduce +from operator import add -from litex.gen import * -from litex.gen.genlib.cdc import MultiReg -from litex.gen.genlib.misc import WaitTimer +from migen import * +from migen.genlib.cdc import MultiReg, PulseSynchronizer +from migen.genlib.misc import WaitTimer + + +__all__ = ["BruteforceClockAligner", "GTHInit"] class GTHInit(Module): @@ -135,3 +140,111 @@ class GTHInit(Module): self.done.eq(1), If(self.restart, NextState("RESET_ALL")) ) + + +# Changes the phase of the transceiver RX clock to align the comma to +# the LSBs of RXDATA, fixing the latency. +# +# This is implemented by repeatedly resetting the transceiver until it +# gives out the correct phase. Each reset gives a random phase. +# +# If Xilinx had designed the GTX transceiver correctly, RXSLIDE_MODE=PMA +# would achieve this faster and in a cleaner way. But: +# * the phase jumps are of 2 UI at every second RXSLIDE pulse, instead +# of 1 UI at every pulse. It is unclear what the latency becomes. +# * RXSLIDE_MODE=PMA cannot be used with the RX buffer bypassed. +# Those design flaws make RXSLIDE_MODE=PMA yet another broken and useless +# transceiver "feature". +# +# Warning: Xilinx transceivers are LSB first, and comma needs to be flipped +# compared to the usual 8b10b binary representation. +class BruteforceClockAligner(Module): + def __init__(self, comma, tx_clk_freq, check_period=6e-3): + self.rxdata = Signal(20) + self.restart = Signal() + + self.ready = Signal() + + check_max_val = ceil(check_period*tx_clk_freq) + check_counter = Signal(max=check_max_val+1) + check = Signal() + reset_check_counter = Signal() + self.sync.rtio_tx += [ + check.eq(0), + If(reset_check_counter, + check_counter.eq(check_max_val) + ).Else( + If(check_counter == 0, + check.eq(1), + check_counter.eq(check_max_val) + ).Else( + check_counter.eq(check_counter-1) + ) + ) + ] + + checks_reset = PulseSynchronizer("rtio_tx", "rtio_rx") + self.submodules += checks_reset + + comma_n = ~comma & 0b1111111111 + comma_seen_rxclk = Signal() + comma_seen = Signal() + comma_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(comma_seen_rxclk, comma_seen) + self.sync.rtio_rx += \ + If(checks_reset.o, + comma_seen_rxclk.eq(0) + ).Elif((self.rxdata[:10] == comma) | (self.rxdata[:10] == comma_n), + comma_seen_rxclk.eq(1) + ) + + error_seen_rxclk = Signal() + error_seen = Signal() + error_seen_rxclk.attr.add("no_retiming") + self.specials += MultiReg(error_seen_rxclk, error_seen) + rx1cnt = Signal(max=11) + self.sync.rtio_rx += [ + rx1cnt.eq(reduce(add, [self.rxdata[i] for i in range(10)])), + If(checks_reset.o, + error_seen_rxclk.eq(0) + ).Elif((rx1cnt != 4) & (rx1cnt != 5) & (rx1cnt != 6), + error_seen_rxclk.eq(1) + ) + ] + + fsm = ClockDomainsRenamer("rtio_tx")(FSM(reset_state="WAIT_COMMA")) + self.submodules += fsm + + fsm.act("WAIT_COMMA", + If(check, + # Errors are still OK at this stage, as the transceiver + # has just been reset and may output garbage data. + If(comma_seen, + NextState("WAIT_NOERROR") + ).Else( + self.restart.eq(1) + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("WAIT_NOERROR", + If(check, + If(comma_seen & ~error_seen, + NextState("READY") + ).Else( + self.restart.eq(1), + NextState("WAIT_COMMA") + ), + checks_reset.i.eq(1) + ) + ) + fsm.act("READY", + reset_check_counter.eq(1), + self.ready.eq(1), + If(error_seen, + checks_reset.i.eq(1), + self.restart.eq(1), + NextState("WAIT_COMMA") + ) + ) +